In [1]:
import os
from datetime import datetime as dt
from random import randint
from collections import defaultdict
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('dark_background')

import torchaudio
import torch
import torch.nn as nn
import torch.nn.functional as F
from tensorboardX import SummaryWriter

from hparams import Hparam
from data.dataset import SpeechDataset
from CPC.model import CPCModel
# from CPC_classifiers.speaker_model import SpeakerClassificationCPC

In [2]:
# Load the hyper-parameters from the CPC YAML config file. Later cells read
# `config.data.path` and `config.train.device` from this object.
config = Hparam('./CPC/config.yaml')
# Uncomment to override the configured device and run everything on CPU:
# config.train.device = 'cpu'
def gettime():
    """Return the current local wall-clock time formatted as 'HH:MM:SS'."""
    return dt.now().strftime('%H:%M:%S')

## model

In [3]:
# Build the speech dataset from the path stored in the config.
# Later cells index it as ds[i][1] and wrap it in a DataLoader.
ds = SpeechDataset(config.data.path)


100%|██████████| 2703/2703 [00:00<00:00, 7480.06it/s]


In [5]:
# Wrap the dataset in a loader that yields batches of 12 items.
train_dl = torch.utils.data.DataLoader(dataset=ds, batch_size=12)

# Peek at the first batch only; `b` stays bound so it can be inspected later.
batch_iter = iter(train_dl)
try:
    b = next(batch_iter)
except StopIteration:
    # Empty loader: nothing to show, and `b`, `sp`, `utt` remain undefined.
    pass
else:
    sp, utt = b
    print(sp, utt)

tensor([26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26]) tensor([[-1.2491e-01, -1.0147e-01, -6.9641e-02,  ...,  7.3334e-02,
          5.3802e-02,  3.0426e-02],
        [-1.9836e-03,  1.8311e-04,  2.5330e-03,  ...,  7.8461e-02,
          6.4392e-02,  4.5380e-02],
        [-1.3733e-03, -1.8005e-03, -1.8616e-03,  ...,  3.8757e-03,
          3.5095e-03,  2.5330e-03],
        ...,
        [ 7.7820e-03, -9.1553e-05, -6.6223e-03,  ..., -5.4779e-02,
         -9.8572e-02, -1.3077e-01],
        [-1.7276e-01, -9.8175e-02, -1.6388e-02,  ...,  6.1035e-03,
         -9.4604e-03,  5.5542e-03],
        [ 1.7242e-02, -1.6510e-02,  1.4587e-02,  ...,  2.7771e-03,
          2.8381e-03,  2.3499e-03]])


In [None]:
# Display the batch captured by the DataLoader peek in the previous cell
# (that cell must have been run first, otherwise `b` is undefined).
b

In [6]:
# Instantiate the CPC model from the config, then move it to the configured device.
model_cpc = CPCModel(config)
model_cpc = model_cpc.to(config.train.device)
# Optional: restore weights from a saved checkpoint.
# model_cpc.load_state_dict(torch.load('checkpoints/cpc_model_35_epoch.pt', map_location=config.train.device))

## experiments

In [7]:
# Take the second element of dataset item 27, prepend two singleton
# dimensions, and move the result to the configured device.
utterance = ds[27][1]
sample = utterance[None, None].to(config.train.device)
sample

tensor([[[-0.0482, -0.0498, -0.0497,  ..., -0.0154, -0.0147, -0.0119]]])

In [None]:
# Run the model's predict() on the prepared sample and list the size of
# every item it returns.
out = model_cpc.predict(sample)
[item.size() for item in out]

In [9]:
# Show the per-layer summary table of the model for this sample.
# squeeze() removes the singleton dimensions that were added when `sample` was built.
model_cpc.get_summary(sample.squeeze())

                                     Kernel Shape    Output Shape     Params  \
Layer                                                                          
0_convolutions.Conv1d_0              [1, 512, 10]  [1, 512, 4095]     5.632k   
1_convolutions.ReLU_1                           -  [1, 512, 4095]          -   
2_convolutions.BatchNorm1d_2                [512]  [1, 512, 4095]     1.024k   
3_convolutions.Conv1d_3             [512, 512, 8]  [1, 512, 1023]  2.097664M   
4_convolutions.ReLU_4                           -  [1, 512, 1023]          -   
5_convolutions.BatchNorm1d_5                [512]  [1, 512, 1023]     1.024k   
6_convolutions.Conv1d_6             [512, 512, 4]   [1, 512, 512]  1.049088M   
7_convolutions.ReLU_7                           -   [1, 512, 512]          -   
8_convolutions.BatchNorm1d_8                [512]   [1, 512, 512]     1.024k   
9_convolutions.Conv1d_9             [512, 512, 4]   [1, 512, 257]  1.049088M   
10_convolutions.ReLU_10                 

## workbench

In [None]:
# Scratch layer: a 1-D convolution with 5 input channels, 5 output channels
# and a kernel of width 1.
c = nn.Conv1d(in_channels=5, out_channels=5, kernel_size=1)

In [None]:
# Inspect the shape of the scratch convolution's weight tensor.
c.weight.size()

In [None]:
# Dummy input of shape (1, 5, 166) used to probe the scratch convolution.
# torch.zeros replaces the legacy torch.Tensor(1, 5, 166) constructor, which
# returns uninitialized memory (arbitrary, possibly NaN/inf values) and makes
# the cell non-reproducible.
t = torch.zeros(1, 5, 166)

In [None]:
# Apply the scratch convolution to the dummy input and check the output size.
c(t).size()

In [None]:
# Show the scratch convolution's state dict.
c.state_dict()

In [None]:
# Model summary for the second element of dataset item 1.
# NOTE(review): unlike the earlier get_summary(sample.squeeze()) call, this
# tensor is not explicitly moved to config.train.device — confirm that
# get_summary handles device placement itself.
model_cpc.get_summary(ds[1][1])

In [None]:
# Run predict() on the second element of dataset item 1 after prepending two
# singleton dimensions. The input is moved to config.train.device instead of
# a hard-coded .cuda(), so the cell uses the same device the model was moved
# to and still works when the config selects 'cpu'.
out = model_cpc.predict(ds[1][1].unsqueeze(0).unsqueeze(0).to(config.train.device))
# List the size of every item returned by predict().
[t.size() for t in out]