In [96]:
import json
import os
import torch
from tqdm import tqdm
import scipy.io.wavfile as wav

from torch import nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, DataLoader, Sampler
from torch.utils.tensorboard import SummaryWriter

import numpy as np

In [97]:
from models import ConvFeatureEncoder, SegmentsRepr, SegmentsEncoder, EncoderModel, SegmentPredictor, FinModel
from utils import ConstrativeLoss, sample_negatives
from trainer import Trainer

In [98]:
# Instantiate FinModel with its default constructor arguments; used for the
# single-batch forward pass further down.
m = FinModel()

In [99]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [100]:
class Dataset_check:
    """Dataset of WAV files listed in a JSON manifest, split positionally into train/val.

    The manifest is loaded with ``json.load`` and sliced, so it is expected to be a
    list of entries, each providing an ``'audio_filepath'`` key that is joined
    onto ``path``.

    Args:
        path: Directory that the manifest's ``audio_filepath`` values are relative to.
        manifest_path: Path to the JSON manifest file.
        train: If true, keep the entries before ``split_index``; otherwise keep
            the entries from ``split_index`` onwards.
        split_index: Position in the manifest at which the train/validation split
            is made. Defaults to 8000, the previously hard-coded value.
    """

    def __init__(self, path, manifest_path, train = True, split_index = 8000):
        with open(manifest_path, 'r') as json_file:
            manifest = json.load(json_file)
        if train:
            self.manifest = manifest[:split_index]
        else:
            self.manifest = manifest[split_index:]
        self.path = path

    def __len__(self):
        """Return the number of manifest entries in this split."""
        return len(self.manifest)

    def __getitem__(self, ind):
        """Read the WAV file for entry ``ind``.

        Returns:
            A dict with ``'sample'`` (the array returned by ``scipy.io.wavfile.read``)
            and ``'length'`` (``len`` of that array). The sampling rate is read
            but not returned.
        """
        audio_filepath = self.manifest[ind]['audio_filepath']
        audio_file = os.path.join(self.path, audio_filepath)
        _sampling_rate, signal = wav.read(audio_file)

        return {'sample': signal, 'length': len(signal)}

In [101]:
class Dataset:
    """Dataset over every WAV file listed in a JSON manifest.

    Args:
        path: Directory that the manifest's ``audio_filepath`` values are relative to.
        manifest_path: Path to the JSON manifest file.
        train: Accepted for signature compatibility but not used; the full
            manifest is always kept.
    """

    def __init__(self, path, manifest_path, train = True):
        self.path = path
        with open(manifest_path, 'r') as manifest_file:
            self.manifest = json.load(manifest_file)

    def __len__(self):
        """Return the number of manifest entries."""
        return len(self.manifest)

    def __getitem__(self, ind):
        """Read the WAV file for entry ``ind`` and return it with its length.

        The sampling rate reported by ``scipy.io.wavfile.read`` is discarded.
        """
        entry = self.manifest[ind]
        wav_path = os.path.join(self.path, entry['audio_filepath'])
        _, waveform = wav.read(wav_path)
        return dict(sample=waveform, length=len(waveform))

In [83]:
def collate_fn(samples):
    """Zero-pad variable-length signals on the right and stack them into one batch.

    Args:
        samples: Non-empty sequence of dicts with keys ``'sample'`` (1-D signal,
            e.g. a numpy array or list of numbers) and ``'length'`` (its length).

    Returns:
        A dict with ``'batch'``, a float tensor of shape
        ``(len(samples), max_length)``, and ``'lengths'``, a float tensor holding
        the original lengths (kept as float for compatibility with existing callers).

    Raises:
        ValueError: If ``samples`` is empty.
    """
    if len(samples) == 0:
        raise ValueError("collate_fn received an empty list of samples")

    lengths = [sample['length'] for sample in samples]
    max_length = max(lengths)

    # Pre-allocate the zero-filled batch and copy each signal into its row;
    # this avoids converting every audio sample to a Python object first.
    batch = torch.zeros(len(samples), max_length)
    for row, sample in enumerate(samples):
        signal = np.asarray(sample['sample'], dtype=np.float32)
        batch[row, :sample['length']] = torch.from_numpy(signal)

    return dict(batch=batch, lengths=torch.Tensor(lengths))

In [84]:
# NOTE(review): the `Dataset` class defined in this notebook ignores `train`, so
# both datasets below wrap the full manifest and validation data equals training data.
train_dataset = Dataset('test/crowd', 'test/crowd/manifest.json', train = True)
val_dataset = Dataset('test/crowd', 'test/crowd/manifest.json', train = False)
# Batches of 8 signals, zero-padded to a common length by collate_fn.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

In [85]:
# train_dataset = Dataset('train/', 'manifest_train.json', train = True)
# val_dataset = Dataset('train/', 'manifest_val.json', train = False)
# train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
# val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

In [86]:
# Fetch a single batch for interactive inspection. Unlike a `for ... break`
# loop, this raises StopIteration on an empty loader instead of leaving
# `batch` undefined or stale.
batch = next(iter(train_loader))

In [87]:
batch

{'batch': tensor([[-18., -19.,  -8.,  ...,   0.,   0.,   0.],
         [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
         [  1.,   1.,   0.,  ...,   0.,   0.,   0.],
         ...,
         [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
         [  0.,   0.,   0.,  ...,   0.,   0.,   0.],
         [ 22.,   4.,  -6.,  ...,   0.,   0.,   0.]]),
 'lengths': tensor([17600., 88000., 53440., 57280., 93988., 36397., 35520., 37120.])}

In [88]:
# Unpack the padded signals and their original lengths from the collated batch.
x = batch['batch']
lengths = batch['lengths']

In [89]:
# Smoke test: forward pass of the freshly constructed FinModel on one padded batch.
xx = m(x)

In [90]:
# xx = m.compute_all(x, num_epoch=0)

In [91]:
# xx

In [92]:
# Positional train/validation split of a single manifest: with its default
# split point, Dataset_check keeps the first 8000 entries for training and
# the remainder for validation.
train_dataset = Dataset_check('test/crowd/', 'test/crowd/manifest.json', train = True)
val_dataset = Dataset_check('test/crowd', 'test/crowd/manifest.json', train = False)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

model = FinModel()
opt = torch.optim.Adam(model.parameters(), lr=0.0001)
# Reduce the learning rate 10x after 5 scheduler steps without improvement.
# `verbose` is left at its default (off): the argument is deprecated in
# recent PyTorch releases.
lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1,
                                                      patience=5, threshold=0.0001, threshold_mode='rel',
                                                      cooldown=0, min_lr=0, eps=1e-08)
trainer = Trainer(model=model,
                  optimizer=opt,
                  train_dataloader=train_loader,
                  val_dataloader=val_loader,
                  tboard_log_dir='./tboard_logs/',
                  lr_scheduler=lr_sched,
                  lr_scheduler_type='per_epoch',
                  device = 'cpu')




In [93]:
# Fetch a single batch from the current train_loader. Unlike a `for ... break`
# loop, this raises StopIteration on an empty loader instead of leaving
# `batch` undefined or stale.
batch = next(iter(train_loader))

  from typing import Optional, Dict, Set, Iterator, Callable, cast


In [94]:
%tensorboard --logdir ./tboard_logs

Launching TensorBoard...

In [95]:
# Train for 20 epochs. The return value is stored as `val_acc`
# (presumably a validation metric — confirm against trainer.py).
val_acc = trainer.train(num_epochs=20)

Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val
Val


, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional, Dict, Set, Iterator, Callable, cast
  from typing import Optional,

In [27]:
#!c1.4
# NOTE(review): the `Dataset` class defined in this notebook ignores `train`, so
# both datasets below wrap the full manifest and validation data equals training data.
train_dataset = Dataset('test/crowd/', 'test/crowd/manifest.json', train = True)
val_dataset = Dataset('test/crowd', 'test/crowd/manifest.json', train = False)
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

model = FinModel()
opt = torch.optim.Adam(model.parameters(), lr=0.001)
# Reduce the learning rate 10x after 5 scheduler steps without improvement.
# `verbose` is left at its default (off): the argument is deprecated in
# recent PyTorch releases.
lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1,
                                                      patience=5, threshold=0.0001, threshold_mode='rel',
                                                      cooldown=0, min_lr=0, eps=1e-08)
trainer = Trainer(model=model,
                  optimizer=opt,
                  train_dataloader=train_loader,
                  val_dataloader=val_loader,
                  tboard_log_dir='./tboard_logs_mean/',
                  lr_scheduler=lr_sched,
                  lr_scheduler_type='per_epoch',
                  device = 'cpu')




In [28]:
%tensorboard --logdir ./tboard_logs_mean

Launching TensorBoard...

In [29]:
# Train for 5 epochs. The return value is stored as `val_acc`
# (presumably a validation metric — confirm against trainer.py).
val_acc = trainer.train(num_epochs=5)

# Другие эксперименты

In [20]:
# Experiment: EncoderModel with 5 negatives. Trains on the crowd manifest and
# validates on the separate farfield manifest.
train_dataset = Dataset('test/crowd/', 'test/crowd/manifest.json')
val_dataset = Dataset('test/farfield', 'test/farfield/manifest.json')
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

model = EncoderModel(n_negatives = 5)
opt = torch.optim.Adam(model.parameters(), lr=0.0001)
# Reduce the learning rate 10x after 5 scheduler steps without improvement.
# `verbose` is left at its default (off): the argument is deprecated in
# recent PyTorch releases.
lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1,
                                                      patience=5, threshold=0.0001, threshold_mode='rel',
                                                      cooldown=0, min_lr=0, eps=1e-08)
trainer = Trainer(model=model,
                  optimizer=opt,
                  train_dataloader=train_loader,
                  val_dataloader=val_loader,
                  tboard_log_dir='./tboard_logs_5_neg/',
                  lr_scheduler=lr_sched,
                  lr_scheduler_type='per_epoch',
                  device = 'cpu')




In [21]:
%tensorboard --logdir ./tboard_logs_5_neg

Launching TensorBoard...

In [None]:
# Train the 5-negatives configuration for 5 epochs. The return value is stored
# as `val_acc` (presumably a validation metric — confirm against trainer.py).
val_acc = trainer.train(num_epochs=5)

In [None]:
# Experiment: EncoderModel with a single negative. Trains on the crowd manifest
# and validates on the separate farfield manifest.
train_dataset = Dataset('test/crowd/', 'test/crowd/manifest.json')
val_dataset = Dataset('test/farfield', 'test/farfield/manifest.json')
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

model = EncoderModel(n_negatives = 1)
opt = torch.optim.Adam(model.parameters(), lr=0.0001)
# Reduce the learning rate 10x after 5 scheduler steps without improvement.
# `verbose` is left at its default (off): the argument is deprecated in
# recent PyTorch releases.
lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1,
                                                      patience=5, threshold=0.0001, threshold_mode='rel',
                                                      cooldown=0, min_lr=0, eps=1e-08)
trainer = Trainer(model=model,
                  optimizer=opt,
                  train_dataloader=train_loader,
                  val_dataloader=val_loader,
                  tboard_log_dir='./tboard_logs_1_neg/',
                  lr_scheduler=lr_sched,
                  lr_scheduler_type='per_epoch',
                  device = 'cpu')


In [None]:
%tensorboard --logdir ./tboard_logs_1_neg

In [None]:
# Train the single-negative configuration for 5 epochs. The return value is stored
# as `val_acc` (presumably a validation metric — confirm against trainer.py).
val_acc = trainer.train(num_epochs=5)

In [None]:
# Experiment: EncoderModel with default arguments, optimised with plain SGD
# instead of Adam. Trains on the crowd manifest (shuffled) and validates on
# the separate farfield manifest.
train_dataset = Dataset('test/crowd/', 'test/crowd/manifest.json')
val_dataset = Dataset('test/farfield', 'test/farfield/manifest.json')
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=8, collate_fn = collate_fn)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=8, collate_fn = collate_fn)

model = EncoderModel()
opt = torch.optim.SGD(model.parameters(), lr=0.0001)
# Reduce the learning rate 10x after 5 scheduler steps without improvement.
# `verbose` is left at its default (off): the argument is deprecated in
# recent PyTorch releases.
lr_sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1,
                                                      patience=5, threshold=0.0001, threshold_mode='rel',
                                                      cooldown=0, min_lr=0, eps=1e-08)
trainer = Trainer(model=model,
                  optimizer=opt,
                  train_dataloader=train_loader,
                  val_dataloader=val_loader,
                  tboard_log_dir='./tboard_logs_SGD/',
                  lr_scheduler=lr_sched,
                  lr_scheduler_type='per_epoch',
                  device = 'cpu')


In [None]:
%tensorboard --logdir ./tboard_logs_SGD

In [None]:
# Train the SGD configuration for 5 epochs. The return value is stored
# as `val_acc` (presumably a validation metric — confirm against trainer.py).
val_acc = trainer.train(num_epochs=5)

# Проверки

In [17]:
# Freshly constructed EncoderModel, used as an untrained reference.
empty_model = EncoderModel()
# Model held by the current `trainer`; which experiment this is depends on
# which setup cell was executed last.
model = trainer.model




In [18]:
# defaultdict collects per-batch metrics in the manual evaluation cells.
from collections import defaultdict
# CPU device identifier for the checks in this section.
device = 'cpu'

In [19]:
# Manual validation pass over val_loader for `model`: collects the per-batch
# loss and every entry of the `details` dict returned by compute_all, then
# averages each metric over batches.
val_losses = []

val_logs = defaultdict(list)
model.eval()
# Gradients are not needed for evaluation; disabling them saves memory.
with torch.no_grad():
    for batch in tqdm(val_loader):
        # Move tensors to the configured device (`.to()` without an argument is a no-op).
        batch = {k: v.to(device) for k, v in batch.items()}
        loss, details = model.compute_all(batch)
        val_losses.append(loss.item())
        for k, v in details.items():
            val_logs[k].append(v)

val_logs = {k: np.mean(v) for k, v in val_logs.items()}

100%|██████████| 125/125 [01:05<00:00,  1.90it/s]


In [20]:
val_logs

{'loss': 8291.42409375, 'acc': 0.3669958241416285}

In [21]:
# Same manual validation pass as for the trained model, run on the untrained
# `empty_model` to obtain reference metrics.
empty_val_losses = []
empty_model.eval()
empty_val_logs = defaultdict(list)
# Gradients are not needed for evaluation; disabling them saves memory.
with torch.no_grad():
    for batch in tqdm(val_loader):
        # Move tensors to the configured device (`.to()` without an argument is a no-op).
        batch = {k: v.to(device) for k, v in batch.items()}
        loss, details = empty_model.compute_all(batch)
        empty_val_losses.append(loss.item())
        for k, v in details.items():
            empty_val_logs[k].append(v)

empty_val_logs = {k: np.mean(v) for k, v in empty_val_logs.items()}

100%|██████████| 125/125 [01:06<00:00,  1.89it/s]


In [22]:
empty_val_logs

{'loss': 8081.74873046875, 'acc': 0.46712194600083917}

In [None]:
# Forward pass of `model`; `batch` is the leftover loop variable from an earlier
# cell. The three outputs are unpacked as predictions, targets and negatives
# (meaning inferred from the names — confirm against models.py).
x, targets, negs = model(batch)

In [None]:
# Same forward pass on the untrained reference model, for side-by-side comparison.
x1, targets1, negs1 = empty_model(batch)

In [None]:
x[0, :, :]

In [None]:
x1[0, :, :]

In [None]:
# Loss module with default constructor arguments.
loss1 = ConstrativeLoss()

In [None]:
# Loss module applied to the outputs of `model`.
l1 = loss1(x, targets, negs)

In [None]:
# Loss module applied to the outputs of the untrained `empty_model`.
l2 = loss1(x1, targets1, negs1)

In [None]:
l1

In [None]:
l2

In [None]:
# Raw similarity scores for the outputs of `model` (note the argument order:
# predictions, negatives, targets).
sim = loss1._calculate_similarity(x, negs, targets)

In [None]:
# Raw similarity scores for the outputs of the untrained `empty_model`.
sim1 = loss1._calculate_similarity(x1, negs1, targets1)

In [None]:
class ConstrativeLoss(nn.Module):
    """Contrastive loss: cross-entropy over cosine similarities, true target at index 0.

    For every position the prediction is compared with the true target and with
    each negative; the resulting similarities are treated as classification
    logits whose correct class is always 0 (the true target).
    """

    def __init__(self, logit_temp: float = 1.0, 
                 cut: bool = True, reduction: str = 'sum'):
        """
        Args:
            logit_temp: Temperature the similarity scores are divided by.
            cut: If true, drop the last step along the time axis of both the
                predictions and the targets/negatives before comparing them.
            reduction: Reduction mode forwarded to ``F.cross_entropy``.
        """
        super().__init__()
        self.logit_temp = logit_temp
        self.cut = cut
        self.reduction = reduction

    def forward(
        self,
        logits: torch.Tensor,
        targets: torch.Tensor,
        negatives: torch.Tensor,
    ) -> "tuple[torch.Tensor, float]":
        """
        Args:
            logits: Model predictions; per the shape comments below, B x T x C.
            targets: True targets, same shape as ``logits``.
            negatives: Sampled negatives with one extra leading dimension
                (number of negatives) compared to ``targets``.
        Returns:
            ``(loss, acc_score)``: the cross-entropy loss tensor and the fraction
            of positions where the true target has the highest similarity
            (a numpy float64 scalar).
        """

        # Similarity between predictions and [target, negatives...], F x B x T
        similarity_scores = self._calculate_similarity(logits, negatives, targets)

        # The true target is always class 0, so the labels are B*T zeros
        num_positions = similarity_scores.size(1) * similarity_scores.size(2)
        similarity_targets = logits.new_zeros(num_positions, dtype=torch.long)

        # Transpose similarity scores to (T*B) x F for the loss
        similarity_scores = similarity_scores.transpose(0, 2)
        similarity_scores = similarity_scores.reshape(-1, similarity_scores.size(-1))

        loss = F.cross_entropy(similarity_scores, similarity_targets, reduction=self.reduction)

        # Move to CPU explicitly so the accuracy also works for CUDA tensors
        is_correct = torch.argmax(similarity_scores, dim=1) == 0
        acc_score = is_correct.detach().cpu().numpy().mean()
        return loss, acc_score

    def _calculate_similarity(self, logits, negatives, targets):
        """Return temperature-scaled cosine similarities of shape F x B x T.

        Index 0 along the first axis is the similarity to the true target; the
        remaining indices correspond to the negatives. Negatives that happen to
        equal the target are not masked out.
        """
        candidates = torch.cat([targets.unsqueeze(0), negatives], dim=0)
        if self.cut:
            logits = logits[:, :-1, :]
            candidates = candidates[:, :, :-1, :]
        # Compute in float32 and cast back to the dtype of the predictions
        similarity = torch.cosine_similarity(logits.float(), candidates.float(), dim=-1).type_as(logits)
        return similarity / self.logit_temp