In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; the embedding folders and run outputs
# used by later cells live under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:

import shutil
import os
# Directory that will receive the locally copied embedding files.
# NOTE(review): the copy helper writes to the absolute path /content/embeds/,
# so this relative path assumes the working directory is /content - confirm.
os.makedirs('./embeds', exist_ok=True)

In [3]:
import multiprocessing.dummy
import tqdm
import os

## Training Setup

In [4]:
# Embedding width; EmbeddingsDataset uses it to choose which element of each
# pickled embedding to read (768 -> index 1, 527 -> index 0).
vector_size = 768 #527
# Clip length in seconds; selects the Drive folder "embeds_<N>_sec" to copy from.
passt_input_length_seconds = 30 #10

## Data transferring

In [5]:
def helper(e):
    """Copy one embedding file into /content/embeds/ unless it already exists there.

    Args:
        e: Source path of the file; the text after the last '/' becomes the
            destination file name.

    Returns:
        Always 1, whether the file was copied or skipped.
    """
    destination = '/content/embeds/' + e.rsplit('/', 1)[-1]
    if not os.path.exists(destination):
        shutil.copy(e, destination)
    return 1

In [6]:
# Thread-backed pool (multiprocessing.dummy) with 30 workers, used to copy files concurrently.
p = multiprocessing.dummy.Pool(30)

In [None]:
from pathlib import Path

# Copy every pickled embedding from Drive to local disk through the thread pool.
# helper() skips files that already exist locally, so a retry after a failure
# only copies what is still missing.
# The source folder does not change between attempts, so it is built once.
embeddings_dir = f"/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/embeds_{passt_input_length_seconds}_sec"
while True:
  try:
    # De-duplicate the recursive listing before handing it to the pool.
    embeddings_dir_files = list(set(str(x) for x in Path(embeddings_dir).rglob('*.pkl')))
    r = list(tqdm.tqdm(p.imap_unordered(helper,embeddings_dir_files),total=len(embeddings_dir_files)))
  except Exception:
    # Keep retrying on ordinary errors (presumably transient Drive I/O failures).
    # Catching Exception instead of everything lets KeyboardInterrupt stop the cell.
    import traceback
    print(traceback.format_exc())
    continue
  break

  1%|          | 574/104814 [00:19<2:32:26, 11.40it/s]

In [None]:
# Stop the copy pool from accepting further work.
p.close()

## Training setup

In [None]:

import os
import pandas as pd
import librosa
import torch
# Seed torch's global random number generator.
torch.manual_seed(0)
import traceback

import torch
import soundfile as sf
import skimage.measure
import numpy as np

import tqdm

from pathlib import Path
import pickle

from torch import nn

import torch.nn.functional as F

from io import BytesIO
from torch.nn import Sequential,Linear,ELU,Sigmoid, BatchNorm1d, Dropout1d, ReLU, LeakyReLU

In [None]:
def filename_to_time(file_name):
    """Convert the HHMM suffix of a file name into a fraction of a day.

    The directory part and everything from the first '.' of the base name are
    dropped; the text after the last '_' must start with two hour characters
    followed by two minute characters.

    Returns:
        float: minutes since midnight divided by 1440.

    Raises:
        ValueError: if the hour or minute characters cannot be parsed as integers.
    """
    stem = file_name.rsplit('/', 1)[-1].partition('.')[0]
    clock = stem.rpartition('_')[2]
    hours, minutes = int(clock[:2]), int(clock[2:4])
    return float(hours * 60 + minutes) / 1440

In [None]:

def count_parameters(model):
    """Return the total number of elements across the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

class EmbeddingsDataset(torch.utils.data.Dataset):
    """Dataset of (flattened embedding, time-of-day fraction) pairs.

    Every path under ``embeddings_dir`` whose name parses with
    ``filename_to_time`` is treated as a pickled embedding. The module-level
    ``vector_size`` decides which element of the pickled object is used
    (768 -> index 1, 527 -> index 0).

    Args:
        embeddings_dir: Directory searched recursively for embedding files.
        in_ram: If True, every embedding is unpickled up front and kept in
            memory; otherwise files are unpickled on each ``__getitem__``.

    Raises:
        ValueError: if ``vector_size`` is neither 768 nor 527.
    """

    def __init__(self,embeddings_dir, in_ram = False):
        # Keep only paths whose name carries a parsable HHMM suffix; anything
        # else found by the recursive walk (e.g. directories) is skipped.
        self.embeddings_dir_files = []
        for x in Path(embeddings_dir).rglob('*'):
            try:
                self.embeddings_dir_files.append((filename_to_time(str(x)),str(x)))
            except ValueError:
                continue

        self.in_ram = in_ram
        self.times = []
        self.embeds = {}
        if vector_size==768:
            self.passt_embedding_index = 1
        elif vector_size==527:
            self.passt_embedding_index = 0
        else:
            raise ValueError(f'unsupported vector_size {vector_size}; expected 768 or 527')
        print(f'USING {self.passt_embedding_index} layer')
        self.good_files = []

        for timestamp,file_name in tqdm.tqdm(self.embeddings_dir_files):
            try:
                if in_ram:
                    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
                    with open(file_name,'rb') as f:
                        embed = pickle.load(f)
                    self.embeds[file_name] = embed[self.passt_embedding_index].cpu()
                self.times.append(timestamp)
                self.good_files.append((timestamp,file_name))
            except Exception:
                # A file that cannot be loaded is reported and left out of the dataset.
                import traceback
                print(traceback.format_exc())
                print(file_name)

        # Label statistics over the usable files.
        self.std = torch.std(torch.tensor(self.times))
        self.mean = torch.mean(torch.tensor(self.times))

    def __len__(self):
        """Return the number of usable samples."""
        # good_files is populated in both modes, whereas embeds stays empty
        # when in_ram is False.
        return len(self.good_files)

    def __getitem__(self, index):
        """Return ``(flattened embedding, time)`` for ``index``.

        Returns None when unpickling hits EOFError (the error is printed).
        """
        try:
            timestamp, file_name = self.good_files[index]
            if self.in_ram:
                embed = self.embeds[file_name]
            else:
                with open(file_name,'rb') as f:
                    embed = pickle.load(f)
                # Moved to CPU exactly as the in-RAM path does at load time.
                embed = embed[self.passt_embedding_index].cpu()
            return embed.reshape(-1),timestamp
        except EOFError as e:
            print(e)
            return None

In [None]:

def col(batch):
  """Collate dataset samples into ``(embeddings, times)`` tensors.

  Samples that are None are dropped before stacking.

  Returns:
      A tuple ``(e, t)`` of stacked embeddings and stacked time tensors, or
      ``torch.zeros(10)`` as a fallback when the batch cannot be stacked
      (for example when every sample is None).
  """
  try:
    samples = [x for x in batch if x is not None]
    e = torch.stack([x[0] for x in samples])
    t = torch.stack([torch.tensor(x[1]) for x in samples])
    return e,t
  except Exception:
    # Exception rather than a bare except, so KeyboardInterrupt and SystemExit
    # still propagate out of the collate call.
    return torch.zeros(10)

In [None]:
# embeddings_dir = "/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/embeds3"
# Load the locally copied embeddings into memory, then split them 70/15/15
# into train, validation and test subsets.
embeddings_dir = '/content/embeds'
ds = EmbeddingsDataset(embeddings_dir,in_ram=True)
train_set, val_set, test_set = torch.utils.data.random_split(ds, [0.7, 0.15, 0.15])

In [None]:
import matplotlib.pyplot as plt
# Collect the time-of-day label (second element) of every training sample,
# skipping samples the dataset returned as None.
times = []
for d in tqdm.tqdm(train_set):
  if d is not None:
    times.append(d[1])
times = torch.tensor(times)

In [None]:
# Histogram of the training labels with one bin per hour of the day. The range
# is pinned to [0, 1] so that bin i covers exactly [i/24, (i+1)/24); without it
# the bins span only min(times)..max(times) and drift off the hour grid that
# floor(time * 24) indexing relies on.
h = plt.hist(times,bins=24,range=(0, 1))

In [None]:
# Label of each training sample expressed in hours (0-24).
train_times = np.array(([x[1] * 24 for x in train_set]))
# Sampling weight = inverse relative frequency of the sample's hour bin.
# np.int was removed in NumPy 1.24; the builtin int is the replacement.
weights=(1 / (h[0] / h[0].sum()))[np.floor(train_times).astype(int)]

In [None]:
from torch.utils.data import WeightedRandomSampler
# Draws len(weights) indices per pass, each with probability proportional to its weight.
sampler = WeightedRandomSampler(weights, len(weights))

In [None]:
batch_size = 128
# Two training loaders over the same split: plain shuffling, and the
# weighted sampler built from the per-hour histogram.
dl_train = torch.utils.data.DataLoader(train_set,batch_size=batch_size,num_workers=2,collate_fn=col,shuffle=True)
dl_train_balanced = torch.utils.data.DataLoader(train_set,batch_size=batch_size,num_workers=2,collate_fn=col, sampler=sampler)

# Evaluation loaders use large batches (256*20 = 5120) and keep the sample order.
test_dl_train = torch.utils.data.DataLoader(train_set,batch_size=256*20,num_workers=2,collate_fn=col,shuffle=False)
dl_val = torch.utils.data.DataLoader(val_set,batch_size=256*20,num_workers=2,shuffle=False,collate_fn=col)

dl_test = torch.utils.data.DataLoader(test_set,batch_size=256*20,num_workers=2,shuffle=False,collate_fn=col)

In [None]:
def CyclicL1Loss(output,target, max_val = 1):
    """Element-wise absolute error on a cycle of length ``max_val``.

    The error is evaluated for the prediction as given and for the prediction
    shifted by ``-max_val`` and ``+max_val``; the smallest of the three is
    returned for each element (no reduction is applied).
    """
    shifted_predictions = (output, output - max_val, output + max_val)
    candidates = [torch.abs(pred - target) for pred in shifted_predictions]
    return torch.stack(candidates).min(dim=0).values
def CyclicMSELoss(output,target, max_val = 1):
    """Element-wise squared error on a cycle of length ``max_val``.

    The squared difference is evaluated for the prediction as given and for
    the prediction shifted by ``-max_val`` and ``+max_val``; the smallest of
    the three is returned for each element (no reduction is applied).
    """
    shifted_predictions = (output, output - max_val, output + max_val)
    candidates = [(pred - target) ** 2 for pred in shifted_predictions]
    return torch.stack(candidates).min(dim=0).values

In [None]:
# `criterion` (cyclic squared error) is the metric read by eval_model_rmse;
# `criterion_train` (cyclic absolute error) is the loss optimised in the training loop.
criterion = CyclicMSELoss
criterion_train = CyclicL1Loss

In [None]:
# Fully connected regressor: an input Linear+Tanh, `num_hidden_layers` further
# Linear+Tanh pairs of width `hideen_size`, and a single sigmoid output in (0, 1)
# matching targets expressed as a fraction of a day.
hideen_size = 512

num_hidden_layers = 3

hidden_layers = []

for num_hidden_layer in range(num_hidden_layers):
   hidden_layers.append(Linear(hideen_size,hideen_size))
   hidden_layers.append(torch.nn.Tanh())

# The input width follows `vector_size` instead of a hard-coded 768, so the
# model stays consistent with the embedding variant selected for the dataset.
simple_dnn = nn.Sequential(*[Linear(vector_size,hideen_size),torch.nn.Tanh()]+hidden_layers+[Linear(hideen_size,1),Sigmoid()]).cuda()

print(f'amount of parameters {count_parameters(simple_dnn)}')

In [None]:
# Adam over all model parameters; the scheduler lowers the learning rate when
# the value passed to scheduler.step() stops decreasing ('min' mode).
optim = torch.optim.Adam(simple_dnn.parameters(), lr=0.0001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optim, 'min')

In [None]:
def get_grad_norm(model):
    """Return the L2 norm of all parameter gradients of ``model`` taken together.

    Parameters without a gradient are ignored. Returns 0.0 when no parameter
    has a gradient (for example before the first backward pass) instead of
    failing on an empty ``torch.cat``.
    """
    grads = [
        param.grad.detach().flatten()
        for param in model.parameters()
        if param.grad is not None
    ]
    if not grads:
        return 0.0
    return torch.cat(grads).norm().item()

In [None]:

def eval_model_rmse(model,data_loader):
  """Evaluate ``model`` on ``data_loader`` with the module-level ``criterion``.

  Args:
      model: Module mapping a batch of embeddings to one value per sample.
      data_loader: Loader yielding ``(embeddings, targets)`` batches.

  Returns:
      tuple: ``(sqrt(mean(loss)) * 1440, mean(sqrt(loss)) * 1440)``. With a
      squared-error criterion on day-fraction targets these are the RMSE and
      the mean absolute error in minutes.
  """
  was_training = model.training
  model.eval()
  losses = []

  with torch.no_grad():
    # len(data_loader) is the actual number of batches, including a partial last one.
    for x in tqdm.tqdm_notebook(data_loader,total=len(data_loader)):
      embed,target = x[0].cuda(),x[1].float().cuda()
      # Drop only the trailing feature axis so a batch of one stays 1-D for torch.cat.
      output = model(embed).squeeze(-1)
      losses.append(criterion(output, target))
  losses = torch.cat(losses)
  # Put the model back into the mode the caller had it in.
  model.train(was_training)
  return (losses.mean() ** 0.5 * 1440, (losses ** 0.5).mean()* 1440)

TODO: try volume RMS statistics (std, mean) as simple baseline features for comparison.

In [None]:
# Enable cuDNN's benchmark mode (auto-selection of convolution algorithms).
torch.backends.cudnn.benchmark = True

In [None]:
# ls "/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/runs"

In [None]:
# Kill any running TensorBoard process, then start one on the run logs stored on Drive.
!pkill -f tensorboard
%load_ext tensorboard
%tensorboard --logdir "/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/runs/tensorboard" --bind_all

In [None]:
import datetime
# Timestamp identifying this run; it is embedded in the TensorBoard log
# directory and in the checkpoint file name.
model_time = datetime.datetime.now()
# Root folder on Drive for TensorBoard logs and saved models.
save_dir ='/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/runs'

In [None]:
from torch.utils.tensorboard import SummaryWriter
# One TensorBoard log directory per run, named after the run timestamp.
writer = SummaryWriter(log_dir=f"{save_dir}/tensorboard/{model_time}")

# Global step counter used as the x-axis for logged scalars.
n_iter = 0

In [None]:
# One batch is enough to trace the model graph for TensorBoard, so take the
# first batch instead of iterating over the whole training loader.
x = next(iter(dl_train))
writer.add_graph(simple_dnn,x[0].cuda())

In [None]:
# Training: 100 epochs over the weighted-sampler loader. Every 10th epoch the
# model is checkpointed and evaluated, and the LR scheduler is stepped.
# NOTE(review): scale_times and eps are not read anywhere in this cell - confirm they are still needed.
scale_times = True
eps = 0.000001
label_smooth_scale = 0.0001
use_label_smoothing = True
print(f'Label smoothing turned to: {use_label_smoothing}')
for epoch in tqdm.tqdm_notebook(range(100)):
  losses = []
  for x in dl_train_balanced:
    simple_dnn.train()
    embed,target = x[0].cuda(), x[1].cuda().float()
    if use_label_smoothing:
      # Adds uniform noise in [0, label_smooth_scale) to every target.
      target = target + torch.rand(len(target)).cuda()* label_smooth_scale
    output = simple_dnn(embed).squeeze(-1)
    optim.zero_grad()
    loss = criterion_train(output, target).mean()# + gen_disc_loss
    # kl_loss = torch.nn.functional.kl_div(torch.log(output).cuda(),target)
    # loss += kl_loss + 0.1
    loss.backward()
    # Clip the total gradient norm to 1 before the optimizer step.
    torch.nn.utils.clip_grad_norm_(simple_dnn.parameters(), 1)
    optim.step()
    losses.append(loss.item())
    # Running mean over this epoch's batch losses so far.
    # NOTE(review): criterion_train is the cyclic L1 loss, so sqrt(mean) * 1440
    # is not a value in minutes - confirm this scaling is intended.
    rtl = torch.tensor(losses).mean()** 0.5 * 1440
    writer.add_scalar('Loss/running_train_loss', rtl, n_iter)
    n_iter +=1

  if epoch % 10 == 0:
    # The file name only depends on model_time, so each save overwrites the previous one.
    torch.save(simple_dnn,f'{save_dir}/models/768_logits_30_sec_3_{model_time}.ckpt')
    train_loss, train_mae = eval_model_rmse(simple_dnn,test_dl_train)
    print(f'train loss: {train_loss} \n')
    val_loss, val_mae = eval_model_rmse(simple_dnn,dl_val)
    print(f'val loss: {val_loss} \n')
    scheduler.step(val_loss)
    # Gradients here are those left over from the last training step of the epoch.
    grad_norm = get_grad_norm(simple_dnn)
    writer.add_scalar('Loss/train', train_loss, n_iter)
    # Logged under 'Loss/test' but the value is the validation loss.
    writer.add_scalar('Loss/test', val_loss, n_iter)
    writer.add_scalar('lr',scheduler.optimizer.param_groups[0]['lr'],n_iter)
    writer.add_scalar('grad_norm',grad_norm,n_iter)

In [None]:
# Final evaluation on the held-out test split.
test_loss, test_mae = eval_model_rmse(simple_dnn,dl_test)
# Report the test-set value that was just computed.
print(f'test loss: {test_loss} \n')

## Testing Unseen Dataset (Only 30 seconds setup available)

In [None]:
import plotly.express as px

In [None]:
# Fresh 30-worker thread pool and a local target directory for the unseen-dataset embeddings.
# NOTE(review): the helper below writes to the absolute path /content/embeds_test/,
# so this relative path assumes the working directory is /content - confirm.
p= multiprocessing.dummy.Pool(30)
os.makedirs('./embeds_test', exist_ok=True)
def helper(e):
    """Copy one embedding file into /content/embeds_test/ unless it already exists there.

    Args:
        e: Source path of the file; the text after the last '/' becomes the
            destination file name.

    Returns:
        Always 1, whether the file was copied or skipped.
    """
    destination = '/content/embeds_test/' + e.rsplit('/', 1)[-1]
    if not os.path.exists(destination):
        shutil.copy(e, destination)
    return 1


# Copy the unseen-dataset embeddings from Drive to local disk through the pool.
# helper() skips files that already exist locally, so a retry only copies what is missing.
embeddings_dir = "/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/after_corona_30sec"
while True:
  try:
    # De-duplicate the recursive listing before handing it to the pool.
    embeddings_dir_files = list(set(str(x) for x in Path(embeddings_dir).rglob('*.pkl')))
    r = list(tqdm.tqdm(p.imap_unordered(helper,embeddings_dir_files),total=len(embeddings_dir_files)))
  except Exception:
    # Keep retrying on ordinary errors (presumably transient Drive I/O failures).
    # Catching Exception instead of everything lets KeyboardInterrupt stop the cell.
    import traceback
    print(traceback.format_exc())
    continue
  break

In [None]:
# Load the locally copied unseen-dataset embeddings fully into memory.
embeddings_dir = '/content/embeds_test'
ds_enroll = EmbeddingsDataset(embeddings_dir,in_ram=True)
# NOTE(review): the evaluation cell below builds its DataLoader without batch_size,
# so this value does not appear to be used there - confirm.
batch_size = 128

In [None]:
import glob
import pandas as pd

# Evaluate every saved '768_logits_30_sec_3_' checkpoint on the unseen dataset
# and collect one DataFrame of predictions per checkpoint in `dfs`.
dfs = []

save_dir ='/content/drive/MyDrive/Acoustic-Scene-Classification-and-Time-of-Day-Estimation/runs'
model_ckpts = glob.glob(save_dir+'/models/*')
# The loader does not depend on the checkpoint, so it is built once.
test_dl = torch.utils.data.DataLoader(ds_enroll)
for model_path in model_ckpts:
  if '768_logits_30_sec_3_' not in model_path:
    continue
  print(model_path)
  # The checkpoints are whole pickled modules (written with torch.save(model, ...)).
  # Recent PyTorch defaults torch.load to weights_only=True, which rejects them,
  # so the flag is passed explicitly. Unpickling can run arbitrary code: only
  # load checkpoints you created yourself.
  model = torch.load(model_path, weights_only=False)
  model.eval()

  target = []
  output = []
  vecs = []
  for x in tqdm.tqdm_notebook(test_dl):
    target.append(x[1])
    with torch.no_grad():
      r = model(x[0].cuda())
    output.append(r.cpu())
    vecs.append(x[0])

  # Convert day fractions to minutes since midnight.
  target = torch.cat(target) * 1440
  output = torch.cat(output).squeeze(1) * 1440

  vecs = torch.cat(vecs)
  # model_type is the position of this checkpoint among those evaluated so far.
  df = pd.DataFrame(torch.stack([output,target, torch.ones_like(target)*len(dfs)]).T,columns=['out','trg','model_type'])
  df['loss'] = criterion(output, target, max_val=1440)
  df = df.sort_values('trg')
  dfs.append(df)
  # Square root of the mean per-sample `criterion` value (cycle length 1440 minutes).
  print(df.loss.mean() ** 0.5)


In [None]:
# Distribution of the true times (minutes since midnight) in `df`, the frame of the last evaluated checkpoint.
px.histogram(df,'trg',histnorm='probability')

In [None]:
# Distribution of the predicted times (minutes since midnight) for the same checkpoint.
px.histogram(df,'out',histnorm='probability')

In [None]:
# Predicted versus true time, one point per sample of the unseen dataset.
px.scatter(df,'trg','out')

In [None]:
pip install torchview

In [None]:
from torchview import draw_graph

In [None]:
# Draw the architecture of `model` (the last checkpoint loaded in the evaluation
# loop) for a single input of width 768, and display the resulting graph.
model_graph = draw_graph(model, input_size=(1,768), expand_nested=True, save_graph=True)
model_graph.visual_graph


In [None]:
# Square root of the mean loss in `df` (last evaluated checkpoint), i.e. the RMSE in minutes.
df.loss.mean() ** 0.5