# **Model Version: 0.1 + gridsearch**


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### ***Imports***

In [None]:
import torch.nn as nn
import torch
import torch.nn.functional as F
import os
import numpy as np
import librosa
import random
from torch.utils.data import DataLoader
from torch import optim
import argparse
from sklearn.metrics import accuracy_score

### **Models**

In [None]:
class TDNN(nn.Module):

    def __init__(
                    self,
                    input_dim=23,
                    output_dim=512,
                    context_size=5,
                    stride=1,
                    dilation=1,
                    batch_norm=False,
                    dropout_p=0.2
                ):
        '''
        TDNN as defined by https://www.danielpovey.com/files/2015_interspeech_multisplice.pdf

        The affine transformation is not applied globally to all frames but to
        smaller windows with local temporal context.

        input_dim: number of features per input frame
        output_dim: number of features per output frame
        context_size: number of frames spliced together for each output frame
        stride: step (in frames) between two consecutive context windows
        dilation: spacing (in frames) between the frames inside one window
        batch_norm: True to include batch normalisation after the non linearity
        dropout_p: dropout probability; a falsy value (0 / None) disables dropout

        Context size and dilation determine the frames selected
        (although context size is not really defined in the traditional sense)
        For example:
            context size 5 and dilation 1 is equivalent to [-2,-1,0,1,2]
            context size 3 and dilation 2 is equivalent to [-2, 0, 2]
            context size 1 and dilation 1 is equivalent to [0]
        '''
        super(TDNN, self).__init__()
        self.context_size = context_size
        self.stride = stride
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.dilation = dilation
        self.dropout_p = dropout_p
        self.batch_norm = batch_norm

        # One linear map applied to every flattened context window.
        self.kernel = nn.Linear(input_dim*context_size, output_dim)
        self.nonlinearity = nn.ReLU()
        if self.batch_norm:
            self.bn = nn.BatchNorm1d(output_dim)
        if self.dropout_p:
            self.drop = nn.Dropout(p=self.dropout_p)

    def forward(self, x):
        '''
        input: size (batch, seq_len, input_features)
        output: size (batch, new_seq_len, output_features)
        '''

        _, _, d = x.shape
        assert (d == self.input_dim), 'Input dimension was wrong. Expected ({}), got ({})'.format(self.input_dim, d)
        # Add a channel axis so the sequence can be treated as a 1-channel image.
        x = x.unsqueeze(1)

        # Unfold input into smaller temporal contexts. The window spans the
        # whole feature axis, so only the temporal stride/dilation matter.
        # The temporal stride honours self.stride (it used to be fixed at 1,
        # which silently ignored the constructor argument).
        x = F.unfold(
                        x,
                        (self.context_size, self.input_dim),
                        stride=(self.stride, self.input_dim),
                        dilation=(self.dilation, 1)
                    )

        # After unfold: (batch, input_dim*context_size, new_seq_len).
        # Move the window axis last so nn.Linear acts on each window.
        x = x.transpose(1, 2)
        x = self.kernel(x.float())
        x = self.nonlinearity(x)

        if self.dropout_p:
            x = self.drop(x)

        if self.batch_norm:
            # BatchNorm1d normalises over dim 1, so put features there temporarily.
            x = x.transpose(1, 2)
            x = self.bn(x)
            x = x.transpose(1, 2)

        return x

In [None]:
class X_vector(nn.Module):
    """X-vector style classifier: five TDNN layers, statistics pooling, then
    two segment-level linear layers and a linear output layer.

    The context size and dilation of the first three TDNN layers are
    constructor arguments so they can be varied by a grid search; layers 4
    and 5 use fixed settings.

    Args:
        input_dim: number of features per input frame.
        num_classes: size of the output layer.
        context_size_layer_N / dilation_layer_N: context size and dilation
            passed to TDNN layer N (N = 1, 2, 3).
    """

    # changed for grid-search starts
    def __init__(self, input_dim = 40, num_classes=10, context_size_layer_1=5, dilation_layer_1=1, context_size_layer_2=3, dilation_layer_2=1, context_size_layer_3=2, dilation_layer_3=2):
        super(X_vector, self).__init__()
        i_d = input_dim
        self.tdnn1 = TDNN(input_dim=i_d, output_dim=512, context_size=context_size_layer_1, dilation=dilation_layer_1, dropout_p=0.5)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=context_size_layer_2, dilation=dilation_layer_2, dropout_p=0.5)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=context_size_layer_3, dilation=dilation_layer_3, dropout_p=0.5)
    # changed for grid-search end
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1,dropout_p=0.5)
        self.tdnn5 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=3,dropout_p=0.5)
        #### Segment-level layers (input is mean and std concatenated: 2 * 512)
        self.segment6 = nn.Linear(1024, 512)
        self.segment7 = nn.Linear(512, 512)
        self.output = nn.Linear(512, num_classes)
        # Kept as an attribute for callers that want probabilities; it is not
        # applied in forward() (see below).
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Return ``(logits, x_vec)`` for a batch of frame sequences.

        A leftover debugging ``return tdnn1_out`` used to exit after the first
        layer, leaving the rest of the network unreachable and breaking
        callers that unpack two return values. The full pipeline now runs.
        """
        tdnn1_out = self.tdnn1(inputs)
        tdnn2_out = self.tdnn2(tdnn1_out)
        tdnn3_out = self.tdnn3(tdnn2_out)
        tdnn4_out = self.tdnn4(tdnn3_out)
        tdnn5_out = self.tdnn5(tdnn4_out)
        ### Stat Pool: collapse dim 1 into per-feature mean and std
        mean = torch.mean(tdnn5_out,1)
        std = torch.std(tdnn5_out,1)
        stat_pooling = torch.cat((mean,std),1)
        segment6_out = self.segment6(stat_pooling)
        x_vec = self.segment7(segment6_out)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax itself,
        # so feeding it softmax outputs would normalise twice.
        predictions = self.output(x_vec)
        return predictions,x_vec

In [None]:
class X_vector(nn.Module):
    """Five stacked TDNN layers, statistics pooling and a linear classifier.

    Only the first three TDNN layers take their context size and dilation
    from the constructor (the knobs used by the grid search); layers 4 and 5
    are built with fixed values.
    """

    def __init__(self, input_dim = 40, num_classes=10, context_size_layer_1=5, dilation_layer_1=1, context_size_layer_2=3, dilation_layer_2=1, context_size_layer_3=2, dilation_layer_3=2):
        super().__init__()
        hidden = 512
        # (attribute name, input width, context size, dilation), in build order.
        frame_layers = (
            ("tdnn1", input_dim, context_size_layer_1, dilation_layer_1),
            ("tdnn2", hidden, context_size_layer_2, dilation_layer_2),
            ("tdnn3", hidden, context_size_layer_3, dilation_layer_3),
            ("tdnn4", hidden, 1, 1),
            ("tdnn5", hidden, 1, 3),
        )
        for attr, width_in, ctx, dil in frame_layers:
            setattr(
                self,
                attr,
                TDNN(input_dim=width_in, output_dim=hidden, context_size=ctx, dilation=dil, dropout_p=0.5),
            )
        # Segment-level layers; the pooled vector is two 512-wide statistics.
        self.segment6 = nn.Linear(2 * hidden, hidden)
        self.segment7 = nn.Linear(hidden, hidden)
        self.output = nn.Linear(hidden, num_classes)
        # Created but not applied in forward(), which returns raw scores.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Return ``(predictions, x_vec)``: output-layer scores and the
        segment7 embedding they were computed from."""
        frames = inputs
        for layer in (self.tdnn1, self.tdnn2, self.tdnn3, self.tdnn4, self.tdnn5):
            frames = layer(frames)

        # Statistics pooling over dim 1. The second statistic is the
        # variance (torch.var), not the standard deviation.
        pooled = torch.cat((torch.mean(frames, 1), torch.var(frames, 1)), 1)

        x_vec = self.segment7(self.segment6(pooled))
        return self.output(x_vec), x_vec

### Utilities

In [None]:
def load_wav(audio_filepath, sr, min_dur_sec=4):
    """Load an audio file and zero-pad it to a minimum duration.

    Args:
        audio_filepath: path handed to ``librosa.load``.
        sr: sample rate requested from ``librosa.load`` and used to convert
            ``min_dur_sec`` into a sample count.
        min_dur_sec: minimum length of the returned signal, in seconds.

    Returns:
        1-D array of samples. Signals shorter than ``min_dur_sec * sr``
        samples are padded with trailing zeros; longer ones are returned
        unchanged.
    """
    # Use the caller's sample rate. It was previously hard-coded to 16000,
    # which made the padding length inconsistent for any other `sr`.
    audio_data, _ = librosa.load(audio_filepath, sr=sr)

    shortfall = int(min_dur_sec * sr) - len(audio_data)
    if shortfall > 0:
        return np.concatenate((audio_data, np.zeros(shortfall)))
    return audio_data


def lin_mel_from_wav(wav, hop_length, win_length, n_mels):
    """Compute a mel spectrogram of ``wav`` and return it transposed.

    Args:
        wav: audio samples passed to ``librosa.feature.melspectrogram``.
        hop_length: hop between analysis frames, in samples.
        win_length: analysis window length, in samples.
        n_mels: number of mel bands.

    Returns:
        The transpose of the array returned by librosa.

    NOTE(review): no ``sr`` is passed, so librosa's default sample rate is
    used for the mel filter bank - confirm this matches the audio.
    """
    # `y` must be passed by keyword: recent librosa releases made the
    # arguments of melspectrogram keyword-only.
    mel = librosa.feature.melspectrogram(y=wav, n_mels=n_mels, win_length=win_length, hop_length=hop_length)
    return mel.T

def lin_spectogram_from_wav(wav, hop_length, win_length, n_fft=512):
    """Return the transposed short-time Fourier transform of ``wav``.

    The arguments are forwarded unchanged to ``librosa.stft``; the result is
    whatever librosa returns, transposed.
    """
    stft_matrix = librosa.stft(
        wav,
        n_fft=n_fft,
        win_length=win_length,
        hop_length=hop_length,
    )
    return stft_matrix.T


def feature_extraction(filepath,sr=16000, min_dur_sec=4,win_length=400,hop_length=160, n_mels=40, spec_len=400,mode='train'):
    """Load a file and return its normalised magnitude spectrogram.

    The waveform is loaded (and zero-padded) by ``load_wav``, transformed by
    ``lin_spectogram_from_wav`` with ``n_fft=512``, reduced to magnitudes and
    transposed. The result is standardised with the mean and standard
    deviation taken along axis 0.

    ``n_mels``, ``spec_len`` and ``mode`` are accepted but not used here.
    """
    waveform = load_wav(filepath, sr=sr, min_dur_sec=min_dur_sec)
    stft_frames = lin_spectogram_from_wav(waveform, hop_length, win_length, n_fft=512)

    # magphase returns (magnitude, phase); only the magnitude is kept.
    magnitude = librosa.magphase(stft_frames)[0].T

    centre = np.mean(magnitude, 0, keepdims=True)
    spread = np.std(magnitude, 0, keepdims=True)
    # The small constant avoids division by zero where the spread is 0.
    return (magnitude - centre) / (spread + 1e-5)
    
    
    
    
def load_data(filepath,sr=16000, min_dur_sec=4,win_length=400,hop_length=160, n_mels=40, spec_len=400,mode='train'):
    """Load a file and return a normalised magnitude spectrogram.

    Args:
        filepath: audio file to load.
        sr, min_dur_sec: forwarded to ``load_wav``.
        win_length, hop_length: forwarded to ``lin_spectogram_from_wav``
            (``n_fft`` is fixed at 512).
        n_mels: accepted but not used.
        spec_len: width (number of columns along axis 1) of the random crop
            taken in ``'train'`` mode.
        mode: ``'train'`` returns a random crop of ``spec_len`` columns; any
            other value returns the full spectrogram.

    Returns:
        The (possibly cropped) magnitude array, standardised with the mean
        and standard deviation taken along axis 0.

    Raises:
        ValueError: in ``'train'`` mode when the spectrogram has fewer than
            ``spec_len`` columns.
    """
    audio_data = load_wav(filepath, sr=sr,min_dur_sec=min_dur_sec)
    linear_spect = lin_spectogram_from_wav(audio_data, hop_length, win_length, n_fft=512)
    mag, _ = librosa.magphase(linear_spect)  # magnitude
    mag_T = mag.T

    if mode=='train':
        max_start = mag_T.shape[1] - spec_len
        if max_start < 0:
            raise ValueError(
                f'spectrogram has {mag_T.shape[1]} columns, fewer than spec_len={spec_len}'
            )
        # randint's upper bound is exclusive; "+ 1" makes the last valid
        # offset reachable and keeps the exact-fit case (max_start == 0)
        # from raising.
        randtime = np.random.randint(0, max_start + 1)
        spec_mag = mag_T[:, randtime:randtime+spec_len]
    else:
        spec_mag = mag_T

    # Standardise each column: statistics are taken along axis 0; the small
    # constant avoids division by zero.
    mu = np.mean(spec_mag, 0, keepdims=True)
    std = np.std(spec_mag, 0, keepdims=True)
    return (spec_mag - mu) / (std + 1e-5)
    


def load_npy_data(filepath,spec_len=400,mode='train'):
    """Load a saved 2-D feature array from a ``.npy`` file.

    Args:
        filepath: path passed to ``np.load``.
        spec_len: width (number of columns along axis 1) of the random crop
            taken in ``'train'`` mode.
        mode: ``'train'`` returns a random crop of ``spec_len`` columns; any
            other value returns the whole array.

    Returns:
        The cropped or full array, without any normalisation.

    Raises:
        ValueError: in ``'train'`` mode when the array has fewer than
            ``spec_len`` columns.
    """
    mag_T = np.load(filepath)
    if mode != 'train':
        return mag_T

    max_start = mag_T.shape[1] - spec_len
    if max_start < 0:
        raise ValueError(
            f'array has {mag_T.shape[1]} columns, fewer than spec_len={spec_len}'
        )
    # randint's upper bound is exclusive; "+ 1" makes the last valid offset
    # reachable and keeps the exact-fit case (max_start == 0) from raising.
    randtime = np.random.randint(0, max_start + 1)
    return mag_T[:, randtime:randtime+spec_len]
    
    



def speech_collate(batch):
    """Split a batch of sample dicts into parallel feature and label lists.

    Each sample must provide the keys ``'features'`` and ``'labels'``. The
    values are collected as-is (no stacking or conversion) and returned as
    ``(features_list, labels_list)`` in batch order.
    """
    # Walk the batch once, pairing each sample's feature with its label.
    pairs = [(item['features'], item['labels']) for item in batch]
    specs = [feature for feature, _ in pairs]
    targets = [label for _, label in pairs]
    return specs, targets

## SpeechDataGenerator

In [None]:
class SpeechDataGenerator():
    """Speech dataset backed by a manifest text file.

    Each non-blank manifest line has the form ``<audio path> <label>``: the
    integer label is the text after the last space, and everything before it
    is the path (which may itself contain spaces).
    """

    def __init__(self, manifest, mode):
        """
        Read the manifest and collect the audio paths and labels.

        manifest: path of the manifest text file
        mode: forwarded to ``load_data`` for every item
        """
        self.mode=mode
        self.audio_links = []
        self.labels = []
        # Read the file once and close it deterministically. It used to be
        # opened twice and never closed.
        with open(manifest) as manifest_file:
            for line in manifest_file:
                line = line.rstrip('\n')
                if not line.strip():
                    # Skip blank lines (e.g. a trailing newline at the end
                    # of the file) instead of crashing on int('').
                    continue
                audio_link, _, label = line.rpartition(' ')
                self.audio_links.append(audio_link)
                self.labels.append(int(label))

    def __len__(self):
        return len(self.audio_links)

    def __getitem__(self, idx):
        """Return ``{'features': tensor, 'labels': tensor}`` for item ``idx``."""
        audio_link =self.audio_links[idx]
        class_id = self.labels[idx]
        spec = load_data(audio_link,mode=self.mode)
        # Both values are wrapped as tensors built from contiguous arrays.
        sample = {'features': torch.from_numpy(np.ascontiguousarray(spec)), 'labels': torch.from_numpy(np.ascontiguousarray(class_id))}
        return sample

## Train X Vector

In [None]:
# Select the 'file_system' tensor-sharing strategy for torch.multiprocessing.
torch.multiprocessing.set_sharing_strategy('file_system')

########## Run configuration (plain variables; the argparse version is kept
########## below, commented out)

# Manifest files on the mounted Google Drive; each is read by
# SpeechDataGenerator (one "<audio path> <label>" entry per line).
training_filepath = '/content/drive/My Drive/10_lang_wav_files/train_10_lang.txt'
testing_filepath = '/content/drive/My Drive/10_lang_wav_files/test_10_lang.txt'
validation_filepath = '/content/drive/My Drive/10_lang_wav_files/validation_10_lang.txt'
# Feature width passed to X_vector as input_dim.
# presumably 257 = n_fft / 2 + 1 for the n_fft=512 STFT - confirm.
input_dim = 257
# Size of the classifier output layer.
num_classes = 10
# NOTE(review): lamda_val and use_gpu are not referenced by the code visible
# in this notebook; the device is chosen from torch.cuda.is_available().
lamda_val = 0.5
batch_size = 100
use_gpu = True
# Default epoch count; the grid-search cells overwrite this with 10.
num_epochs = 100
# parser = argparse.ArgumentParser(add_help=False)
# parser.add_argument('-training_filepath', type=str, default='/content/drive/My Drive/meta/training.txt')
# parser.add_argument('-testing_filepath', type=str, default='/content/drive/My Drive/meta/testing.txt')
# parser.add_argument('-validation_filepath', type=str, default='/content/drive/My Drive/meta/validation.txt')

# parser.add_argument('-input_dim', action="store_true", default=257)
# parser.add_argument('-num_classes', action="store_true", default=9)
# parser.add_argument('-lamda_val', action="store_true", default=0.5)
# parser.add_argument('-batch_size', action="store_true", default=10)
# parser.add_argument('-use_gpu', action="store_true", default=True)
# parser.add_argument('-num_epochs', action="store_true", default=30)
#args = parser.parse_args()


def train(model, optimizer, loss_fun, dataloader_train, epoch):
    """Run one training epoch and return ``(mean_acc, mean_loss)``.

    Args:
        model: network returning ``(logits, embedding)`` for a feature batch.
        optimizer: optimizer stepped once per batch.
        loss_fun: loss called as ``loss_fun(logits, labels)``.
        dataloader_train: iterable of ``(features_list, labels_list)``
            batches, as produced by ``speech_collate``.
        epoch: current epoch index (not used in the computation).

    Returns:
        Tuple ``(mean_acc, mean_loss)``: accuracy over all samples seen in
        the epoch and the mean of the per-batch losses. The metrics used to
        be computed and then discarded; returning them matches
        ``validation``. Callers that ignore the return value are unaffected.

    Relies on the module-level ``device``.
    """
    train_loss_list = []
    full_preds = []
    full_gts = []
    model.train()
    for i_batch, sample_batched in enumerate(dataloader_train):
        # Progress indicator: 1-based batch counter on a single line.
        print(f"{str(i_batch+1)},", end =" ")

        # Each feature tensor is transposed before stacking, so the batch is
        # (batch, dim 1 of the sample, dim 0 of the sample).
        features = torch.from_numpy(np.asarray([torch_tensor.numpy().T for torch_tensor in sample_batched[0]])).float()
        # Labels arrive as 1-element tensors; take the scalar of each.
        labels = torch.from_numpy(np.asarray([torch_tensor[0].numpy() for torch_tensor in sample_batched[1]]))
        labels = labels.long()
        features, labels = features.to(device), labels.to(device)
        # The inputs do not need gradients: only the model parameters are
        # optimised, so requires_grad is no longer set on `features`.
        optimizer.zero_grad()
        pred_logits, x_vec = model(features)
        #### CE loss
        loss = loss_fun(pred_logits, labels)
        loss.backward()
        optimizer.step()
        train_loss_list.append(loss.item())

        full_preds.extend(np.argmax(pred_logits.detach().cpu().numpy(), axis=1))
        full_gts.extend(labels.detach().cpu().numpy())

    mean_acc = accuracy_score(full_gts, full_preds)
    mean_loss = np.mean(np.asarray(train_loss_list))
    return (mean_acc, mean_loss)



def validation(model, loss_fun, dataloader_train, epoch):
    """Evaluate ``model`` on a dataloader without updating it.

    Args:
        model: network returning ``(logits, embedding)`` for a feature batch.
        loss_fun: loss called as ``loss_fun(logits, labels)``.
        dataloader_train: the dataloader to evaluate on. Despite its name
            (kept for backward compatibility) this is normally the
            validation loader.
        epoch: current epoch index (not used in the computation).

    Returns:
        Tuple ``(mean_acc, mean_loss)``: accuracy over all evaluated samples
        and the mean of the per-batch losses.

    Relies on the module-level ``device``.
    """
    model.eval()
    with torch.no_grad():
        val_loss_list = []
        full_preds = []
        full_gts = []
        # Iterate over the loader that was passed in. The function used to
        # ignore its argument and read the global `dataloader_val`.
        for i_batch, sample_batched in enumerate(dataloader_train):
            features = torch.from_numpy(
                np.asarray([torch_tensor.numpy().T for torch_tensor in sample_batched[0]])).float()
            # Labels arrive as 1-element tensors; take the scalar of each.
            labels = torch.from_numpy(np.asarray([torch_tensor[0].numpy() for torch_tensor in sample_batched[1]]))
            labels = labels.long()
            features, labels = features.to(device), labels.to(device)
            pred_logits, x_vec = model(features)
            #### CE loss
            loss = loss_fun(pred_logits, labels)
            val_loss_list.append(loss.item())
            full_preds.extend(np.argmax(pred_logits.detach().cpu().numpy(), axis=1))
            full_gts.extend(labels.detach().cpu().numpy())

        mean_acc = accuracy_score(full_gts, full_preds)
        mean_loss = np.mean(np.asarray(val_loss_list))
        print(f"mean_acc: {mean_acc}, mean_loss: {mean_loss}", end =" ")
        return (mean_acc, mean_loss)


In [None]:
### Data related
dataset_train = SpeechDataGenerator(manifest=training_filepath, mode='train')
dataloader_train = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, collate_fn=speech_collate)

# The validation set also uses mode='train', so every item is cropped to the
# same spec_len and the items of a batch can be stacked into one array.
dataset_val = SpeechDataGenerator(manifest=validation_filepath, mode='train')
# Wrap the *validation* dataset. This loader used to be built from
# dataset_train, so "validation" metrics were measured on the training data.
dataloader_val = DataLoader(dataset_val, batch_size=batch_size, shuffle=True, collate_fn=speech_collate)

dataset_test = SpeechDataGenerator(manifest=testing_filepath, mode='test')
dataloader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=True, collate_fn=speech_collate)

## Model related
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
# The model, optimizer and loss are created per configuration inside the
# grid-search cells; the single-model setup is kept here for reference.
# model = X_vector(input_dim, num_classes).to(device)
# optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
# loss_fun = nn.CrossEntropyLoss()

In [None]:
# LAYER 3 !!!!
# Grid search over the context size and dilation of TDNN layer 3.
# Layers 1 and 2 are held fixed at (context 3, dilation 2) and
# (context 5, dilation 2).

#gridSearch start:

import itertools
num_epochs = 10

context_sizes = [3, 5, 7]
dilations = [1, 2, 3]

best_accuracy = 0.0
best_hyperparams = None
hyperparameters_list = []
accuracy_and_loss_list = []

for context_size, dilation in itertools.product(context_sizes, dilations):
    print()
    print(f'Training with context_size={context_size} and dilation={dilation}')
    print()

    # Fresh model, optimizer and loss for every configuration; only the
    # layer-3 hyperparameters vary.
    # TODO: search over all three layers jointly
    model = X_vector(input_dim, num_classes, context_size_layer_1=3, dilation_layer_1=2, context_size_layer_2=5, dilation_layer_2=2, context_size_layer_3=context_size, dilation_layer_3=dilation).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
    loss_fun = nn.CrossEntropyLoss()

    # Train and validate the model
    for epoch in range(num_epochs):
        print("epoch: " + str(epoch))
        train(model, optimizer, loss_fun, dataloader_train, epoch)
        accuracy_and_loss = validation(model, loss_fun, dataloader_val, epoch)

        # Track the best validation accuracy seen in any epoch. The first
        # result is always recorded, so best_hyperparams cannot still be
        # None when it is indexed in the summary below.
        if best_hyperparams is None or accuracy_and_loss[0] > best_accuracy:
            best_accuracy = accuracy_and_loss[0]
            best_hyperparams = (context_size, dilation)

    # Only the last epoch's (accuracy, loss) is stored per configuration.
    hyperparameters_list.append((context_size, dilation))
    accuracy_and_loss_list.append(accuracy_and_loss)
    print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')


print(f'Best hyperparameters: context_size={best_hyperparams[0]}, dilation={best_hyperparams[1]}, '
      f'validation accuracy={best_accuracy}')

print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')

#gridSearch end


Training with context_size=2 and dilation=1

epoch: 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.11592539454806312, mean_loss: 2.2765924079077586 epoch: 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.24447632711621234, mean_loss: 2.094502527373178 epoch: 2
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, m

In [None]:
# LAYER 3 !!!!
# Grid search over the context size and dilation of TDNN layer 3.
# Layers 1 and 2 are held fixed at (context 3, dilation 2) and
# (context 5, dilation 2).

#gridSearch start:

import itertools
num_epochs = 10

context_sizes = [3, 5, 7]
dilations = [1, 2, 3]

best_accuracy = 0.0
best_hyperparams = None
hyperparameters_list = []
accuracy_and_loss_list = []

for context_size, dilation in itertools.product(context_sizes, dilations):
    print()
    print(f'Training with context_size={context_size} and dilation={dilation}')
    print()

    # Fresh model, optimizer and loss for every configuration; only the
    # layer-3 hyperparameters vary.
    # TODO: search over all three layers jointly
    model = X_vector(input_dim, num_classes, context_size_layer_1=3, dilation_layer_1=2, context_size_layer_2=5, dilation_layer_2=2, context_size_layer_3=context_size, dilation_layer_3=dilation).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
    loss_fun = nn.CrossEntropyLoss()

    # Train and validate the model
    for epoch in range(num_epochs):
        print("epoch: " + str(epoch))
        train(model, optimizer, loss_fun, dataloader_train, epoch)
        accuracy_and_loss = validation(model, loss_fun, dataloader_val, epoch)

        # Track the best validation accuracy seen in any epoch. The first
        # result is always recorded, so best_hyperparams cannot still be
        # None when it is indexed in the summary below.
        if best_hyperparams is None or accuracy_and_loss[0] > best_accuracy:
            best_accuracy = accuracy_and_loss[0]
            best_hyperparams = (context_size, dilation)

    # Only the last epoch's (accuracy, loss) is stored per configuration.
    hyperparameters_list.append((context_size, dilation))
    accuracy_and_loss_list.append(accuracy_and_loss)
    print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')


print(f'Best hyperparameters: context_size={best_hyperparams[0]}, dilation={best_hyperparams[1]}, '
      f'validation accuracy={best_accuracy}')

print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')

#gridSearch end


Training with context_size=3 and dilation=1

epoch: 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 

KeyboardInterrupt: ignored

In [None]:
# LAYER 3 !!!!
# Grid search over the context size and dilation of TDNN layer 3.
# Layers 1 and 2 are held fixed at (context 3, dilation 2) and
# (context 5, dilation 2).

#gridSearch start:

import itertools
num_epochs = 10

context_sizes = [3, 5, 7]
dilations = [1, 2, 3]

best_accuracy = 0.0
best_hyperparams = None
hyperparameters_list = []
accuracy_and_loss_list = []

for context_size, dilation in itertools.product(context_sizes, dilations):
    print()
    print(f'Training with context_size={context_size} and dilation={dilation}')
    print()

    # Fresh model, optimizer and loss for every configuration; only the
    # layer-3 hyperparameters vary.
    # TODO: search over all three layers jointly
    model = X_vector(input_dim, num_classes, context_size_layer_1=3, dilation_layer_1=2, context_size_layer_2=5, dilation_layer_2=2, context_size_layer_3=context_size, dilation_layer_3=dilation).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
    loss_fun = nn.CrossEntropyLoss()

    # Train and validate the model
    for epoch in range(num_epochs):
        print("epoch: " + str(epoch))
        train(model, optimizer, loss_fun, dataloader_train, epoch)
        accuracy_and_loss = validation(model, loss_fun, dataloader_val, epoch)

        # Track the best validation accuracy seen in any epoch. The first
        # result is always recorded, so best_hyperparams cannot still be
        # None when it is indexed in the summary below.
        if best_hyperparams is None or accuracy_and_loss[0] > best_accuracy:
            best_accuracy = accuracy_and_loss[0]
            best_hyperparams = (context_size, dilation)

    # Only the last epoch's (accuracy, loss) is stored per configuration.
    hyperparameters_list.append((context_size, dilation))
    accuracy_and_loss_list.append(accuracy_and_loss)
    print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')


print(f'Best hyperparameters: context_size={best_hyperparams[0]}, dilation={best_hyperparams[1]}, '
      f'validation accuracy={best_accuracy}')

print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')

#gridSearch end


Training with context_size=2 and dilation=1

epoch: 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.11592539454806312, mean_loss: 2.2765924079077586 epoch: 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.24447632711621234, mean_loss: 2.094502527373178 epoch: 2
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, m

In [None]:
# LAYER 3 !!!!
# Grid search over the context size and dilation of TDNN layer 3.
# Layers 1 and 2 are held fixed at (context 3, dilation 2) and
# (context 5, dilation 2).

#gridSearch start:

import itertools
num_epochs = 10

context_sizes = [3, 5, 7]
dilations = [1, 2, 3]

best_accuracy = 0.0
best_hyperparams = None
hyperparameters_list = []
accuracy_and_loss_list = []

for context_size, dilation in itertools.product(context_sizes, dilations):
    print()
    print(f'Training with context_size={context_size} and dilation={dilation}')
    print()

    # Fresh model, optimizer and loss for every configuration; only the
    # layer-3 hyperparameters vary.
    # TODO: search over all three layers jointly
    model = X_vector(input_dim, num_classes, context_size_layer_1=3, dilation_layer_1=2, context_size_layer_2=5, dilation_layer_2=2, context_size_layer_3=context_size, dilation_layer_3=dilation).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0, betas=(0.9, 0.98), eps=1e-9)
    loss_fun = nn.CrossEntropyLoss()

    # Train and validate the model
    for epoch in range(num_epochs):
        print("epoch: " + str(epoch))
        train(model, optimizer, loss_fun, dataloader_train, epoch)
        accuracy_and_loss = validation(model, loss_fun, dataloader_val, epoch)

        # Track the best validation accuracy seen in any epoch. The first
        # result is always recorded, so best_hyperparams cannot still be
        # None when it is indexed in the summary below.
        if best_hyperparams is None or accuracy_and_loss[0] > best_accuracy:
            best_accuracy = accuracy_and_loss[0]
            best_hyperparams = (context_size, dilation)

    # Only the last epoch's (accuracy, loss) is stored per configuration.
    hyperparameters_list.append((context_size, dilation))
    accuracy_and_loss_list.append(accuracy_and_loss)
    print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')


print(f'Best hyperparameters: context_size={best_hyperparams[0]}, dilation={best_hyperparams[1]}, '
      f'validation accuracy={best_accuracy}')

print(f'hyperparameters (context_size,dilation): ={hyperparameters_list}, accuracy={accuracy_and_loss_list}')

#gridSearch end


Training with context_size=2 and dilation=1

epoch: 0
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.11592539454806312, mean_loss: 2.2765924079077586 epoch: 1
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, mean_acc: 0.24447632711621234, mean_loss: 2.094502527373178 epoch: 2
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, m