<a href="https://colab.research.google.com/github/Lakshman511/MSLID/blob/master/ColabNotebooks/Experiments_with_duration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Libraries

In [None]:
!pip install --pre torchaudio -f https://download.pytorch.org/whl/nightly/torch_nightly.html
#!pip install torchaudio
#!pip install git+git://github.com/pytorch/audio

Looking in links: https://download.pytorch.org/whl/nightly/torch_nightly.html


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
%matplotlib inline
import matplotlib.pyplot as plt
from zipfile import ZipFile
import torchaudio
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

In [None]:
torch.cuda.is_available()

True

In [None]:
with ZipFile('/content/drive/My Drive/Languages.zip', mode='r') as input:
  input.extractall(".") 

In [None]:
import os
import random
import torchaudio

def prepareData(root, classes, datatype="train", duration=2, overlap=0.25, file_save="file.txt"):
    dataset = []
    for L in classes.keys():
        if datatype == "train":
            path = root + "/" + L + "/" + L + "_Train/"
        else:
            path = root + "/" + L + "/" + L + "_Test/"
        tempdata = [(path+f, classes[L]) for f in os.listdir(path)]

        for x in tempdata:
            waveform, samplerate = torchaudio.load(x[0])
            wave_size = waveform.size()[1]
            samplesize = int(samplerate * duration)
            start = 0
            while True:
                finish = start + samplesize
                if finish < wave_size:
                    dataset.append((x[0], start, finish, x[1]))
                else:
                    break
                start = start + int(samplesize * (1 - overlap))
        
    random.shuffle(dataset)

    #create a text file
    with open(file_save, "w") as ft:
        ft.write("audiofile, start, finish, label" + "\n")
        for sample in dataset:
            ft.write(str(sample)[1:-1] + "\n")
    

In [None]:
classes = {"Tamil" : 1, "Telugu" : 2, "Bengali" : 3, "Gujarathi" : 0 }
prepareData("/content/Languages", classes, datatype="train", duration=4, overlap=0.25, file_save="train_d2_o25.txt")
#preparing Test data
prepareData("/content/Languages", classes, datatype="test", duration=4, overlap=0.25, file_save="test_d2_o25.txt")
print("Dataset was written to the specified files successfully")



Dataset was written to the specified files successfully


In [None]:
import torch
import torchaudio
import torch.nn as nn
import pandas as pd
import numpy as np

class LogMelSpec(nn.Module):
    def __init__(self, sample_rate = 8000, n_mels=128, win_length=160, hop_length=80):
        super(LogMelSpec, self).__init__()
        self.transform = torchaudio.transforms.MelSpectrogram(sample_rate=sample_rate, n_mels=n_mels, win_length=win_length, hop_length=hop_length)

    def forward(self, x):
         x = self.transform(x)   # gets melspectrogram
         x = np.log(x + 1e-14)    #smoothing to avoid infinity
         return x


# add time stretch and freq augmentations
class SpecAugment(nn.Module):
    def __init__(self, rate, strategy=3, freq_mask=10, time_mask=30):
        super(SpecAugment, self).__init__()
        self.rate = rate
        
        self.specaug1 = nn.Sequential(
            torchaudio.transforms.FrequencyMasking(freq_mask),
            torchaudio.transforms.TimeMasking(time_mask)
        )

        self.specaug2 = nn.Sequential(
            torchaudio.transforms.FrequencyMasking(freq_mask),
            torchaudio.transforms.TimeMasking(time_mask),
            torchaudio.transforms.FrequencyMasking(freq_mask),
            torchaudio.transforms.TimeMasking(time_mask)
        )

        strategies = {1 : self.strategy1, 2 : self.strategy2, 3 : self.strategy3}
        self._forward = strategies[strategy]

    def forward(self, x):
        return self._forward(x)
        
    def strategy1(self, x):
        probability = torch.randn(1,1).item()
        if self.rate > probability:
            return self.specaug1(x)
        return x
        
    def strategy3(self, x):
        probability = torch.randn(1,1).item()
        if probability > 0.5:
            return self.strategy1(x)
        return self.strategy2(x)
        
    def strategy2(self, x):
        probability = torch.rand(1, 1).item()
        if self.rate > 0.5:
            return self.specaug2(x)
        return x




class AudioData(torch.utils.data.Dataset):
    parameters = {
        "sample_rate" : 8000,
        "n_feats" : 81,
        "rateof_aug" : 0.5,
        "aug_strategy" : 3,
        "time_mask" : 30,
        "freq_mask" : 10,
        
    }

    def __init__(self, datafile_path, sample_rate, n_feats, rateof_aug, aug_strategy, time_mask, freq_mask, valid=False, shuffle=True, log_ex=True):
        
        self.log_ex = log_ex

        
        if(type(datafile_path)==type("s")):
          print("Loaaa..ding data from ", datafile_path)
          self.data = pd.read_csv(datafile_path, delimiter=',')
        else:
          self.data = datafile_path

        if valid:
            self.audio_transforms = torch.nn.Sequential(
                LogMelSpec(sample_rate=sample_rate, n_mels=n_feats, win_length=160, hop_length=80)
                )
        else:
            self.audio_transforms = torch.nn.Sequential(
                LogMelSpec(sample_rate=sample_rate, n_mels=n_feats, win_length=160, hop_length=80),
                SpecAugment(rateof_aug, aug_strategy, time_mask, freq_mask) 
            )
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        if torch.is_tensor(index):
            index = index.item()
        
        file_path = self.data.audiofile.iloc[index]
        waveform, _ = torchaudio.load(file_path[1:-1])
        label = self.data.iloc[index, 3]
        start = self.data.iloc[index, 1]
        finish = self.data.iloc[index, 2]
        waveform = waveform[: , start : finish]
        spectrogram = self.audio_transforms(waveform)
        #spec_len = spectrogram.shape[-1] // 2
        #label_len = 1
        return spectrogram, label
    
    def describe(self):
        return self.data.describe()

In [None]:
train = AudioData("./train_d2_o25.txt", sample_rate=8000, n_feats=81, rateof_aug=0.5, aug_strategy=3, time_mask=30, freq_mask=10)
print(len(train))
test = AudioData("./test_d2_o25.txt", sample_rate=8000, n_feats=81, rateof_aug=0.5, aug_strategy=3, time_mask=30, freq_mask=10, valid=True)
print(len(test))


Loaaa..ding data from  ./train_d2_o25.txt
13744
Loaaa..ding data from  ./test_d2_o25.txt
3398


In [None]:

classes = {"Tamil" : 1, "Telugu" : 2, "Bengali" : 3, "Gujarathi" : 0}
#import dataset
batch_size = 32
cuda = torch.cuda.is_available()
print(cuda)
if cuda:
  torch.cuda.manual_seed(1)
dataloader_args = dict(shuffle=True, batch_size=batch_size, num_workers=4) if cuda else dict(shuffle=True, batch_size=batch_size)
train_loader = DataLoader(train,  **dataloader_args)
test_loader = DataLoader(test, **dataloader_args)

True


In [None]:
batch_size = 32
cuda = torch.cuda.is_available()
print(cuda)
if cuda:
  torch.cuda.manual_seed(1)
dataloader_args = dict(shuffle=True, batch_size=batch_size, num_workers=4) if cuda else dict(shuffle=True, batch_size=batch_size)
train_loader = DataLoader(train,  **dataloader_args)
test_loader = DataLoader(test, **dataloader_args)

True


In [None]:
print(type(train_loader))
print(len(train_loader.dataset))

torch.utils.data.dataloader.DataLoader
13744


In [None]:


print(type(test_loader))
print(len(test_loader.dataset))

torch.utils.data.dataloader.DataLoader
3398


In [None]:
sample = next(iter(train_loader))

In [None]:
sample[0].size()

torch.Size([32, 1, 81, 401])

In [None]:
sample = next(iter(test_loader))
sample[0].size()

torch.Size([32, 1, 81, 401])

In [None]:
print(type(sample))
print(sample[0][0].size())
print(len(sample[0]))
print(sample[0][1].size())
print(len(sample[1]))
print(sample[1])

<class 'list'>
torch.Size([1, 81, 401])
32
torch.Size([1, 81, 401])
32
tensor([2, 3, 1, 3, 0, 1, 2, 3, 1, 1, 0, 1, 0, 0, 2, 2, 0, 0, 1, 3, 1, 0, 3, 0,
        2, 1, 1, 3, 1, 1, 2, 1])


In [None]:
dropout_value = 0.06
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input Convolution Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # input_side = 28, output_size = 28, RF = 3

        # CONVOLUTION BLOCK 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output_size = 28, RF = 5

        # TRANSITION BLOCK 1
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 12, RF = 6

        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=1, bias=False),

            nn.BatchNorm2d(64),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output_size = 12, RF = 10

        # CONVOLUTION BLOCK 2
        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3), padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        ) # output_size = 10, RF = 14
        self.pool2 = nn.MaxPool2d(2, 2) # output_size = 12, RF = 6

        self.convblock5 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output_size = 8, RF = 18

        self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output_size = 6, RF = 22
        self.pool3 = nn.MaxPool2d(2, 2) # output_size = 12, RF = 6
        self.fc1 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=(1,1), padding=0, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(dropout_value),
            nn.ReLU()
        )
        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # output_size = 4, RF = 26
        self.Convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), padding=0, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(dropout_value),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=(1,1), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.Dropout(dropout_value),
            nn.ReLU()
        )
        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3,3), padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.Dropout(dropout_value),
            nn.ReLU()
        ) # 
        # OUTPUT BLOCK
        self.gap = nn.AdaptiveAvgPool2d(1)

        self.convblock9 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=4, kernel_size=(1, 1), padding=0, bias=False),
            # nn.ReLU() NEVER!
        ) # output_size = 1, RF = 26

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.pool1(x)
        x1 = self.convblock3(x)
        x2 = self.convblock4(x1)
        x = self.pool2(x1+x2)
        x1 = self.convblock5(x)
        x2 = self.convblock6(x1)
        x = self.fc1(self.pool3(x1+x2))
        x = self.fc2(self.Convblock2(self.convblock7(x)))
        x = self.convblock8(x)
        x = self.gap(x)
        x = self.convblock9(x)
        x = x.view(-1, 4)
        return F.log_softmax(x, dim=-1)

In [None]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda")# if use_cuda else "cpu")
print(device)
model = Net().to(device)

summary(model, input_size=(1, 81 , 81))

cuda


Tesla T4 with CUDA capability sm_75 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_70.
If you want to use the Tesla T4 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 81, 81]             144
       BatchNorm2d-2           [-1, 16, 81, 81]              32
           Dropout-3           [-1, 16, 81, 81]               0
              ReLU-4           [-1, 16, 81, 81]               0
            Conv2d-5           [-1, 32, 81, 81]           4,608
       BatchNorm2d-6           [-1, 32, 81, 81]              64
           Dropout-7           [-1, 32, 81, 81]               0
              ReLU-8           [-1, 32, 81, 81]               0
         MaxPool2d-9           [-1, 32, 40, 40]               0
           Conv2d-10           [-1, 64, 40, 40]          18,432
      BatchNorm2d-11           [-1, 64, 40, 40]             128
          Dropout-12           [-1, 64, 40, 40]               0
             ReLU-13           [-1, 64, 40, 40]               0
           Conv2d-14           [-1, 64,

In [None]:
print(torch.__version__)
use_cuda

1.7.0.dev20200910+cu92


True

In [None]:
from tqdm import tqdm
import numpy as np

class ModelTrain():
  def __init__(self):
    #to monitor training and test losses
    self.train_losses = []
    self.test_losses = []
    self.train_acc = []
    self.test_acc = []
    self.train_epoch_end = []
    self.preds={}
    # initialize tracker for minimum validation loss
    self.valid_loss_min = np.Inf # set initial "min" to infinity

  def train(self, model, device, train_loader, optimizer, epoch,scheduler,  L1lambda=None):
    model.train()    # prep model for training
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
      
      # get samples
      data, target = data.to(device), target.to(device)

      # Init
      optimizer.zero_grad()    # clear the gradients of all optimized variables
      # In PyTorch, we need to set the gradients to zero before starting to do backpropragation because PyTorch accumulates the gradients on subsequent backward passes. 
      # Because of this, when you start your training loop, ideally you should zero out the gradients so that you do the parameter update correctly.

      # Predict
      y_pred = model(data)   # forward pass: compute predicted outputs by passing inputs to the model
      

      # Calculate loss
      loss = F.nll_loss(y_pred, target)
      
      #Implementing L1 regularization
      if L1lambda is not None:
        #l1_crit = nn.L1Loss(size_average=False)
        reg_loss = 0.
        for param in model.parameters():
          #reg_loss += l1_crit(param)
          reg_loss += torch.sum(param.abs())
        loss += L1lambda * reg_loss

      self.train_losses.append(loss)

      # Backpropagation
      loss.backward()   # backward pass: compute gradient of the loss with respect to model parameters
      optimizer.step()   # perform a single optimization step (parameter update)

      # Update pbar-tqdm
    
      pred = y_pred.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
      correct += pred.eq(target.view_as(pred)).sum().item()
      processed += len(data)

      pbar.set_description(desc= f'Loss={loss.item()} Batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
      self.train_acc.append(100*correct/processed)
      scheduler.step()
    self.train_epoch_end.append(self.train_acc[-1])


  ####VAlidate the model ####
  def test(self, model, device, test_loader, filename):
    #valid_loss_min = np.Inf
    model.eval()  # prep model for evaluation
    test_loss = 0
    correct = 0
    self.preds={}
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)  # forward pass: compute predicted outputs by passing inputs to the model
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            #test_loss += criterion(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()
            #print(target,pred)
            for i in range(len(target)):
              #print(target[i],pred[i])
              if (target[i].item() not in self.preds):
                self.preds[target[i].item()]=[pred[i][0].item()]
              else:
                self.preds[target[i].item()].append(pred[i][0].item())
    test_loss /= len(test_loader.dataset)
    self.test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    self.test_acc.append(100. * correct / len(test_loader.dataset))

    # save model if validation loss has decreased
    if test_loss <= self.valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        self.valid_loss_min,
        test_loss))
        torch.save(model.state_dict(), filename)
        self.valid_loss_min = test_loss

In [None]:
import gc
def experimentation(duration,lr):
  classes = {"Tamil" : 1, "Telugu" : 2, "Bengali" : 3, "Gujarathi" : 0 }
  prepareData("/content/Languages", classes, datatype="train", duration=duration, overlap=0.25, file_save="train_d2_o25.txt")
  #preparing Test data
  prepareData("/content/Languages", classes, datatype="test", duration=4, overlap=0.25, file_save="test_d2_o25.txt")
  print("Dataset was written to the specified files successfully")
  train = AudioData("./train_d2_o25.txt", sample_rate=8000, n_feats=81, rateof_aug=0.5, aug_strategy=3, time_mask=30, freq_mask=10)
  print(len(train))
  test = AudioData("./test_d2_o25.txt", sample_rate=8000, n_feats=81, rateof_aug=0.5, aug_strategy=3, time_mask=30, freq_mask=10, valid=True)
  print(len(test))
  classes = {"Tamil" : 1, "Telugu" : 2, "Bengali" : 3, "Gujarathi" : 0}
  #import dataset
  batch_size = 32
  cuda = torch.cuda.is_available()
  print(cuda)
  if cuda:
    torch.cuda.manual_seed(1)
  dataloader_args = dict(shuffle=True, batch_size=batch_size, num_workers=4) if cuda else dict(shuffle=True, batch_size=batch_size)
  train_loader = DataLoader(train,  **dataloader_args)
  test_loader = DataLoader(test, **dataloader_args)
  #from torch.optim.lr_scheduler import StepLR
  #torch.cuda.empty_cache()
  device = torch.device("cuda")
  model=Net().to(device)
  from torch.optim.lr_scheduler import OneCycleLR
  optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
  #scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, steps_per_epoch=len(train_loader), epochs=15)
  #first model
  #without L1 and L2 regularization
  model00 = ModelTrain()
  EPOCHS = 15
  for epoch in range(EPOCHS):
    gc.collect()
    print("EPOCH:", epoch)
    model00.train(model, device, train_loader, optimizer, epoch, scheduler)
    model00.test(model, device, test_loader, "/content/drive/My Drive/LM6withAug.pt")
  global results
  results.append([lr,duration,max(model00.test_acc)])

In [26]:
results=[]
durations=[2,3,4]
lrs=[0.0725,0.1]
for i in durations:
  for j in lrs:
    experimentation(i,j)

  0%|          | 0/1209 [00:00<?, ?it/s]

Dataset was written to the specified files successfully
Loaaa..ding data from  ./train_d2_o25.txt
38680
Loaaa..ding data from  ./test_d2_o25.txt
3398
True
EPOCH: 0


Loss=0.21890856325626373 Batch_id=1208 Accuracy=69.81: 100%|██████████| 1209/1209 [01:31<00:00, 13.23it/s]



Test set: Average loss: 1.1915, Accuracy: 1876/3398 (55.21%)

Validation loss decreased (inf --> 1.191531).  Saving model ...


  0%|          | 0/1209 [00:00<?, ?it/s]

EPOCH: 1


Loss=0.3191995322704315 Batch_id=1208 Accuracy=82.42: 100%|██████████| 1209/1209 [01:31<00:00, 13.27it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.5549, Accuracy: 2611/3398 (76.84%)

Validation loss decreased (1.191531 --> 0.554903).  Saving model ...
EPOCH: 2


Loss=0.8425745964050293 Batch_id=1208 Accuracy=87.29: 100%|██████████| 1209/1209 [01:31<00:00, 13.26it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.6991, Accuracy: 2510/3398 (73.87%)

EPOCH: 3


Loss=0.12474062293767929 Batch_id=1208 Accuracy=90.07: 100%|██████████| 1209/1209 [01:31<00:00, 13.25it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9396, Accuracy: 2078/3398 (61.15%)

EPOCH: 4


Loss=0.3642864227294922 Batch_id=1208 Accuracy=91.97: 100%|██████████| 1209/1209 [01:30<00:00, 13.32it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.9445, Accuracy: 1671/3398 (49.18%)

EPOCH: 5


Loss=0.36640605330467224 Batch_id=1208 Accuracy=93.54: 100%|██████████| 1209/1209 [01:30<00:00, 13.41it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.6030, Accuracy: 2658/3398 (78.22%)

EPOCH: 6


Loss=0.021672239527106285 Batch_id=1208 Accuracy=94.08: 100%|██████████| 1209/1209 [01:31<00:00, 13.19it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.5800, Accuracy: 2700/3398 (79.46%)

EPOCH: 7


Loss=0.18791402876377106 Batch_id=1208 Accuracy=95.26: 100%|██████████| 1209/1209 [01:33<00:00, 12.95it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.0681, Accuracy: 2426/3398 (71.39%)

EPOCH: 8


Loss=0.15497910976409912 Batch_id=1208 Accuracy=95.70: 100%|██████████| 1209/1209 [01:33<00:00, 12.93it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.8558, Accuracy: 2439/3398 (71.78%)

EPOCH: 9


Loss=0.08302166312932968 Batch_id=1208 Accuracy=96.16: 100%|██████████| 1209/1209 [01:31<00:00, 13.17it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.7681, Accuracy: 2477/3398 (72.90%)

EPOCH: 10


Loss=0.11939247697591782 Batch_id=1208 Accuracy=96.85: 100%|██████████| 1209/1209 [01:31<00:00, 13.17it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9068, Accuracy: 2477/3398 (72.90%)

EPOCH: 11


Loss=0.06185821816325188 Batch_id=1208 Accuracy=97.48: 100%|██████████| 1209/1209 [01:31<00:00, 13.22it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.7766, Accuracy: 2569/3398 (75.60%)

EPOCH: 12


Loss=0.007368836086243391 Batch_id=1208 Accuracy=98.09: 100%|██████████| 1209/1209 [01:31<00:00, 13.19it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.7442, Accuracy: 2633/3398 (77.49%)

EPOCH: 13


Loss=0.06222523748874664 Batch_id=1208 Accuracy=98.28: 100%|██████████| 1209/1209 [01:31<00:00, 13.15it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.8507, Accuracy: 2543/3398 (74.84%)

EPOCH: 14


Loss=0.004637116100639105 Batch_id=1208 Accuracy=98.46: 100%|██████████| 1209/1209 [01:32<00:00, 13.08it/s]



Test set: Average loss: 0.7624, Accuracy: 2601/3398 (76.55%)



  0%|          | 0/1209 [00:00<?, ?it/s]

Dataset was written to the specified files successfully
Loaaa..ding data from  ./train_d2_o25.txt
38680
Loaaa..ding data from  ./test_d2_o25.txt
3398
True
EPOCH: 0


Loss=0.5448221564292908 Batch_id=1208 Accuracy=70.38: 100%|██████████| 1209/1209 [01:32<00:00, 13.12it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 3.5927, Accuracy: 1374/3398 (40.44%)

Validation loss decreased (inf --> 3.592749).  Saving model ...
EPOCH: 1


Loss=0.9347822666168213 Batch_id=1208 Accuracy=82.01: 100%|██████████| 1209/1209 [01:32<00:00, 13.02it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.0232, Accuracy: 2098/3398 (61.74%)

Validation loss decreased (3.592749 --> 1.023165).  Saving model ...
EPOCH: 2


Loss=0.6146601438522339 Batch_id=1208 Accuracy=86.66: 100%|██████████| 1209/1209 [01:33<00:00, 12.93it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9817, Accuracy: 2250/3398 (66.22%)

Validation loss decreased (1.023165 --> 0.981747).  Saving model ...
EPOCH: 3


Loss=0.2666407525539398 Batch_id=1208 Accuracy=89.45: 100%|██████████| 1209/1209 [01:31<00:00, 13.17it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.7466, Accuracy: 2473/3398 (72.78%)

Validation loss decreased (0.981747 --> 0.746615).  Saving model ...
EPOCH: 4


Loss=0.4356551170349121 Batch_id=1208 Accuracy=91.52: 100%|██████████| 1209/1209 [01:31<00:00, 13.25it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9032, Accuracy: 2521/3398 (74.19%)

EPOCH: 5


Loss=0.5980989933013916 Batch_id=1208 Accuracy=92.68: 100%|██████████| 1209/1209 [01:31<00:00, 13.17it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.2668, Accuracy: 2086/3398 (61.39%)

EPOCH: 6


Loss=0.06948748975992203 Batch_id=1208 Accuracy=93.75: 100%|██████████| 1209/1209 [01:32<00:00, 13.11it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9994, Accuracy: 2280/3398 (67.10%)

EPOCH: 7


Loss=0.09696058183908463 Batch_id=1208 Accuracy=94.75: 100%|██████████| 1209/1209 [01:35<00:00, 12.64it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.0391, Accuracy: 2219/3398 (65.30%)

EPOCH: 8


Loss=0.050333794206380844 Batch_id=1208 Accuracy=95.40: 100%|██████████| 1209/1209 [01:33<00:00, 12.97it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.9742, Accuracy: 2287/3398 (67.30%)

EPOCH: 9


Loss=0.04243985190987587 Batch_id=1208 Accuracy=95.94: 100%|██████████| 1209/1209 [01:32<00:00, 13.11it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 1.4625, Accuracy: 2179/3398 (64.13%)

EPOCH: 10


Loss=0.01749628782272339 Batch_id=1208 Accuracy=96.41: 100%|██████████| 1209/1209 [01:33<00:00, 12.99it/s]



Test set: Average loss: 0.7268, Accuracy: 2663/3398 (78.37%)

Validation loss decreased (0.746615 --> 0.726756).  Saving model ...


  0%|          | 0/1209 [00:00<?, ?it/s]

EPOCH: 11


Loss=0.002000445267185569 Batch_id=1208 Accuracy=97.18: 100%|██████████| 1209/1209 [01:33<00:00, 12.92it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.8193, Accuracy: 2496/3398 (73.45%)

EPOCH: 12


Loss=0.07247444242238998 Batch_id=1208 Accuracy=97.75: 100%|██████████| 1209/1209 [01:32<00:00, 13.08it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.8950, Accuracy: 2472/3398 (72.75%)

EPOCH: 13


Loss=0.008475718088448048 Batch_id=1208 Accuracy=98.22: 100%|██████████| 1209/1209 [01:32<00:00, 13.13it/s]
  0%|          | 0/1209 [00:00<?, ?it/s]


Test set: Average loss: 0.8966, Accuracy: 2476/3398 (72.87%)

EPOCH: 14


Loss=0.1281672567129135 Batch_id=1208 Accuracy=98.45: 100%|██████████| 1209/1209 [01:33<00:00, 12.94it/s]



Test set: Average loss: 0.8996, Accuracy: 2486/3398 (73.16%)



  0%|          | 0/694 [00:00<?, ?it/s]

Dataset was written to the specified files successfully
Loaaa..ding data from  ./train_d2_o25.txt
22195
Loaaa..ding data from  ./test_d2_o25.txt
3398
True
EPOCH: 0


Loss=0.4122655689716339 Batch_id=693 Accuracy=69.95: 100%|██████████| 694/694 [01:08<00:00, 10.21it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 1.3562, Accuracy: 1596/3398 (46.97%)

Validation loss decreased (inf --> 1.356185).  Saving model ...
EPOCH: 1


Loss=0.6501451730728149 Batch_id=693 Accuracy=82.52: 100%|██████████| 694/694 [01:08<00:00, 10.17it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 2.0615, Accuracy: 2231/3398 (65.66%)

EPOCH: 2


Loss=0.39775583148002625 Batch_id=693 Accuracy=87.10: 100%|██████████| 694/694 [01:08<00:00, 10.13it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.8436, Accuracy: 2464/3398 (72.51%)

Validation loss decreased (1.356185 --> 0.843631).  Saving model ...
EPOCH: 3


Loss=0.17115674912929535 Batch_id=693 Accuracy=89.85: 100%|██████████| 694/694 [01:08<00:00, 10.07it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 1.5706, Accuracy: 1811/3398 (53.30%)

EPOCH: 4


Loss=0.09532701969146729 Batch_id=693 Accuracy=92.22: 100%|██████████| 694/694 [01:08<00:00, 10.09it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 1.3249, Accuracy: 2319/3398 (68.25%)

EPOCH: 5


Loss=0.04292144253849983 Batch_id=693 Accuracy=93.65: 100%|██████████| 694/694 [01:08<00:00, 10.17it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.8899, Accuracy: 2447/3398 (72.01%)

EPOCH: 6


Loss=0.06558562815189362 Batch_id=693 Accuracy=94.57: 100%|██████████| 694/694 [01:08<00:00, 10.09it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.8983, Accuracy: 2419/3398 (71.19%)

EPOCH: 7


Loss=0.19374501705169678 Batch_id=693 Accuracy=95.48: 100%|██████████| 694/694 [01:08<00:00, 10.15it/s]



Test set: Average loss: 0.8340, Accuracy: 2460/3398 (72.40%)

Validation loss decreased (0.843631 --> 0.834046).  Saving model ...


  0%|          | 0/694 [00:00<?, ?it/s]

EPOCH: 8


Loss=0.21229799091815948 Batch_id=693 Accuracy=96.17: 100%|██████████| 694/694 [01:08<00:00, 10.14it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.6903, Accuracy: 2639/3398 (77.66%)

Validation loss decreased (0.834046 --> 0.690269).  Saving model ...
EPOCH: 9


Loss=0.0072287446819245815 Batch_id=693 Accuracy=96.89: 100%|██████████| 694/694 [01:09<00:00,  9.98it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.7364, Accuracy: 2621/3398 (77.13%)

EPOCH: 10


Loss=0.004025834146887064 Batch_id=693 Accuracy=97.46: 100%|██████████| 694/694 [01:10<00:00,  9.80it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.8540, Accuracy: 2572/3398 (75.69%)

EPOCH: 11


Loss=0.024791309610009193 Batch_id=693 Accuracy=97.96: 100%|██████████| 694/694 [01:11<00:00,  9.69it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 1.1596, Accuracy: 2468/3398 (72.63%)

EPOCH: 12


Loss=0.009643567726016045 Batch_id=693 Accuracy=98.34: 100%|██████████| 694/694 [01:12<00:00,  9.53it/s]
  0%|          | 0/694 [00:00<?, ?it/s]


Test set: Average loss: 0.9349, Accuracy: 2685/3398 (79.02%)

EPOCH: 13


Loss=0.1104130670428276 Batch_id=229 Accuracy=98.61:  33%|███▎      | 228/694 [00:24<00:52,  8.86it/s] 

Buffered data was truncated after reaching the output size limit.

In [27]:
results

[[0.0725, 2, 79.45850500294291],
 [0.1, 2, 78.36962919364332],
 [0.0725, 3, 79.01706886403767],
 [0.1, 3, 83.31371394938199],
 [0.0725, 4, 80.40023543260742],
 [0.1, 4, 83.5197174808711]]

In [None]:
for i in range(4):
  print(i,(model00.preds[i].count(i)/len(model00.preds[i]))*100)
  '''for j in range(4):
    print(i,j,model00.preds[i].count(j))'''
  print()

In [None]:
plt.plot(model00.test_losses)

In [None]:
wave_form, sample_rate = torchaudio.load('./Languages/Bengali/Bengali_Train/Bengali1764.wav')

specgram = torchaudio.transforms.MelSpectrogram(win_length=256,hop_length=256)(wave_form[0:3200])
print(type(specgram))

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
p = plt.imshow(specgram.log2()[0,:,:].detach().numpy())

In [None]:
parameters = {
        "sample_rate" : 8000,
        "n_feats" : 81,
        "rateof_aug" : 0.5,
        "aug_strategy" : 3,
        "time_mask" : 30,
        "freq_mask" : 10,
        
    }

In [None]:
audio_transforms = torch.nn.Sequential(
                LogMelSpec(sample_rate=8000, n_mels=128, win_length=256, hop_length=256),
                SpecAugment(0.5, 3,30, 10) 
            )

In [None]:
wave_form, sample_rate = torchaudio.load('./Languages/Bengali/Bengali_Train/Bengali1764.wav')

specgram = audio_transforms(wave_form[0:3200])
print(type(specgram))

print("Shape of spectrogram: {}".format(specgram.size()))

plt.figure()
p = plt.imshow(specgram[0,:,:].detach().numpy())