In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import Tensor
from torch.utils.data import Dataset , DataLoader

from sklearn.model_selection import train_test_split
import numpy as np
from scipy import signal
from scipy.io import wavfile
from IPython.display import Audio
import os

In [11]:
# Read the competition metadata: the training index (later cells use its
# 'fname', 'label' and 'manually_verified' columns) and the sample submission
# (whose 'fname' column is used as the list of test clips).
train_csv_path = ".kaggle/competitions/freesound-audio-tagging/train.csv"
submission_csv_path = ".kaggle/competitions/freesound-audio-tagging/sample_submission.csv"

train = pd.read_csv(train_csv_path)
submission = pd.read_csv(submission_csv_path)

In [12]:
# Extract the columns used downstream as plain Python lists.
train_files = train['fname'].tolist()
test_files = submission['fname'].tolist()
manually_verified = train['manually_verified'].tolist()
train_labels = train['label'].tolist()

In [13]:
# Hold out 5% of the training clips for validation. File names, labels and the
# manually-verified flags are split together so they stay aligned, and the
# fixed random_state keeps the split reproducible.
# NOTE: this rebinds train_files / train_labels, so re-running the cell without
# re-running the previous one splits the already-reduced training set again.
(train_files , val_files ,
 train_labels , val_labels ,
 manually_train , manually_val) = train_test_split(
    train_files ,
    train_labels ,
    manually_verified ,
    train_size = 0.95 ,
    random_state = 1 ,
)



In [15]:
# Distinct labels in order of first appearance in train.csv; the position of a
# label in this list is used as its integer class id below.
classes = train['label'].unique().tolist()

In [16]:
# Integer class id -> label string.
num_to_class = dict(enumerate(classes))

In [17]:
# Label string -> integer class id (inverse of num_to_class).
class_to_num = {label : index for index , label in enumerate(classes)}

In [4]:
class AudioDataset(Dataset):
  """Dataset that turns wav files into fixed-length spectrograms.

  Each item is the spectrogram of one clip, transposed to
  (time_steps , num_freq), cut to at most ``max_len`` time steps and
  zero-padded at the FRONT up to exactly ``max_len`` steps.

  Args:
    file_names: sequence of wav file names, relative to ``dir_name``.
    num_classes: number of target classes (stored, not used by this class).
    max_len: number of time steps every returned spectrogram has.
    dir_name: directory that contains the wav files.
    labels: per-file label strings; read only when ``has_labels`` is True.
    manually_verified: per-file flags returned alongside the label; read
      only when ``has_labels`` is True.
    has_labels: if True items are ``(spectrogram , label_id , manually)``,
      otherwise only the spectrogram is returned.

  Note:
    Label strings are mapped to integer ids through the notebook-level
    ``class_to_num`` dict, which must exist before a labelled item is read.
  """

  # Number of frequency bins used for the all-zero placeholder returned for
  # clips that yield no spectrogram. 129 is what signal.spectrogram produces
  # with its default segment length (256 // 2 + 1) and is the feature size the
  # rest of the notebook is built around.
  NUM_FREQ_BINS = 129

  def __init__(self , file_names , num_classes, max_len , dir_name , labels = None , manually_verified = None , has_labels = True):

    super(AudioDataset , self).__init__()
    self.file_names = file_names
    self.labels = labels
    self.manually_verified = manually_verified
    self.max_len = max_len
    self.dir_name = dir_name
    self.num_classes = num_classes
    self.has_labels = has_labels

  def __len__(self):
    """Number of clips in the dataset."""
    return len(self.file_names)

  def __getitem__(self , idx):
    """Load clip ``idx`` and return its float32 (max_len , num_freq) spectrogram.

    With ``has_labels`` the return value is ``(spectrogram , label_id , manually)``.
    """
    file_name = self.file_names[idx]
    sample_rate , samples = wavfile.read(("%s/%s" % (self.dir_name , file_name)))
    frequencies, times, spectogram = signal.spectrogram(samples, sample_rate , scaling = 'spectrum')

    if spectogram.ndim == 1:
      # No 2-D spectrogram came back (e.g. a clip without samples): substitute
      # silence. The placeholder follows self.max_len instead of a hard-coded
      # 210 so every item of the dataset has the same shape.
      spectogram = np.zeros((self.max_len , self.NUM_FREQ_BINS))
    else:
      # (num_freq , time) -> (time , num_freq), keeping at most max_len steps.
      spectogram = spectogram.T[:self.max_len]
      to_pad = self.max_len - spectogram.shape[0]
      if to_pad > 0:
        # Short clips are padded at the front so the audio ends at the last step.
        zeros = np.zeros((to_pad , spectogram.shape[1]))
        spectogram = np.concatenate((zeros , spectogram))

    spectogram = spectogram.astype(np.float32)

    if not self.has_labels:
      return spectogram

    # class_to_num is the notebook-level label -> id mapping.
    label = class_to_num[self.labels[idx]]
    manually = self.manually_verified[idx]
    return spectogram , label , manually
    

In [19]:
# Labelled training set: spectrograms fixed to 210 time steps, read from the
# 'audio_train' directory.
train_dataset = AudioDataset(
    file_names = train_files ,
    num_classes = len(classes) ,
    max_len = 210 ,
    dir_name = 'audio_train' ,
    labels = train_labels ,
    manually_verified = manually_train ,
)

In [0]:
# Per-bin normalisation statistics over the training set, computed in two
# passes: first the mean, then the mean SQUARED deviation from it.
# (Summing the raw deviations instead of their squares, as before, only
# accumulates rounding error around zero and is not a variance.)
mean = np.zeros((210 , 129) , dtype = np.float32)
var = np.zeros((210 , 129) , dtype = np.float32)

# Pass 1: element-wise mean spectrogram.
for i in range(len(train_dataset)):
  spec , label , man = train_dataset[i]
  mean += spec

mean = mean / len(train_dataset)

# Pass 2: element-wise (population) variance around that mean.
for i in range(len(train_dataset)):
  spec , label , man = train_dataset[i]
  var += (spec - mean) ** 2

var = var / len(train_dataset)


In [0]:
# Shuffled mini-batches of 1024 training clips; pinned host memory is requested
# for the batches.
train_loader = DataLoader(
    train_dataset ,
    batch_size = 1024 ,
    shuffle = True ,
    pin_memory = True ,
)

In [0]:
# Pull one batch to sanity-check the loader. The built-in next() is used
# because the iterator's Python-2 style .next() alias is not available on
# DataLoader iterators in newer PyTorch releases.
train_iter = iter(train_loader)
next(train_iter)

In [0]:
# Labelled validation set; the held-out clips come from the same 'audio_train'
# directory as the training clips.
val_dataset = AudioDataset(
    file_names = val_files ,
    num_classes = len(classes) ,
    max_len = 210 ,
    dir_name = 'audio_train' ,
    labels = val_labels ,
    manually_verified = manually_val ,
)

In [0]:
# Mini-batches of up to 1024 validation clips (shuffled, as in the training loader).
val_loader = DataLoader(
    val_dataset ,
    batch_size = 1024 ,
    shuffle = True ,
)

In [0]:
# Unlabelled test set read from 'audio_test'; items are bare spectrograms.
test_dataset = AudioDataset(
    file_names = test_files ,
    num_classes = len(classes) ,
    max_len = 210 ,
    dir_name = 'audio_test' ,
    has_labels = False ,
)

In [0]:
# Test clips in their original order (no shuffling), 1024 per batch.
test_loader = DataLoader(
    test_dataset ,
    batch_size = 1024 ,
)

In [0]:
class Model(nn.Module):
  """Conv1d -> BatchNorm -> ReLU -> 2-layer GRU -> Linear classifier.

  Input is a batch of spectrograms shaped (N , seq_len , num_freq); output is
  a (N , num_classes) tensor of unnormalised class scores (logits).

  Args:
    num_freq: number of frequency bins per time step (Conv1d input channels).
    conv_channels: number of Conv1d output channels; also the GRU input size.
    hidden_size: GRU hidden size.
    num_classes: number of output classes.
  """

  def __init__(self , num_freq = 129 , conv_channels = 196 , hidden_size = 80 , num_classes = 41):
    super(Model , self).__init__()
    self.Conv1d = nn.Conv1d(in_channels = num_freq , out_channels = conv_channels , kernel_size = 11 , stride = 1)
    self.bn = nn.BatchNorm1d(num_features = conv_channels)
    self.ReLU = nn.ReLU()
    # The GRU consumes the convolution output, so its input size must be the
    # number of conv channels (not the number of raw frequency bins).
    self.GRU = nn.GRU(input_size = conv_channels , hidden_size = hidden_size , num_layers = 2 , batch_first = True)
    self.Linear = nn.Linear(in_features = hidden_size , out_features = num_classes)

  def forward(self , x):
    """Return class logits of shape (N , num_classes) for x of shape (N , seq_len , num_freq)."""
    # Conv1d expects (N , channels , seq_len). A non in-place transpose keeps
    # the caller's tensor untouched and stays visible to autograd.
    x = x.transpose(1 , 2)

    x = self.Conv1d(x)
    x = self.bn(x)
    x = self.ReLU(x)

    # Back to (N , seq_len , channels) for the batch_first GRU.
    x = x.transpose(1 , 2).contiguous()

    x , _ = self.GRU(x)
    # Keep only the GRU output at the last time step: (N , hidden_size).
    x = x[: , -1 , :]

    logits = self.Linear(x)

    return logits
    
   

In [0]:
# Put the model on the GPU when one is available, otherwise keep it on the CPU
# instead of failing on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
AudioModel = Model().to(device)

In [0]:
criterion = nn.CrossEntropyLoss()

In [0]:
# Convert the normalisation statistics to tensors. torch.as_tensor accepts both
# numpy arrays and tensors, so re-running this cell is harmless
# (torch.from_numpy raises a TypeError once the names already hold tensors).
mean = torch.as_tensor(mean)
var = torch.as_tensor(var)

In [0]:
optimizer = torch.optim.Adam(AudioModel.parameters())

num_epochs = 10

# Run every batch on whatever device the model already lives on.
device = next(AudioModel.parameters()).device

# Normalisation statistics. Inputs are standardised with the standard deviation
# (square root of `var`); the clamp guards against slightly negative values
# from rounding and the epsilon against bins whose spread is zero, so the
# division can never produce inf/NaN.
norm_mean = torch.as_tensor(mean)
norm_std = torch.as_tensor(var).clamp(min = 0).sqrt() + 1e-8


def normalize(batch):
  """Standardise a CPU batch of spectrograms with the training-set statistics."""
  return (batch - norm_mean) / norm_std


for epoch in range(num_epochs):

  # ---- training pass over the whole training set ----
  AudioModel.train()
  num_corrects = 0

  # Iterating the loader directly replaces the manual iter()/.next() handling
  # (.next() is not available on DataLoader iterators in newer PyTorch).
  for X , Y , M in train_loader:

    X = normalize(X).to(device)
    Y = Y.to(device).long()

    logits = AudioModel(X)
    loss = criterion(logits , Y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    predictions = torch.argmax(logits , dim = 1)
    num_corrects += predictions.eq(Y).sum().item()

  # ---- validation pass over the whole validation set ----
  # eval() makes BatchNorm use its running statistics and no_grad() avoids
  # building an autograd graph. Every batch is visited so the accuracy is
  # correct even if the validation set does not fit into a single batch.
  AudioModel.eval()
  val_loss_sum = 0.0
  val_corrects = 0

  with torch.no_grad():
    for X , Y , M in val_loader:

      X = normalize(X).to(device)
      Y = Y.to(device).long()

      logits = AudioModel(X)

      # criterion averages over the batch; weight by batch size to get the
      # mean over the whole validation set.
      val_loss_sum += criterion(logits , Y).item() * Y.size(0)

      predictions = torch.argmax(logits , dim = 1)
      val_corrects += predictions.eq(Y).sum().item()

  val_loss = val_loss_sum / len(val_dataset)
  val_acc = (val_corrects * 1.0 / len(val_dataset)) * 100
  train_acc = (num_corrects * 1.0 / len(train_dataset)) * 100

  # `loss` is the loss of the last training mini-batch of the epoch.
  print("Epoch[{} / {}] : loss = {} , train_acc = {}% , val_loss= {} , val_acc = {}%".format(epoch+1 , num_epochs , loss.item() , train_acc , val_loss , val_acc))
    
  
  
  
  
  
  
  
