In [2]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    """Many-to-one Elman RNN classifier.

    A stack of ``layer_size`` recurrent layers reads a batch-first sequence;
    the hidden output of the last time step is projected to ``output_size``
    logits by a linear layer.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each recurrent layer.
        layer_size: Number of stacked recurrent layers.
        output_size: Number of output logits.
        device: Kept for backward compatibility and stored on the instance.
            ``forward`` no longer relies on it; move the module itself with
            ``.to(device)``.
    """

    def __init__(self, input_size, hidden_size, layer_size, output_size, device):
        super().__init__()

        self.device = device
        self.hidden_size = hidden_size
        self.layer_size = layer_size

        self.rnn = nn.RNN(input_size, hidden_size, layer_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return logits of shape ``(batch, output_size)``.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.
        """
        # Build the initial hidden state from the input's own device and dtype
        # so it always matches, wherever the module or the batch currently live.
        h0 = torch.zeros(
            self.layer_size,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.rnn(x, h0)
        # Only the last time step feeds the classifier head.
        return self.fc(out[:, -1, :])

In [3]:
# Model hyper-parameters.
input_size = 201   # features per time step; must match the spectrogram's frequency-bin count
hidden_size = 10
layer_size = 1
output_size = 2    # number of target classes
device = "cuda" if torch.cuda.is_available() else "cpu"

# The spectrograms are moved to `device` when they are loaded, so the model's
# parameters must live there too; otherwise the first forward pass on a CUDA
# machine fails with a device-mismatch error.
model = RNN(input_size, hidden_size, layer_size, output_size, device).to(device)

In [None]:
import torchaudio
import pandas as pd

train_data = pd.read_csv('Datasets/train.csv').to_numpy()
test_data = pd.read_csv('Datasets/test.csv').to_numpy()
spectrogram_transform = torchaudio.transforms.Spectrogram()


def load_split(rows):
    """Load every audio file listed in ``rows`` and return its spectrogram and label.

    Args:
        rows: Iterable of records where ``row[0]`` is the audio file path and
            ``row[1]`` is the label.

    Returns:
        ``(features, labels)``: a list of spectrogram tensors placed on
        ``device`` and a list with the matching labels.
    """
    features, labels = [], []
    for row in rows:
        waveform, _ = torchaudio.load(row[0])  # the sample rate is not needed here
        features.append(spectrogram_transform(waveform).to(device))
        labels.append(row[1])
    return features, labels


# Each split is loaded on its own. Zipping the two tables together would stop
# at the shorter one and silently drop samples from the longer split.
x_train, y_train = load_split(train_data)
x_test, y_test = load_split(test_data)

In [None]:
# torch.stack needs every spectrogram in a split to have the same shape.
x_train = torch.stack(x_train)
x_test = torch.stack(x_test)

# CrossEntropyLoss takes class indices as int64 tensors on the same device as
# the logits, so the labels are created next to their features.
y_train = torch.tensor(y_train, dtype=torch.long, device=x_train.device)
y_test = torch.tensor(y_test, dtype=torch.long, device=x_test.device)

In [13]:
from torch.utils.data import TensorDataset, DataLoader

# Training schedule: run enough epochs to perform roughly `n_iters` optimizer steps.
batch_size = 16
n_iters = 10000
# Integer arithmetic avoids float rounding; the clamp guarantees at least one
# epoch when the training set is large compared to `n_iters * batch_size`.
num_epochs = max(1, (n_iters * batch_size) // len(x_train))

lr = 0.001
error = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Filled by the training loop.
loss_list = []
iteration_list = []

train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

# Shuffle the training samples every epoch so that batches are not tied to the
# row order of the CSV; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

(list, list)

In [None]:
# Train the model, logging the loss every 100 batches.
model.train()
batches_per_epoch = len(train_loader)

for epoch in range(num_epochs):
    for i, (audio, language) in enumerate(train_loader):
        # The RNN expects (batch, time, freq). `view` only reinterprets memory
        # and does not swap axes, so the time/freq swap needs `transpose`.
        # Assumes each batch is (batch, channel, freq, time), as produced by
        # torchaudio.load + Spectrogram; channels are averaged into one.
        if audio.dim() == 4:
            audio = audio.mean(dim=1)
        audio = audio.transpose(1, 2).contiguous().to(device)
        language = language.to(device)

        optimizer.zero_grad()
        output = model(audio)
        loss = error(output, language)

        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            # Global step count: `i` restarts at 0 every epoch and would make
            # the x-axis of the loss curve repeat.
            iteration = epoch * batches_per_epoch + i
            # Store a plain float so the value can be plotted directly and
            # does not keep a (possibly CUDA) tensor alive.
            loss_value = loss.item()
            loss_list.append(loss_value)
            iteration_list.append(iteration)
            print('Iteration: {}  Loss: {}'.format(iteration, loss_value))

In [None]:
import matplotlib.pyplot as plt

# The logged losses may be 0-d tensors (possibly on the GPU), which matplotlib
# cannot convert by itself; coerce them to plain floats before plotting.
loss_values = [float(loss_value) for loss_value in loss_list]

plt.plot(iteration_list, loss_values)
plt.xlabel("Number of iteration")
plt.ylabel("Loss")
plt.title("RNN: Loss vs Number of iteration")
plt.show()

In [None]:
# Start the evaluation bookkeeping from scratch: two independent, empty
# history lists and zeroed sample counters.
iteration_list, accuracy_list = [], []
total = correct = 0

In [None]:
# Evaluate on the test set, tracking the running accuracy over all samples seen.
model.eval()
with torch.no_grad():  # no gradients are needed for evaluation
    for i, (audio, language) in enumerate(test_loader):
        # The RNN expects (batch, time, freq). `view` only reinterprets memory
        # and does not swap axes, so the time/freq swap needs `transpose`.
        # Assumes each batch is (batch, channel, freq, time), as produced by
        # torchaudio.load + Spectrogram; channels are averaged into one.
        if audio.dim() == 4:
            audio = audio.mean(dim=1)
        audio = audio.transpose(1, 2).contiguous().to(device)
        language = language.to(device)

        output = model(audio)
        # One prediction per sample: argmax over the class dimension. Without
        # `dim`, argmax returns a single index into the flattened batch.
        predicted = output.argmax(dim=1)

        # Count samples, not batches, so the ratio is a true accuracy.
        total += language.size(0)
        correct += (predicted == language).sum().item()

        # Record the running accuracy after every batch so the curve has
        # points even for a small test set; print only every 100 batches.
        accuracy = 100 * correct / float(total)
        iteration_list.append(i)
        accuracy_list.append(accuracy)
        if i % 100 == 0:
            print("Iteration:", i, accuracy)

if total:
    print("Final accuracy:", 100 * correct / float(total))

In [None]:
# Draw the running test accuracy on the current axes, save the figure to
# disk, then display it.
ax = plt.gca()
ax.plot(iteration_list, accuracy_list, color="red")
ax.set_xlabel("Number of iteration")
ax.set_ylabel("Accuracy")
ax.set_title("RNN: Accuracy vs Number of iteration")
plt.savefig('graph.png')
plt.show()