In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Many-to-one recurrent classifier: an ``nn.RNN`` stack followed by a linear head.

    The network consumes a batch-first sequence tensor and produces one vector
    of ``output_size`` scores per sequence, computed from the RNN output at
    the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent hidden state.
        layer_size: Number of stacked recurrent layers.
        output_size: Number of scores produced per sequence.
        device: Device identifier supplied by the caller. It is only stored on
            the instance; the constructor does not move any weights.
    """

    def __init__(self, input_size, hidden_size, layer_size, output_size, device):
        super(RNN, self).__init__()

        # The original code stored the device as `devices` but read it back as
        # `device`, which made every forward pass raise AttributeError. Both
        # names are kept so either spelling works for existing callers.
        self.device = device
        self.devices = device
        self.hidden_size = hidden_size
        self.layer_size = layer_size

        self.rnn = nn.RNN(input_size, hidden_size, layer_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the scores for each sequence in ``x``.

        Args:
            x: Tensor indexed as (batch, time, feature) because the RNN is
                built with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, output_size).
        """
        # Build the initial hidden state directly on the input's device/dtype so
        # it always matches `x`, wherever the caller placed the data.
        h0 = torch.zeros(
            self.layer_size,
            x.size(0),
            self.hidden_size,
            dtype=x.dtype,
            device=x.device,
        )
        output, _ = self.rnn(x, h0)
        # Only the last time step feeds the classifier head.
        return self.fc(output[:, -1, :])

In [None]:
# Pick the accelerator when one is present; the string is handed to the model.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Network dimensions.
# NOTE(review): 201 presumably matches the n_fft // 2 + 1 frequency bins of
# torchaudio's default Spectrogram (n_fft=400) - confirm if the transform changes.
input_size, hidden_size = 201, 10
layer_size, output_size = 1, 2

model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    layer_size=layer_size,
    output_size=output_size,
    device=device,
)

In [None]:
# Optimisation settings: number of passes over the training rows and Adam step size.
num_epoch, lr = 10000, 0.001

# Adam updates every parameter registered on the model; cross-entropy is the
# criterion the training loop minimises.
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)
error = nn.CrossEntropyLoss()

In [None]:
import torchaudio
import pandas as pd

# Both splits are read from CSV and converted to NumPy arrays; the training and
# evaluation loops unpack every row into two values (audio path, label).
train_data, test_data = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in ('Datasets/train.csv', 'Datasets/test.csv')
)

# Spectrogram transform with torchaudio's default settings, shared by all loops.
spectrogram_transform = torchaudio.transforms.Spectrogram()

# Bookkeeping for the loss curve: global step counter plus the sampled points.
count = 0
loss_list, iteration_list = [], []

In [None]:
for epoch in range(num_epoch):
    for x_train,y_train in train_data:
        waveform,sample_rate=torchaudio.load(x_train)
        spectrogram = spectrogram_transform(waveform)
        spectrogram = spectrogram.reshape(spectrogram.shape[0],spectrogram.shape[2],spectrogram.shape[1])
        lang = torch.tensor([y_train])
        
        optimizer.zero_grad()
        output = model(spectrogram)
        loss = error(output, lang)
        
        loss.backward()
        optimizer.step()
        
        count += 1
        
        if count % 100 == 0:
            loss_list.append(loss.data)
            iteration_list.append(count)
            print('Iteration: {}  Loss: {}'.format(count, loss.data[0]))

In [None]:
import matplotlib.pyplot as plt

plt.plot(iteration_list,loss_list)
plt.xlabel("Number of iteration")
plt.ylabel("Loss")
plt.title("RNN: Loss vs Number of iteration")
plt.show()

In [None]:
iteration_list = []
accuracy_list = []
total = 0
correct = 0

In [None]:
for x_test,y_test in test_data:
    waveform,sample_rate=torchaudio.load(x_test)
    spectrogram = spectrogram_transform(waveform)
    spectrogram = spectrogram.reshape(spectrogram.shape[0],spectrogram.shape[2],spectrogram.shape[1])
    lang = torch.tensor([y_test])
        
    output = model(spectrogram)
    predicted = torch.argmax(output.data)
    
    total += 1
    correct += 1 if predicted == lang[0] else 0
    
    if total % 100 == 0:
        accuracy = 100 * correct / float(total)
        iteration_list.append(total)
        accuracy_list.append(accuracy)
        print("Iteration:",total,accuracy)

In [None]:
# Accuracy curve sampled during evaluation, drawn in red on the current axes.
plt.plot(iteration_list, accuracy_list, color="red")

axes = plt.gca()
axes.set_title("RNN: Accuracy vs Number of iteration")
axes.set_xlabel("Number of iteration")
axes.set_ylabel("Accuracy")

# Persist the figure before showing it.
plt.gcf().savefig('graph.png')
plt.show()