In [1]:
# Mount Google Drive into the Colab runtime so files stored there can be
# reached through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%%capture
%cd drive/MyDrive/genrecog/
%pip install speechbrain

In [3]:
from genrecog.preprocess.preprocessor import Preprocessor
from genrecog.nnet.CNN import Conv1d
from genrecog.tools.trainer import CNNFbankTrainer
import torch
from torch.utils.data import TensorDataset, DataLoader
from importlib import reload


In [4]:
# Load dataset

# One Preprocessor per .npz split; the paths are relative to the current
# working directory.
# NOTE(review): `preprcessor` is a typo of `preprocessor`; the names are kept
# because the next cell refers to them.
train_preprcessor = Preprocessor(
    'dataset/npz_files/train.npz'
)
test_preprcessor = Preprocessor(
    'dataset/npz_files/test.npz'
)

# Prefer the first CUDA GPU and fall back to the CPU when none is available.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [5]:
# Build the train / validation / test datasets and their DataLoaders.
VALIDATION_SIZE = 400  # samples held out of the training file for validation
BATCH_SIZE = 400
SPLIT_SEED = 0         # fixes which indices random_split assigns to each subset

X, y = train_preprcessor.as_shuffled_torch()
X_test, y_test = test_preprcessor.as_shuffled_torch()

# All tensors are moved to `device` once, here.
dataset = TensorDataset(X.to(device), y.to(device))
test_dataset = TensorDataset(X_test.to(device), y_test.to(device))

# Derive the training size from the data instead of hard-coding 3200:
# random_split raises when the lengths do not sum to len(dataset).
train_size = len(dataset) - VALIDATION_SIZE
if train_size <= 0:
    raise ValueError(
        f"Training file has {len(dataset)} samples; need more than "
        f"{VALIDATION_SIZE} to hold out a validation split."
    )

# A dedicated seeded generator makes the split indices repeatable.
# NOTE(review): as_shuffled_torch() may itself shuffle with an unseeded RNG,
# in which case the split contents still vary between runs — confirm.
validation_dataset, train_dataset = torch.utils.data.random_split(
    dataset,
    (VALIDATION_SIZE, train_size),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=BATCH_SIZE)
# Evaluation loaders are not shuffled: ordering does not affect the metrics
# and a fixed order keeps evaluation runs comparable.
validation_dataloader = DataLoader(validation_dataset, shuffle=False, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [6]:
# Build the network, the loss criterion and the optimizer.
# NOTE(review): 40 is presumably the number of input feature channels (the
# printed model starts with Conv1d(40, 128, ...)) — confirm against
# genrecog.nnet.CNN.
model = Conv1d(40)
loss = torch.nn.CrossEntropyLoss()

# Reuse the notebook-wide `device` (chosen when the dataset was loaded) instead
# of re-testing for CUDA, so model and data are guaranteed to share a device.
model = model.to(device)
loss = loss.to(device)

# The optimizer is created after the model is on its final device, as the
# torch.optim documentation recommends.
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)

# Display the architecture as the cell output.
model

Conv1d(
  (batch_norm): BatchNorm1d(702, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (input_layer): Sequential(
    (0): Conv1d(40, 128, kernel_size=(32,), stride=(1,), padding=(16,))
    (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (hidden_layer_1): Sequential(
    (0): Conv1d(128, 256, kernel_size=(32,), stride=(1,), padding=(16,))
    (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d(kernel_size=4, stride=4, padding=0, dilation=1, ceil_mode=False)
  )
  (hidden_layer_2): Sequential(
    (0): Conv1d(256, 512, kernel_size=(32,), stride=(1,), padding=(16,))
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01)
    (3): MaxPool1d

In [7]:
# Wire the model, optimizer, loss and the two loaders into the trainer and run
# it; train() prints training/validation accuracy and loss as it progresses.
trainer = CNNFbankTrainer(
    model,
    optimizer,
    loss,
    train_dataloader,
    validation_dataloader,
    num_epochs=100,
)
trainer.train()

Training accuracy 35.72
Training loss 2.0739
Validation accuracy 15.00
Validation loss 2.2962
Training accuracy 44.53
Training loss 1.7266
Validation accuracy 17.25
Validation loss 2.2430
Training accuracy 49.28
Training loss 1.5175
Validation accuracy 38.50
Validation loss 2.0322
Training accuracy 54.97
Training loss 1.3622
Validation accuracy 52.50
Validation loss 1.6914
Training accuracy 60.28
Training loss 1.2261
Validation accuracy 60.75
Validation loss 1.4020
Training accuracy 65.81
Training loss 1.1015
Validation accuracy 62.50
Validation loss 1.2137
Training accuracy 68.53
Training loss 0.9961
Validation accuracy 66.00
Validation loss 1.1020
Training accuracy 72.66
Training loss 0.8889
Validation accuracy 66.75
Validation loss 1.0031
Training accuracy 76.03
Training loss 0.7884
Validation accuracy 68.50
Validation loss 0.9512
Training accuracy 80.41
Training loss 0.6941
Validation accuracy 69.25
Validation loss 0.9149
Training accuracy 83.94
Training loss 0.6032
Validation accu

KeyboardInterrupt: ignored

In [8]:
# Evaluate the trained model on the test loader.
# The third return value is bound to `test_loss` rather than `loss`, so the
# CrossEntropyLoss criterion named `loss` is not overwritten; otherwise
# re-running the trainer cell after this one would pass a number as the loss
# function.
# NOTE(review): `y_val` holds the labels returned for the *test* loader; the
# name is kept because later cells use it.
y_pred, y_val, test_loss, accuracy = trainer.eval(test_dataloader)

In [9]:
# Show the accuracy returned by trainer.eval for the test loader. The recorded
# output is a fraction (0.675), not a percentage like the training log lines.
accuracy

0.675000011920929

In [None]:
# Pick the index of the largest value along dim 1 for every row.
# NOTE(review): assumes y_pred is (samples, classes) — confirm against
# trainer.eval.
y_pred_1 = y_pred.argmax(dim=1)
y_pred_1

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Cross-check the trainer's accuracy with scikit-learn. Both tensors are
# copied to the CPU first because they may live on the GPU.
true_labels = y_val.cpu()
predicted_labels = y_pred_1.cpu()
accuracy_score(true_labels, predicted_labels)

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Display names for the classes, in class-index order.
# NOTE(review): this order must match the integer label encoding used when the
# .npz files were built — confirm against the preprocessing code.
genres = ['country', 'reggae', 'metal', 'pop', 'classical', 'disco', 'hiphop', 'blues', 'jazz', 'rock']

# Row-normalised confusion matrix in percent: each row (true genre) sums to 100.
# `labels` pins the matrix to one row/column per genre even when a class is
# absent from the evaluated samples, so the DataFrame below always fits.
array = confusion_matrix(
    y_val.cpu(),
    y_pred_1.cpu(),
    labels=list(range(len(genres))),
    normalize='true',
) * 100
df_cm = pd.DataFrame(array, index=genres, columns=genres)

fig, ax = plt.subplots(figsize=(10, 7))
# fmt='.1f' replaces the default '.2g', which would print 100 as "1e+02" and
# round 67.5 to "68".
sn.heatmap(df_cm, annot=True, fmt='.1f', cmap="YlGnBu", ax=ax)
ax.set(
    xlabel="Predicted genre",
    ylabel="True genre",
    title="Test-set confusion matrix (% of each true genre)",
)
plt.show()