In [131]:
%pip install torch
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split # Import train_test_split function
from matplotlib import pyplot as plt
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

Note: you may need to restart the kernel to use updated packages.


In [132]:
class Data(Dataset):
  def __init__(self, X_train, y_train):
    # need to convert float64 to float32 else 
    # will get the following error
    # RuntimeError: expected scalar type Double but found Float
    self.X = torch.from_numpy(X_train.astype(np.float32))
    # need to convert float64 to Long else 
    # will get the following error
    # RuntimeError: expected scalar type Long but found Float
    self.y = torch.from_numpy(y_train).type(torch.LongTensor)
    self.len = self.X.shape[0]
  
  def __getitem__(self, index):
    return self.X[index], self.y[index]
  def __len__(self):
    return self.len

In [133]:
yescols = ["danceability", "key",
       "loudness", "mode", "speechiness", "acousticness", "instrumentalness",
       "valence", "tempo"]
nocols = ['names','albums', "energy", 'types', 'duration_ms', 'time_signature']

In [134]:
# number of features (len of X cols)
input_dim = len(yescols)
# number of hidden layers
hidden_layers = 4
# number of classes (unique of y)
output_dim = 2
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()
    self.linear1 = nn.Linear(input_dim, hidden_layers)
    self.linear2 = nn.Linear(hidden_layers, output_dim)
  def forward(self, x):
    x = torch.sigmoid(self.linear1(x))
    x = self.linear2(x)
    return x

In [135]:
df = pd.read_csv('DeathGripsDataset.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,names,albums,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,valence,tempo,types,duration_ms,time_signature,Labels
0,0,Get Got,The Money Store,0.57,0.993,7,-3.757,1,0.337,0.00628,0.00796,0.405,87.0,audio_features,171867,4,1
1,1,The Fever (Aye Aye),The Money Store,0.432,0.94,10,-4.099,0,0.374,0.159,0.0,0.225,80.084,audio_features,186973,4,1
2,2,Lost Boys,The Money Store,0.464,0.949,2,-2.454,1,0.336,0.003,0.000113,0.374,77.121,audio_features,186320,4,1
3,3,Blackjack,The Money Store,0.532,0.855,4,-3.014,0,0.301,0.259,0.0,0.534,136.902,audio_features,142000,4,1
4,4,Hustle Bones,The Money Store,0.6,0.982,4,-2.996,0,0.201,0.104,0.0,0.277,110.015,audio_features,192360,4,1


In [136]:
X = np.array(df[yescols])
Y = np.array(df.Labels)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42) # 70% training and 30% test

In [137]:
traindata = Data(X_train, Y_train)
batch_size = 1
trainloader = DataLoader(traindata, batch_size=batch_size, 
                         shuffle=True, num_workers=0)

In [138]:
clf = Network()
PATH = './mymodel.pth'
clf.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [139]:
print(clf.parameters)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(clf.parameters(), lr=0.1)

<bound method Module.parameters of Network(
  (linear1): Linear(in_features=9, out_features=4, bias=True)
  (linear2): Linear(in_features=4, out_features=2, bias=True)
)>


In [140]:
epochs = 5
for epoch in range(epochs):
  running_loss = 0.0
  for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    # set optimizer to zero grad to remove previous epoch gradients
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()
  # display statistics
  print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.5f}')

[1,    61] loss: 0.02197


[2,    61] loss: 0.02153
[3,    61] loss: 0.02136
[4,    61] loss: 0.02150
[5,    61] loss: 0.02188


In [141]:
torch.save(clf.state_dict(), PATH)

In [142]:
testdata = Data(X_test, Y_test)
testloader = DataLoader(testdata, batch_size=batch_size, 
                        shuffle=True, num_workers=0)

In [143]:
correct, total = 0, 0
# no need to calculate gradients during inference
with torch.no_grad():
  for data in testloader:
    inputs, labels = data
    # calculate output by running through the network
    outputs = clf(inputs)
    # get the predictions
    __, predicted = torch.max(outputs.data, 1)
    # update results
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {len(testdata)} test data: {100 * correct // total} %')

Accuracy of the network on the 27 test data: 55 %
