<a href="https://colab.research.google.com/github/CRAUGUTH/nnProject/blob/main/Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Training NN**


In [None]:
!pip install --upgrade spotipy torch pandas scikit-learn transformers librosa joblib

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib
from google.colab import drive

In [86]:
class MusicData(Dataset):
    def __init__(self, path):
        data = pd.read_csv(path)
        data = data.drop(['track_id', 'artists', 'album_name', 'track_name', 'explicit'], axis=1)
        self.label_encoder = LabelEncoder()
        data['track_genre'] = self.label_encoder.fit_transform(data['track_genre'])
        self.features = data.drop(['track_genre'], axis=1).values.astype(np.float32)
        self.labels = data['track_genre'].values.astype(np.float32)
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return x, y

In [87]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(input_dim, hidden_dim)
        self.layer2 = nn.Linear(hidden_dim, hidden_dim // 2)
        self.layer3 = nn.Linear(hidden_dim // 2, hidden_dim // 4)
        self.layer4 = nn.Linear(hidden_dim // 4, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.relu(self.layer3(x))
        x = self.layer4(x)
        return x

In [88]:
# Mount Google Drive
drive.mount('/content/drive')
data_path = '/content/drive/My Drive/Junior/Semester_2/NN/Project/dataset.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [89]:
def evaluate(model, data_loader):
    model.eval()
    total = correct = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels.long()).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy}%')
    return accuracy

In [90]:
# Load dataset
dataset = MusicData(data_path)
save_path = "best_model"

# Splitting the dataset
total_size = len(dataset)
train_set_size = int(total_size * 0.6)
val_set_size = int(total_size * 0.2)
test_set_size = total_size - train_set_size - val_set_size

train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_set_size, val_set_size, test_set_size])

train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=False)

# Model, loss, and optimizer
input_dim = dataset.features.shape[1]
hidden_dim = 256  # Example hidden dimension
num_classes = len(np.unique(dataset.labels))

model = MLP(input_dim, hidden_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [91]:
# Training loop
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.long())
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

    # Evaluate on validation set
    evaluate(model, val_loader)

Epoch 1, Loss: 1.6934605836868286
Accuracy: 48.166666666666664%
Epoch 2, Loss: 1.2743173837661743
Accuracy: 58.29824561403509%
Epoch 3, Loss: 1.6353559494018555
Accuracy: 64.10526315789474%
Epoch 4, Loss: 0.728980302810669
Accuracy: 68.00877192982456%
Epoch 5, Loss: 0.6280980110168457
Accuracy: 70.61842105263158%
Epoch 6, Loss: 0.6084839701652527
Accuracy: 73.03070175438596%
Epoch 7, Loss: 0.8116636276245117
Accuracy: 73.34649122807018%
Epoch 8, Loss: 0.554427981376648
Accuracy: 74.39912280701755%
Epoch 9, Loss: 0.5575842261314392
Accuracy: 76.46491228070175%
Epoch 10, Loss: 0.5749977231025696
Accuracy: 77.00438596491227%
Epoch 11, Loss: 0.491129994392395
Accuracy: 76.19736842105263%
Epoch 12, Loss: 0.41247034072875977
Accuracy: 78.3201754385965%
Epoch 13, Loss: 0.606252133846283
Accuracy: 79.15789473684211%
Epoch 14, Loss: 0.3050875663757324
Accuracy: 78.94298245614036%
Epoch 15, Loss: 0.3361106812953949
Accuracy: 80.90350877192982%
Epoch 16, Loss: 0.7164385318756104
Accuracy: 81.7412

In [92]:
# Final evaluation on the test set
print("Evaluating on the test set:")
evaluate(model, test_loader)

Evaluating on the test set:
Accuracy: 82.54824561403508%


82.54824561403508

In [93]:
# Save the model and preprocessors
torch.save(model.state_dict(), 'song_mood_classifier.pth')
joblib.dump(dataset.label_encoder, 'label_encoder.joblib')
joblib.dump(dataset.scaler, 'scaler.joblib')

['scaler.joblib']