<a href="https://colab.research.google.com/github/CRAUGUTH/nnProject/blob/main/Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Training NN**


In [None]:
!pip install --upgrade spotipy torch pandas scikit-learn transformers librosa joblib

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib
from google.colab import drive

In [11]:
class MusicData(Dataset):
    def __init__(self, path):
        data = pd.read_csv(path)
        data = data.drop(['track_id', 'artists', 'album_name', 'track_name', 'explicit'], axis=1)
        self.label_encoder = LabelEncoder()
        data['track_genre'] = self.label_encoder.fit_transform(data['track_genre'])
        self.features = data.drop(['track_genre'], axis=1).values.astype(np.float32)
        self.labels = data['track_genre'].values.astype(np.float32)
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return x, y

In [12]:
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.network(x)

In [None]:
# Mount Google Drive
drive.mount('/content/drive')
data_path = '/content/drive/My Drive/Junior/Semester_2/NN/Project/dataset.csv'

In [14]:
# Function to evaluate model performance
def evaluate(model, data_loader):
    model.eval()
    total_correct = 0
    total_samples = 0
    all_predicted = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels.long()).sum().item()
            all_predicted.extend(predicted.view(-1).tolist())
            all_labels.extend(labels.long().view(-1).tolist())

    accuracy = (total_correct / total_samples) * 100
    f1 = f1_score(all_labels, all_predicted, average='weighted') # weighted average of the F1 score for each class
    return accuracy, f1

In [15]:
# Load dataset
dataset = MusicData(data_path)
save_path = "best_model"

# Splitting the dataset
total_size = len(dataset)
train_set_size = int(total_size * 0.6)
val_set_size = int(total_size * 0.2)
test_set_size = int(total_size * 0.2)
train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_set_size, val_set_size, test_set_size])

train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=False)

# Model, loss, and optimizer
input_dim = dataset.features.shape[1]
num_classes = len(np.unique(dataset.labels))
model = MLP(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [16]:
# Training loop
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    total_correct = 0
    total_samples = 0
    train_predicted = []
    train_labels = []

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.long())
        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels.long()).sum().item()
        train_predicted.extend(predicted.view(-1).tolist())
        train_labels.extend(labels.long().view(-1).tolist())
        loss.backward()
        optimizer.step()

    train_accuracy = (total_correct / total_samples) * 100
    train_f1 = f1_score(train_labels, train_predicted, average='weighted')

    val_accuracy, val_f1 = evaluate(model, val_loader)

    print(f'Epoch {epoch+1}, Training Accuracy: {train_accuracy}%, Validation Accuracy: {val_accuracy}%, Training F1: {train_f1}, Validation F1: {val_f1}')

Epoch 1, Training Accuracy: 34.255847953216374%, Validation Accuracy: 51.71491228070175%, Training F1: 0.3246306108090284, Validation F1: 0.4924904319164748
Epoch 2, Training Accuracy: 48.80994152046784%, Validation Accuracy: 60.675438596491226%, Training F1: 0.4778365517680585, Validation F1: 0.5919158089343505
Epoch 3, Training Accuracy: 54.94005847953216%, Validation Accuracy: 65.70614035087719%, Training F1: 0.5426320935139963, Validation F1: 0.6474221720055642
Epoch 4, Training Accuracy: 58.88742690058479%, Validation Accuracy: 69.12719298245614%, Training F1: 0.5836065017598016, Validation F1: 0.6766011435911
Epoch 5, Training Accuracy: 61.72222222222222%, Validation Accuracy: 72.25877192982456%, Training F1: 0.6132828444272865, Validation F1: 0.719968971204145
Epoch 6, Training Accuracy: 63.92836257309942%, Validation Accuracy: 72.81140350877193%, Training F1: 0.6361745685747416, Validation F1: 0.7197043351032935
Epoch 7, Training Accuracy: 65.09356725146199%, Validation Accurac

In [19]:
# Final evaluation on the test set
print("Evaluating on the test set:")
evaluate(model, test_loader)[0]

Evaluating on the test set:


82.1140350877193

In [20]:
# Save the model and preprocessors
torch.save(model.state_dict(), 'song_mood_classifier.pth')
joblib.dump(dataset.label_encoder, 'label_encoder.joblib')
joblib.dump(dataset.scaler, 'scaler.joblib')

['scaler.joblib']