<a href="https://colab.research.google.com/github/CRAUGUTH/nnProject/blob/main/Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Training NN**


In [None]:
!pip install --upgrade spotipy torch pandas scikit-learn transformers librosa joblib

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib
from google.colab import drive

In [11]:
class MusicData(Dataset):
    def __init__(self, path):
        data = pd.read_csv(path)
        data = data.drop(['track_id', 'artists', 'album_name', 'track_name', 'explicit'], axis=1)
        self.label_encoder = LabelEncoder()
        data['track_genre'] = self.label_encoder.fit_transform(data['track_genre'])
        self.features = data.drop(['track_genre'], axis=1).values.astype(np.float32)
        self.labels = data['track_genre'].values.astype(np.float32)
        self.scaler = StandardScaler()
        self.features = self.scaler.fit_transform(self.features)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        x = self.features[idx]
        y = self.labels[idx]
        return x, y

In [12]:
class MLP(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(MLP, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        return self.network(x)

In [None]:
# Mount Google Drive
drive.mount('/content/drive')
data_path = '/content/drive/My Drive/Junior/Semester_2/NN/Project/dataset.csv'

In [14]:
# Function to evaluate model performance
def evaluate(model, data_loader):
    model.eval()
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total_samples += labels.size(0)
            total_correct += (predicted == labels.long()).sum().item()
    accuracy = (total_correct / total_samples) * 100
    return accuracy

In [15]:
# Load dataset
dataset = MusicData(data_path)
save_path = "best_model"

# Splitting the dataset
total_size = len(dataset)
train_set_size = int(total_size * 0.6)
val_set_size = int(total_size * 0.2)
test_set_size = int(total_size * 0.2)
train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_set_size, val_set_size, test_set_size])

train_loader = DataLoader(dataset=train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=False)

# Model, loss, and optimizer
input_dim = dataset.features.shape[1]
num_classes = len(np.unique(dataset.labels))
model = MLP(input_dim, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

In [16]:
# Training loop
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    total_correct = 0
    total_samples = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.long())
        _, predicted = torch.max(outputs, 1)
        total_samples += labels.size(0)
        total_correct += (predicted == labels).sum().item()
        loss.backward()
        optimizer.step()

    # Adjust learning rate based on scheduler
    scheduler.step()

    val_accuracy = evaluate(model, val_loader)
    print(f'Epoch {epoch+1}, Validation Accuracy: {val_accuracy}%')

Epoch 1, Validation Accuracy: 51.84210526315789%
Epoch 2, Validation Accuracy: 60.88157894736842%
Epoch 3, Validation Accuracy: 65.13157894736842%
Epoch 4, Validation Accuracy: 67.81140350877193%
Epoch 5, Validation Accuracy: 70.92105263157895%
Epoch 6, Validation Accuracy: 71.77192982456141%
Epoch 7, Validation Accuracy: 75.25438596491229%
Epoch 8, Validation Accuracy: 75.46929824561404%
Epoch 9, Validation Accuracy: 75.6140350877193%
Epoch 10, Validation Accuracy: 77.8859649122807%
Epoch 11, Validation Accuracy: 78.45614035087719%
Epoch 12, Validation Accuracy: 78.49122807017544%
Epoch 13, Validation Accuracy: 79.46052631578947%
Epoch 14, Validation Accuracy: 79.60964912280701%
Epoch 15, Validation Accuracy: 80.31140350877193%
Epoch 16, Validation Accuracy: 79.99561403508771%
Epoch 17, Validation Accuracy: 80.28947368421052%
Epoch 18, Validation Accuracy: 80.97368421052632%
Epoch 19, Validation Accuracy: 80.9122807017544%
Epoch 20, Validation Accuracy: 82.58771929824562%
Epoch 21, Va

In [17]:
# Final evaluation on the test set
print("Evaluating on the test set:")
evaluate(model, test_loader)

Evaluating on the test set:


85.39473684210527

In [18]:
# Save the model and preprocessors
torch.save(model.state_dict(), 'song_mood_classifier.pth')
joblib.dump(dataset.label_encoder, 'label_encoder.joblib')
joblib.dump(dataset.scaler, 'scaler.joblib')

['scaler.joblib']