In [1]:
# Library Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

import glob
import os

import librosa
import librosa.display


import torch
from torch import nn
from torch.nn import Linear
import torch.nn.functional as F
from torchvision import models, transforms, datasets
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv


In [2]:
# GTZAN dataset
general_path = 'Data'
print(list(os.listdir(f'{general_path}/genres_original/')))

['country', 'metal', 'jazz', 'hiphop', 'blues', 'classical', 'reggae', 'rock', 'pop', 'disco']


In [3]:
# Next Steps
# Ensure all MEL-spectrograms are same NxN dimensions
# Convert them into compressed vector representations
# create train-test-split,

In [4]:
df = pd.read_csv("Data/features_3_sec.csv")
df.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [5]:
df.shape

(9990, 60)

In [6]:
'''
Some logic derived from reading through the following Kaggle set: https://www.kaggle.com/code/aishwarya2210/let-s-tune-the-music-with-cnn-xgboost/notebook
This is for scaling and extracting the features as needed, and converting the labels to numerical values. 
This will help to create the training and testing sets, then will be fed into the model
'''

# Label Encoding - encod the categorical classes with numerical integer values for training

# Blues - 0
# Classical - 1
# Country - 2
# Disco - 3
# Hip-hop - 4 
# Jazz - 5  
# Metal - 6 
# Pop - 7
# Reggae - 8
# Rock - 9

#To convert categorical data into model-understandable numerica data
class_list = df.iloc[:, -1]
convertor = LabelEncoder()

df = df.drop(labels="filename", axis=1)
df.head()

Unnamed: 0,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,rolloff_mean,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,3714.560359,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,3869.682242,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,3997.63916,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,3568.300218,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,3469.992864,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [7]:
#Fitting the label encoder & return encoded labels
y = convertor.fit_transform(class_list)
y

array([0, 0, 0, ..., 9, 9, 9])

In [8]:

fit = StandardScaler()
X = fit.fit_transform(np.array(df.iloc[:, :-1], dtype = float))

In [9]:
x_train,x_test,y_train,y_test=train_test_split(df.iloc[:,:-1],y,test_size=0.3)
x_train.head()
# y_train.item(1)
x_train.shape

(6993, 58)

In [None]:
# REDUCE DATASET TO A TENTH OF ITS SIZE
sample_size = df.shape[0] // 10  # Reduce to 10% of the original size
sampled_indices = df.sample(n=sample_size, random_state=42).index  # Randomly sample indices
df = df.loc[sampled_indices]  # Apply sampling to df
y = y[sampled_indices]  # Apply the same sampling to y


# Removing other unecessary variables
df = df.drop(labels="length", axis=1)
df = df.drop(labels="label", axis=1)

# Creataing edge tensor, 1 - Identity Matrix --> tensor
adj_matrix = 1 - torch.eye(sample_size)
edge_index = torch.nonzero(adj_matrix, as_tuple=False).t()



# Randomly drop 90% of the edges
num_edges = edge_index.shape[1]  # Total number of edges
num_edges_to_keep = num_edges // 1  # Keep only half the edges

# Shuffle the edges randomly and keep only half
perm = torch.randperm(num_edges)  # Generate a random permutation of indices
edge_index = edge_index[:, perm[:num_edges_to_keep]]  # Select only half of the edges




# Setting data features to tensor
x = torch.tensor(df.values)
x = x.to(torch.float32)

# Normalize
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x = torch.tensor(scaler.fit_transform(df.values), dtype=torch.float32)

# Setting y to tensor
y = torch.tensor(y, dtype=torch.long)  # Make sure target is of type long (integer labels)

# Create data object
data = Data(x=x, edge_index=edge_index, y=y)

# Setup test/train/validation split 80/10/10:
from sklearn.model_selection import train_test_split
# split the data into 80% training and 20% remaining (test + validation)
train_indices, remaining_indices = train_test_split(
    range(sample_size), test_size=0.2, stratify=y.numpy(), random_state=42
)
# split the remaining 20% into 50% for validation and 50% for testing
val_indices, test_indices = train_test_split(
    remaining_indices, test_size=0.5, stratify=y.numpy()[remaining_indices], random_state=42
)
# initialize masks for each split
train_mask = torch.zeros(sample_size, dtype=torch.bool)
val_mask = torch.zeros(sample_size, dtype=torch.bool)
test_mask = torch.zeros(sample_size, dtype=torch.bool)
# set the corresponding entries to True for each mask
train_mask[train_indices] = True
val_mask[val_indices] = True
test_mask[test_indices] = True
# assign the masks to the data object
data.train_mask = train_mask
data.val_mask = val_mask
data.test_mask = test_mask

# Check dimmensions
data

Data(x=[199, 57], edge_index=[2, 39402], y=[199], train_mask=[199], val_mask=[199], test_mask=[199])

In [11]:
print(f"x shape: {data.x.shape}")
print(f"edge_index shape: {data.edge_index.shape}")
print(f"y shape: {data.y.shape}")
print(f"train_mask shape: {data.train_mask.shape}")
print(f"val_mask shape: {data.val_mask.shape}")
print(f"test_mask shape: {data.test_mask.shape}")

x shape: torch.Size([199, 57])
edge_index shape: torch.Size([2, 39402])
y shape: torch.Size([199])
train_mask shape: torch.Size([199])
val_mask shape: torch.Size([199])
test_mask shape: torch.Size([199])


In [12]:
# import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

# Modified from https://www.geeksforgeeks.org/graph-neural-networks-with-pytorch/

class GCN(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate):
        super(GCN, self).__init__()
        self.layer1 = GCNConv(input_dim, hidden_dim)
        self.bn1 = torch.nn.BatchNorm1d(hidden_dim)  # Optional, for normalization
        self.dropout = torch.nn.Dropout(p=dropout_rate)
        self.layer2 = GCNConv(hidden_dim, output_dim)

    def forward(self, x, edge_index):
        # First layer
        h = self.layer1(x, edge_index)
        h = self.bn1(h)  # Optional batch normalization
        h = F.relu(h)
        h = self.dropout(h)

        # Second layer
        z = self.layer2(h, edge_index)
        
        return z

model_test = GCN(df.shape[1], df.shape[1], 10, 0.5)
print(model_test)

GCN(
  (layer1): GCNConv(57, 57)
  (bn1): BatchNorm1d(57, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dropout): Dropout(p=0.5, inplace=False)
  (layer2): GCNConv(57, 10)
)


In [13]:
# Initialize the GNN model
input_feature_count = df.shape[1]
hidden_dim = df.shape[1]  # PICK!
num_classes = 10
learning_rate = 0.001
weight_decay = 1e-4
dropout_rate = 0
num_epochs = 1000

model = GCN(input_feature_count, hidden_dim, num_classes, dropout_rate)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

graph_data = data  # Get the graph data

# Cross-entropy loss function
criterion = torch.nn.CrossEntropyLoss()

# Pre-allocate arrays for loss, training accuracy, and validation accuracy
losses = torch.zeros(num_epochs)
train_accs = torch.zeros(num_epochs)
val_accs = torch.zeros(num_epochs)

def train_model():
    model.train()
    optimizer.zero_grad()
    output = model(graph_data.x, graph_data.edge_index)
    loss = criterion(output[graph_data.train_mask], graph_data.y[graph_data.train_mask])
    loss.backward()
    optimizer.step()
    
    # Training accuracy
    predictions = output.argmax(dim=1)
    correct = (predictions[graph_data.train_mask] == graph_data.y[graph_data.train_mask]).sum()
    train_acc = int(correct) / int(graph_data.train_mask.sum())
    return loss.item(), train_acc, output

def evaluate_model(mask):
    model.eval()
    with torch.no_grad():
        output = model(graph_data.x, graph_data.edge_index)
        predictions = output.argmax(dim=1)
        correct = (predictions[mask] == graph_data.y[mask]).sum()
        acc = int(correct) / int(mask.sum())
    return acc

In [14]:
# Training loop
for epoch in range(num_epochs):
    loss_value, train_acc, output = train_model()
    print(output)
    val_acc = evaluate_model(graph_data.val_mask)  # Evaluate on validation set
    
    # Save loss and accuracy for each epoch in pre-allocated arrays
    losses[epoch] = loss_value
    train_accs[epoch] = train_acc
    val_accs[epoch] = val_acc
    
    #if (epoch+1) % 10 == 0:
    #print(f'Epoch: {epoch+1:03d}, Loss: {loss_value:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')

# After training, evaluate the model on the test set
test_acc = evaluate_model(graph_data.test_mask)  # Evaluate on test set
print(f'Test Accuracy: {test_acc:.4f}')

tensor([[-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07],
        [-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07],
        [-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07],
        ...,
        [-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07],
        [-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07],
        [-1.9437e-06, -1.4946e-06, -1.4198e-07,  ...,  3.8186e-06,
         -2.6444e-06,  1.0195e-07]], grad_fn=<AddBackward0>)
tensor([[ 0.0015,  0.0010,  0.0006,  ..., -0.0021,  0.0011,  0.0008],
        [ 0.0015,  0.0010,  0.0006,  ..., -0.0021,  0.0011,  0.0008],
        [ 0.0015,  0.0010,  0.0006,  ..., -0.0021,  0.0011,  0.0008],
        ...,
        [ 0.0015,  0.0010,  0.0006,  ..., -0.0021,  0.0011,  0.0008],
        [ 0.0015,  0.0010,  0.0006,  ..., -0.0021, 

KeyboardInterrupt: 

In [None]:
print(output[1])
print(output[10])
print(output[100])

In [None]:
print(graph_data.train_mask.sum())
print(graph_data.test_mask.sum())
print(graph_data.val_mask.sum())

In [None]:
print(graph_data.y[graph_data.train_mask])