# Imports 

In [12]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

In [13]:
# Load the processed song metadata table that the modelling cells below read from.
metadata = pd.read_csv(filepath_or_buffer='../data/processed_metadata.csv')

# Modelling and Model Evaluations - classifying genres from characteristics

In [15]:
# Predictor columns taken from `metadata` for the genre classifier.
features = ['dating', 'violence', 'world/life', 'night/time', 'shake the audience', 'family/gospel',
            'romantic', 'communication', 'obscene', 'music', 'movement/places', 'light/visual perceptions',
            'family/spiritual', 'like/girls', 'sadness', 'feelings', 'danceability', 'loudness',
            'acousticness', 'instrumentalness', 'valence', 'energy']

X_features = metadata[features]
y_genre = metadata['genre']

# Hold out 20% of the rows for testing; stratifying on the label keeps each
# genre's share the same in the train and test splits.
X_train_feat, X_test_feat, y_train_genre, y_test_genre = train_test_split(
    X_features, y_genre, test_size=0.2, random_state=42, stratify=y_genre
)

# Fit the scaler on the training split only, then apply the same transform to
# the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_feat_scaled = scaler.fit_transform(X_train_feat)
X_test_feat_scaled = scaler.transform(X_test_feat)

# Baseline model: 100 trees, remaining hyperparameters left at their defaults.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_feat_scaled, y_train_genre)

y_pred_genre = rf_model.predict(X_test_feat_scaled)
print(classification_report(y_test_genre, y_pred_genre))

# Hyperparameter grid: 3 x 3 x 3 = 27 candidate settings.
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# 27 candidates x 5 folds = 135 forest fits. n_jobs=-1 spreads those fits over
# all available cores; with the fixed random_state each fit is unchanged, so
# only the wall-clock time differs from the serial search.
grid_search_rf = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_rf,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search_rf.fit(X_train_feat_scaled, y_train_genre)

print(f"Best parameters: {grid_search_rf.best_params_}")
# best_estimator_ is the winning configuration refitted on the full training split.
best_rf_model = grid_search_rf.best_estimator_

# Evaluate the tuned model on the same held-out test split as the baseline.
y_pred_genre_best = best_rf_model.predict(X_test_feat_scaled)
print(classification_report(y_test_genre, y_pred_genre_best))

              precision    recall  f1-score   support

       blues       0.40      0.29      0.34       921
     country       0.43      0.57      0.49      1089
     hip hop       0.59      0.40      0.48       181
        jazz       0.50      0.38      0.43       769
         pop       0.37      0.47      0.41      1408
      reggae       0.50      0.42      0.46       500
        rock       0.43      0.32      0.36       807

    accuracy                           0.42      5675
   macro avg       0.46      0.41      0.42      5675
weighted avg       0.43      0.42      0.42      5675

Best parameters: {'max_depth': 30, 'min_samples_split': 2, 'n_estimators': 200}
              precision    recall  f1-score   support

       blues       0.41      0.28      0.33       921
     country       0.43      0.56      0.49      1089
     hip hop       0.61      0.39      0.48       181
        jazz       0.51      0.38      0.44       769
         pop       0.36      0.49      0.42      140

**Baseline Split of data to compare accuracy score**

In [16]:
# Share of rows per decade (normalize=True returns proportions rather than counts).
# NOTE(review): the classifier above predicts 'genre'; a majority-class baseline for
# its accuracy would presumably come from metadata['genre'] -- confirm that 'decade'
# is the intended column here.
metadata['decade'].value_counts(normalize = True)

decade
2010    0.198470
2000    0.168511
1980    0.164775
1990    0.157091
1970    0.139257
1960    0.120154
1950    0.051741
Name: proportion, dtype: float64

**Evaluating Model Performance - by genre**

**Blues** - The F1-score slightly decreased after tuning, indicating a marginal drop in balanced performance between precision and recall. Precision increased slightly, but recall decreased, meaning the model's blues predictions were correct slightly more often, but it missed more of the actual blues songs overall.

**Country** - Recall slightly decreased after tuning (0.57 to 0.56), while precision (0.43) and F1-score (0.49) remained the same. The model was marginally better at identifying true country songs before tuning.

**Hip Hop** - Precision increased slightly after tuning, but recall decreased. The F1-score remained the same. The model became more precise but less capable of identifying all true hip hop songs.

**Jazz** - The F1-score improved after tuning, indicating better balanced performance between precision and recall. Precision increased slightly (0.50 to 0.51) while recall stayed the same (0.38), so the model's jazz predictions were correct slightly more often.

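**Pop** - The F1-score and recall both improved after tuning (recall 0.47 to 0.49, F1-score 0.41 to 0.42), while precision dipped slightly (0.37 to 0.36). The model became better at finding true pop songs, at the cost of labelling slightly more non-pop songs as pop.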

**Reggae** - The F1-score improved, indicating better balanced performance. Both precision and recall increased, showing overall improvement in identifying and predicting reggae songs.

**Rock** - The F1-score improved slightly after tuning, indicating a minor improvement in balanced performance. Precision remained the same, but recall slightly increased.

**Overall Model Performance**

Accuracy remained the same at 0.42 before and after tuning; however, macro average precision and F1-score slightly improved, indicating that the model's performance across all genres improved in terms of balanced performance between precision and recall. The weighted average metrics remained the same, indicating no significant change in overall performance once each genre is weighted by its number of songs.
**Conclusion**
The hyperparameter tuning resulted in slight improvements for some genres, particularly pop and reggae, while others like blues and country saw slight decreases in performance. The overall accuracy remained unchanged, suggesting that while tuning helped specific classes, it did not significantly impact the overall model performance. The balanced performance (F1-score) across all genres saw a marginal improvement, indicating the model became slightly better at handling the imbalanced class distribution.