In [1]:
# Install required packages
!pip install librosa tensorflow scikit-learn matplotlib





In [3]:
!pip install kagglehub



In [4]:
import kagglehub

# Download the latest version of the dataset
path = kagglehub.dataset_download("andradaolteanu/gtzan-dataset-music-genre-classification")

# Print the path where the dataset is saved
print("Path to dataset files:", path)


Path to dataset files: /kaggle/input/gtzan-dataset-music-genre-classification


In [5]:
import os

# List files in the dataset directory
dataset_path = '/kaggle/input/gtzan-dataset-music-genre-classification'
genres = os.listdir(dataset_path)
print(f"Genres found in the dataset: {genres}")


Genres found in the dataset: ['Data']


In [6]:
# List files inside the "Data" folder to understand the structure
data_path = os.path.join(dataset_path, 'Data')
data_files = os.listdir(data_path)
print(f"Files in the Data folder: {data_files}")


Files in the Data folder: ['features_3_sec.csv', 'features_30_sec.csv', 'images_original', 'genres_original']


In [7]:
import pandas as pd

# Load and inspect the 'features_30_sec.csv' file
features_file_path = os.path.join(data_path, 'features_30_sec.csv')
features_df = pd.read_csv(features_file_path)

# Show the first few rows to understand its structure
features_df.head()


Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues




In [8]:
# Extract relevant features (we'll use the 'mfcc1_mean' and other features for classification)
X = features_df[['chroma_stft_mean', 'rms_mean', 'spectral_centroid_mean', 'spectral_bandwidth_mean',
                 'rolloff_mean', 'zero_crossing_rate_mean', 'harmony_mean', 'perceptr_mean', 'tempo', 'mfcc1_mean']]

# Extract genre labels from filenames (assuming that the genre is the first part of the filename)
y = features_df['filename'].apply(lambda x: x.split('.')[0])  # Get the genre from filename (before .00000.wav)

# Encode labels (genres)
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Train/test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Check the split data
print(f"Training data shape: {X_train.shape}")
print(f"Test data shape: {X_test.shape}")


Training data shape: (800, 10)
Test data shape: (200, 10)


In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Initialize KNN Classifier (you can experiment with n_neighbors)
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of KNN model: {accuracy * 100:.2f}%")

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))


Accuracy of KNN model: 34.00%
Classification Report:
              precision    recall  f1-score   support

       blues       0.25      0.35      0.29        20
   classical       0.73      0.85      0.79        13
     country       0.30      0.26      0.28        27
       disco       0.25      0.33      0.29        21
      hiphop       0.16      0.33      0.21        15
        jazz       0.44      0.18      0.26        22
       metal       0.60      0.60      0.60        25
         pop       0.42      0.38      0.40        13
      reggae       0.27      0.17      0.21        23
        rock       0.23      0.14      0.18        21

    accuracy                           0.34       200
   macro avg       0.37      0.36      0.35       200
weighted avg       0.36      0.34      0.34       200



In [10]:
from sklearn.preprocessing import StandardScaler

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the KNN model with scaled features
knn_scaled = KNeighborsClassifier(n_neighbors=5)
knn_scaled.fit(X_train_scaled, y_train)

# Make predictions and evaluate the model
y_pred_scaled = knn_scaled.predict(X_test_scaled)
accuracy_scaled = accuracy_score(y_test, y_pred_scaled)
print(f"Accuracy of KNN model (scaled): {accuracy_scaled * 100:.2f}%")


Accuracy of KNN model (scaled): 50.00%


In [11]:
from sklearn.model_selection import GridSearchCV

# Define a parameter grid for tuning
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

# Initialize KNN Classifier
knn_tune = KNeighborsClassifier()

# Initialize GridSearchCV
grid_search = GridSearchCV(knn_tune, param_grid, cv=5, scoring='accuracy')

# Fit the model with the best parameters found
grid_search.fit(X_train_scaled, y_train)

# Best parameters and model
print("Best Hyperparameters:", grid_search.best_params_)

# Evaluate the best model
best_knn = grid_search.best_estimator_
y_pred_best = best_knn.predict(X_test_scaled)

# Print accuracy and classification report
best_accuracy = accuracy_score(y_test, y_pred_best)
print(f"Accuracy of Best KNN model: {best_accuracy * 100:.2f}%")
print("Classification Report (Tuned KNN):")
print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))


Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Accuracy of Best KNN model: 52.50%
Classification Report (Tuned KNN):
              precision    recall  f1-score   support

       blues       0.48      0.55      0.51        20
   classical       0.76      1.00      0.87        13
     country       0.41      0.44      0.43        27
       disco       0.47      0.67      0.55        21
      hiphop       0.36      0.33      0.34        15
        jazz       0.71      0.68      0.70        22
       metal       0.64      0.72      0.68        25
         pop       0.62      0.38      0.48        13
      reggae       0.54      0.30      0.39        23
        rock       0.29      0.24      0.26        21

    accuracy                           0.53       200
   macro avg       0.53      0.53      0.52       200
weighted avg       0.52      0.53      0.51       200



In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler  # If you haven't already imported it

# Assuming you have X_train, X_test, y_train, y_test, and label_encoder from your previous code

# 1. Feature Scaling (If not already done)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 2. Hyperparameter Tuning with GridSearchCV
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'minkowski']
}

knn_tune = KNeighborsClassifier()
grid_search = GridSearchCV(knn_tune, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train_scaled, y_train)

print("Best Hyperparameters:", grid_search.best_params_)

# 3. Evaluate the Best Model
best_knn = grid_search.best_estimator_
y_pred_best = best_knn.predict(X_test_scaled)

best_accuracy = accuracy_score(y_test, y_pred_best)
print(f"Accuracy of Best KNN model: {best_accuracy * 100:.2f}%")

print("Classification Report (Tuned KNN):")
print(classification_report(y_test, y_pred_best, target_names=label_encoder.classes_))

Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Accuracy of Best KNN model: 52.50%
Classification Report (Tuned KNN):
              precision    recall  f1-score   support

       blues       0.48      0.55      0.51        20
   classical       0.76      1.00      0.87        13
     country       0.41      0.44      0.43        27
       disco       0.47      0.67      0.55        21
      hiphop       0.36      0.33      0.34        15
        jazz       0.71      0.68      0.70        22
       metal       0.64      0.72      0.68        25
         pop       0.62      0.38      0.48        13
      reggae       0.54      0.30      0.39        23
        rock       0.29      0.24      0.26        21

    accuracy                           0.53       200
   macro avg       0.53      0.53      0.52       200
weighted avg       0.52      0.53      0.51       200



In [13]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Define parameter grid for KNN
param_grid = {
    'n_neighbors': [7, 15, 21, 25],  # Larger number of neighbors
    'metric': ['manhattan', 'cosine', 'chebyshev'],  # Different distance metrics
    'weights': ['uniform', 'distance']  # Weights as uniform or distance
}

# Set up KNN model
knn = KNeighborsClassifier()

# Set up GridSearchCV with 5-fold cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5, n_jobs=-1, verbose=2)

# Fit the model
grid_search.fit(X_train_scaled, y_train)

# Best parameters and accuracy
print(f"Best Hyperparameters: {grid_search.best_params_}")
print(f"Accuracy of Best KNN model: {grid_search.best_score_ * 100}%")


Fitting 5 folds for each of 24 candidates, totalling 120 fits
Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 7, 'weights': 'distance'}
Accuracy of Best KNN model: 59.00000000000001%


In [14]:
from sklearn.preprocessing import StandardScaler

# Scale features using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [16]:
# Reduce to 10 components (or fewer if necessary)
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Fit KNN model with reduced features
knn.fit(X_train_pca, y_train)


In [17]:
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report

# Apply PCA (adjust the number of components based on your dataset)
pca = PCA(n_components=10)  # You can change 10 to any appropriate number
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

# Train KNN model on PCA-transformed features
knn.fit(X_train_pca, y_train)

# Predict on the test data
y_pred_pca = knn.predict(X_test_pca)

# Calculate accuracy and classification report
accuracy_pca = accuracy_score(y_test, y_pred_pca)
classification_report_pca = classification_report(y_test, y_pred_pca)

print(f"Accuracy: {accuracy_pca}")
print(f"Classification Report: \n{classification_report_pca}")


Accuracy: 0.5
Classification Report: 
              precision    recall  f1-score   support

           0       0.34      0.60      0.44        20
           1       0.72      1.00      0.84        13
           2       0.39      0.44      0.41        27
           3       0.46      0.62      0.53        21
           4       0.29      0.27      0.28        15
           5       0.80      0.55      0.65        22
           6       0.74      0.68      0.71        25
           7       0.75      0.46      0.57        13
           8       0.50      0.30      0.38        23
           9       0.29      0.19      0.23        21

    accuracy                           0.50       200
   macro avg       0.53      0.51      0.50       200
weighted avg       0.52      0.50      0.49       200

