 ###  Import Libraries ###

In [9]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import joblib


####  Load & Prepare Data ####

In [10]:
# Read the per-track feature table from the working directory.
df = pd.read_csv("features_30_sec.csv")

# Target: the genre name held in the "label" column.
y = df["label"]

# Predictors: every column except "filename" and the target itself.
X = df.drop(["filename", "label"], axis="columns")

# Turn genre strings into integer class ids (e.g., 'blues' → 0).
# `le` is kept around so ids can be decoded back into genre names later.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Standardise every feature column.
# NOTE: at this point the scaler is fitted on all rows, i.e. before any
# train/test split has been made.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


#### Train/Test Split ####

In [11]:
# Hold out 20% of the rows for evaluation, stratified on the encoded genre so
# each class is represented proportionally in both partitions.
# The *unscaled* features are split so that scaling statistics can be learned
# from the training rows alone.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Re-fit the scaler on the training partition only, then apply that same
# transform to the test partition. Fitting on all rows would let test-set
# means/variances leak into preprocessing. `scaler` now holds the
# training-only statistics, which is what should be persisted for inference.
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

### Train the Model ###

In [12]:
# Hyper-parameters for the forest: 200 trees, fixed seed for reproducibility.
forest_params = {"n_estimators": 200, "random_state": 42}

# Build the classifier and fit it on the training partition.
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)


### Evaluate ###

In [13]:
# Predict class ids for the held-out rows.
y_pred = model.predict(X_test)

# Overall accuracy plus a per-genre precision/recall/F1 table; the encoder's
# class names are used as row labels in the report.
test_accuracy = accuracy_score(y_test, y_pred)
per_class_report = classification_report(y_test, y_pred, target_names=le.classes_)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", per_class_report)


Accuracy: 0.78
Classification Report:
               precision    recall  f1-score   support

       blues       0.83      0.75      0.79        20
   classical       0.83      0.95      0.88        20
     country       0.70      0.80      0.74        20
       disco       0.75      0.60      0.67        20
      hiphop       0.67      0.80      0.73        20
        jazz       0.76      0.80      0.78        20
       metal       0.89      0.85      0.87        20
         pop       0.85      0.85      0.85        20
      reggae       0.76      0.80      0.78        20
        rock       0.80      0.60      0.69        20

    accuracy                           0.78       200
   macro avg       0.78      0.78      0.78       200
weighted avg       0.78      0.78      0.78       200



### Save Model & Scaler ###

In [14]:
# Persist the three objects needed to reproduce predictions later:
# the trained classifier, the feature scaler and the label encoder.
# Files are written relative to the current working directory.
joblib.dump(value=model, filename="rf1_model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")
# Left as the final bare expression so the cell echoes the call's return value.
joblib.dump(value=le, filename="label_encoder.pkl")


['label_encoder.pkl']

### Optional: Predict on New Input ###

In [18]:
import pandas as pd
import numpy as np

# Feature names, generated rather than typed out by hand. The layout is:
# 'length', then mean/var pairs for each spectral descriptor, then 'tempo',
# then mean/var pairs for mfcc1 … mfcc20 (58 names in total).
stat_suffixes = ("mean", "var")
spectral_descriptors = (
    "chroma_stft", "rms", "spectral_centroid", "spectral_bandwidth",
    "rolloff", "zero_crossing_rate", "harmony", "perceptr",
)

columns = ["length"]
columns += [f"{name}_{stat}" for name in spectral_descriptors for stat in stat_suffixes]
columns.append("tempo")
columns += [f"mfcc{idx}_{stat}" for idx in range(1, 21) for stat in stat_suffixes]

# Raw (unscaled) feature values for one track, in the same order as `columns`.
values = [
    661794, 0.38781213760375977, 0.08593204617500305, 0.05546330288052559, 0.0012869684724137187,
    2411.5866181146966, 421150.86847675144, 2435.9202566181157, 199724.1583286562, 4993.538147114269,
    2202921.094673376, 0.12693159984048724, 0.002332659071437532, 7.748983625788242e-05,
    0.002413538284599781, 0.00019716205133590847, 0.0006839741836301982, 161.4990234375,
    -186.63197326660156, 9603.1806640625, 86.64537048339844, 929.113525390625,
    -15.727551460266113, 593.0079956054688, 14.736834526062012, 110.48722839355469,
    -2.2047410011291504, 98.790771484375, 6.256115436553955, 102.51167297363281,
    -0.8920643329620361, 39.588172912597656, 0.6570257544517517, 50.240638732910156,
    -6.092410087585449, 120.02452087402344, -5.223966121673584, 182.8389892578125,
    -7.879560470581055, 155.8359832763672, -1.7269392013549805, 86.47122955322266,
    -4.704252243041992, 102.1563949584961, -2.0493123531341553, 98.2497787475586,
    1.4325697422027588, 57.027976989746094, 2.786587953567505, 64.75006103515625,
    3.316521167755127, 60.98043441772461, 3.114957094192505, 133.28054809570312,
    -4.411282539367676, 78.12748718261719, -5.034923553466797, 64.10726928710938
]

# Wrap the single observation in a one-row DataFrame with named columns.
sample = pd.DataFrame([values], columns=columns)



In [19]:
# The classifier was trained on standardised features, so the raw sample must
# go through the same fitted scaler before prediction; feeding unscaled values
# would compare them against split thresholds learned on a different scale.
sample_scaled = scaler.transform(sample)

# `prediction` is an array of encoded class ids, one per input row.
prediction = model.predict(sample_scaled)
print("Predicted Label:", prediction[0])

Predicted Label: 8




In [22]:

# Decode the predicted class id back to its genre name. The id is taken from
# `prediction` rather than a hard-coded literal, so this cell stays correct
# whenever the model, the scaler or the sample changes.
genre_name = le.inverse_transform(prediction)[0]
print("Predicted Genre:", genre_name)


Predicted Genre: reggae
