In [None]:
# Step 1: Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Step 2: Load data
df = pd.read_csv('music_genre.csv')

# Step 3: Clean data
# Literal '?' entries are treated as missing values so that dropna() removes
# the affected rows together with any rows that already contain NaN.
df.replace('?', np.nan, inplace=True)
df.dropna(inplace=True)

# Step 4: Drop non-numeric or irrelevant columns
# Only columns actually present are dropped, so a file that lacks some of
# them does not raise.
drop_cols = ['instance_id', 'artist_name', 'track_name', 'key', 'mode', 'obtained_date']
df.drop(columns=[col for col in drop_cols if col in df.columns], inplace=True)

# Step 5: Convert numeric columns
# Columns that contained '?' were read as strings; convert every feature
# column to a numeric dtype. The target column is skipped because it is
# label-encoded in Step 6. Only conversion failures are caught (a bare
# `except` would also hide unrelated errors and KeyboardInterrupt), and the
# affected column is reported instead of being skipped silently.
for col in df.columns.drop('music_genre'):
    try:
        df[col] = pd.to_numeric(df[col])
    except (ValueError, TypeError):
        print("Column {!r} could not be converted to numeric; leaving it unchanged.".format(col))

# Step 6: Encode target label
# label_encoder is kept so predictions can be mapped back to genre names.
label_encoder = LabelEncoder()
df['music_genre'] = label_encoder.fit_transform(df['music_genre'])

# Step 7: Feature-target split
# y holds the encoded genre labels; X holds every remaining column.
y = df['music_genre']
X = df.drop(columns=['music_genre'])

# Step 8: Train/test split
# 20% of the rows are held out; stratifying on y keeps the genre proportions
# the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Step 9: Feature scaling
# The scaler is fitted on the training split only and then applied to both
# splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 10: Train classifier
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train_scaled, y_train)

# Step 11: Evaluate model
# Predict on the scaled held-out split and report the metrics using the
# original genre names.
y_pred = model.predict(X_test_scaled)
genre_names = label_encoder.classes_

test_accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", test_accuracy)
print("\n📄 Classification Report:\n")
report_text = classification_report(y_test, y_pred, target_names=genre_names)
print(report_text)

# Step 12: Confusion matrix
# Rows are the actual genres, columns the predicted ones; cells show counts.
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 7))
ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=genre_names,
    yticklabels=genre_names,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('🎯 Confusion Matrix')
plt.show()

# Step 13: Feature importance
# Pair each importance score from the fitted forest with its feature name,
# then draw the scores as a horizontal bar chart sorted in ascending order.
feature_importances = pd.Series(model.feature_importances_, index=X.columns)
plt.figure(figsize=(10, 6))
ax = feature_importances.sort_values().plot.barh(color='teal')
ax.set_title('📊 Feature Importance')
ax.set_xlabel('Importance Score')
ax.set_ylabel('Feature')
plt.tight_layout()
plt.show()

# Step 14: Prediction function
def predict_genre(feature_dict):
    """Predict the genre name for a single track.

    Parameters
    ----------
    feature_dict : dict
        Maps each training feature name (the columns of ``X``) to its value.
        Key order does not matter; keys that are not training features are
        ignored.

    Returns
    -------
    The genre label, as decoded by ``label_encoder``, predicted by ``model``.

    Raises
    ------
    ValueError
        If one or more training features are missing from ``feature_dict``.
    """
    expected_columns = list(X.columns)
    missing = [name for name in expected_columns if name not in feature_dict]
    if missing:
        raise ValueError(
            "Missing feature(s) for prediction: {}".format(missing)
        )

    # The scaler and model work positionally on the training column order, so
    # arrange the input the same way instead of relying on the dict's key order.
    input_df = pd.DataFrame([feature_dict])[expected_columns]
    input_scaled = scaler.transform(input_df)
    prediction = model.predict(input_scaled)
    # inverse_transform returns an array; a single row yields a single label.
    predicted_genre = label_encoder.inverse_transform(prediction)[0]
    return predicted_genre

# Step 15: Example prediction
# A single hand-written track, with one value for each feature name that
# predict_genre expects.
example_input = {
    'popularity': 40,
    'acousticness': 0.03,
    'danceability': 0.75,
    'duration_ms': 200000,
    'energy': 0.8,
    'instrumentalness': 0.2,
    'liveness': 0.1,
    'loudness': -5.0,
    'speechiness': 0.05,
    'tempo': 120.0,
    'valence': 0.7,
}

# Run the example through the prediction helper and show the result.
predicted_genre = predict_genre(example_input)
print(
    "🎵 Predicted Genre for Input:",
    predicted_genre,
)