Load and Explore Dataset



In [51]:
import pandas as pd

In [52]:
# Load the Spotify feature table, discard the identifier/text columns that are
# not used further in this notebook, and remove every row with a missing value.
id_columns = ["track_id", "track_name", "artist_name"]
df = (
    pd.read_csv('SpotifyFeatures.csv')
    .drop(columns=id_columns)
    .dropna()
)
# Preview the first rows of the cleaned frame.
df.head()

Unnamed: 0,genre,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Movie,0,0.611,0.389,99373,0.91,0.0,C#,0.346,-1.828,Major,0.0525,166.969,4/4,0.814
1,Movie,1,0.246,0.59,137373,0.737,0.0,F#,0.151,-5.559,Minor,0.0868,174.003,4/4,0.816
2,Movie,3,0.952,0.663,170267,0.131,0.0,C,0.103,-13.879,Minor,0.0362,99.488,5/4,0.368
3,Movie,0,0.703,0.24,152427,0.326,0.0,C#,0.0985,-12.178,Major,0.0395,171.758,4/4,0.227
4,Movie,4,0.95,0.331,82625,0.225,0.123,F,0.202,-21.15,Major,0.0456,140.576,4/4,0.39


EDA (Exploratory Data Analysis)

In [53]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Grid of pairwise relationships between selected audio features, coloured by genre.
pairplot_columns = ["acousticness", "danceability", "energy", "instrumentalness", "tempo", "genre"]
sns.pairplot(data=df[pairplot_columns], hue="genre")
plt.show()

In [None]:
# Correlation matrix of the numeric columns, drawn as an annotated heatmap.
numeric_corr = df.select_dtypes(include='number').corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(numeric_corr, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()

In [None]:
# One box plot per audio feature, showing its distribution within each genre.
features = ["acousticness", "danceability", "energy", "instrumentalness", "tempo"]
for feature in features:
    fig, ax = plt.subplots(figsize=(8, 4))
    sns.boxplot(data=df, x='genre', y=feature, ax=ax)
    # Tilt the genre tick labels to reduce overlap on the x axis.
    plt.xticks(rotation=45)
    ax.set_title(f"{feature} by Genre")
    plt.show()

Preprocessing

In [None]:
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Encode the target column: each distinct genre string becomes an integer class id.
# `label_encoder.classes_` keeps the id -> genre-name mapping.
label_encoder = LabelEncoder()
genre_codes = label_encoder.fit_transform(df['genre'])
df['genre'] = genre_codes

In [None]:
# Encode every remaining non-numeric feature column (the data preview shows
# `key`, `mode` and `time_signature` holding strings) so that scaling and model
# fitting receive numeric input only.
# Each column gets its OWN encoder: re-fitting `label_encoder` here would
# replace the genre classes it learned, and those are needed later to label
# the evaluation output.
feature_encoders = {}
for column in df.drop(columns='genre').select_dtypes(exclude='number').columns:
    column_encoder = LabelEncoder()
    df[column] = column_encoder.fit_transform(df[column])
    # Keep the fitted encoder so the integer codes can be mapped back if needed.
    feature_encoders[column] = column_encoder

In [None]:
# Separate the target (encoded genre) from the predictor columns.
y = df['genre']
X = df.drop(columns='genre')

In [None]:
# Split BEFORE scaling so the scaler's mean/variance come from the training
# rows only; fitting it on the full table would leak test-set statistics into
# the features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)  # learn the scaling from the training rows
X_test = scaler.transform(X_test_raw)        # reuse the training statistics unchanged
# Kept for any later cell that expects the whole scaled matrix.
X_scaled = scaler.transform(X)


Train the Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest configuration: 300 trees, depth capped at 25, fixed seed for
# repeatable runs.
forest_params = {"n_estimators": 300, "max_depth": 25, "random_state": 42}
clf = RandomForestClassifier(**forest_params)
clf.fit(X_train, y_train)

In [None]:
# Predict the encoded genre id for every row of the held-out test split.
y_pred = clf.predict(X_test)

Evaluate the Model

In [None]:
from sklearn.metrics import accuracy_score,f1_score,classification_report,confusion_matrix

In [None]:
# Headline metrics on the held-out split; the F1 score uses weighted averaging.
# The genre names come from `label_encoder`, the encoder fitted on the genre
# column (the previously used name `le` is not defined anywhere in this
# notebook and raised NameError). The encoder must still hold its genre fit
# here, i.e. it must not have been re-fitted on another column.
genre_names = label_encoder.classes_
print("Accuracy      :", accuracy_score(y_test, y_pred))
print("F1 Score      :", f1_score(y_test, y_pred, average='weighted'))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=genre_names))




In [None]:
# Confusion matrix of true vs. predicted genres on the held-out split.
# Tick labels come from `label_encoder`, the encoder fitted on the genre column
# (the previously used name `le` is not defined anywhere in this notebook and
# raised NameError). The encoder must still hold its genre fit here.
genre_names = label_encoder.classes_
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(10, 6))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=genre_names, yticklabels=genre_names, cmap="Blues")
plt.title("Confusion Matrix")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
# Rotate both sets of tick labels to reduce overlap between genre names.
plt.xticks(rotation=45)
plt.yticks(rotation=45)
plt.tight_layout()
plt.show()