In [1]:
# 📦 Step 1: Importing required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle


In [2]:
# 📄 Step 2: Read the Spotify track features CSV into a DataFrame
# The path is relative, so the file must sit in the notebook's working directory.
dataset_path = "SpotifyFeatures.csv"
df = pd.read_csv(dataset_path)

# Preview the first rows as the cell's rich output.
df.head()


Unnamed: 0,genre,artist_name,track_name,track_id,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Movie,Henri Salvador,C'est beau de faire un Show,0BRjO6ga9RKCKjfDqeFgWV,0.0,0.611,0.389,99373.0,0.91,0.0,C#,0.346,-1.828,Major,0.0525,166.969,4/4,0.814
1,Movie,Martin & les fées,Perdu d'avance (par Gad Elmaleh),0BjC1NfoEOOusryehmNudP,1.0,0.246,0.59,137373.0,0.737,0.0,F#,0.151,-5.559,Minor,0.0868,174.003,4/4,0.816
2,Movie,Joseph Williams,Don't Let Me Be Lonely Tonight,0CoSDzoNIKCRs124s9uTVy,3.0,0.952,0.663,170267.0,0.131,0.0,C,0.103,-13.879,Minor,0.0362,99.488,5/4,0.368
3,Movie,Henri Salvador,Dis-moi Monsieur Gordon Cooper,0Gc6TVm52BwZD07Ki6tIvf,0.0,0.703,0.24,152427.0,0.326,0.0,C#,0.0985,-12.178,Major,0.0395,171.758,4/4,0.227
4,Movie,Fabien Nataf,Ouverture,0IuslXpMROHdEPvSl1fTQK,4.0,0.95,0.331,82625.0,0.225,0.123,F,0.202,-21.15,Major,0.0456,140.576,4/4,0.39


In [3]:
# 🎭 Step 4: Defining mood based on 'valence'
# - High valence (≥ 0.7) = happy
# - Low valence (< 0.3) = sad
# - Mid valence = neutral

def assign_mood(valence):
    """Map a valence score to a coarse mood label.

    Args:
        valence: Numeric valence value for one track.

    Returns:
        'sad' when valence is below 0.3, 'happy' when it is 0.7 or above,
        and 'neutral' for everything else.

    Note:
        Both comparisons are False for NaN, so a missing valence is
        labelled 'neutral' rather than raising or propagating NaN.
    """
    if valence < 0.3:
        return 'sad'
    if valence >= 0.7:
        return 'happy'
    return 'neutral'

# Label every track by running its valence through assign_mood element-wise.
df['mood'] = df['valence'].map(assign_mood)


In [4]:
# 🔢 Step 5: Turn the string mood labels into integer class ids
# The fitted encoder is kept in `le` so ids can be mapped back to names later.
le = LabelEncoder()
df['mood_label'] = le.fit_transform(df['mood'])

# ✅ Class-name -> integer-id lookup, printed for reference
label_mapping = {
    mood_name: mood_id
    for mood_name, mood_id in zip(le.classes_, le.transform(le.classes_))
}
print("Mood Mapping:", label_mapping)


Mood Mapping: {'happy': np.int64(0), 'neutral': np.int64(1), 'sad': np.int64(2)}


In [5]:
# 🎯 Step 6: Build the feature matrix (X) and the target vector (y)
# NOTE(review): 'valence' is used as an input feature, but the target
# (mood -> mood_label) is computed purely from valence thresholds. The model
# can therefore recover the label from that single column (target leakage).
# Dropping 'valence' here would change the input shape of the saved scaler
# and model, so confirm with downstream consumers before changing it.
feature_columns = ['valence', 'energy', 'danceability', 'tempo']
X = df[feature_columns]
y = df['mood_label']


In [6]:
# 📏 Step 7: Standardize the feature columns with StandardScaler
# The fitted scaler is kept in `scaler` because it is pickled later on.
# NOTE(review): the scaler is fit on all rows of X, i.e. before the
# train/test split, so held-out rows contribute to the fitted statistics.
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)


In [7]:
# 🧪 Step 8: Hold out 20% of the rows for testing; train on the other 80%
# random_state is fixed so the same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)


In [8]:
# 🤖 Step 9: Fit a random forest on the training split
# 100 trees with a fixed seed so the trained forest is reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestClassifier(**forest_params)

# Kept as the trailing expression so the fitted estimator is shown as cell output.
model.fit(X_train, y_train)


In [9]:
# 📊 Step 10: Score the model on the held-out split
# NOTE(review): a perfect score is expected here, because the label is a
# deterministic function of 'valence' and 'valence' is one of the input
# features. Read it as a leakage symptom, not as evidence of model quality.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("✅ Accuracy:", test_accuracy)

# Per-class precision/recall/F1, with rows named after the encoder's classes.
per_class_report = classification_report(y_test, y_pred, target_names=le.classes_)
print(per_class_report)


✅ Accuracy: 1.0
              precision    recall  f1-score   support

       happy       1.00      1.00      1.00      4746
     neutral       1.00      1.00      1.00     12737
         sad       1.00      1.00      1.00      8286

    accuracy                           1.00     25769
   macro avg       1.00      1.00      1.00     25769
weighted avg       1.00      1.00      1.00     25769



In [10]:
# 💾 Step 11: Persist the trained model, scaler, and label encoder for later use
# Each file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes, rather than being left open until the
# garbage collector gets to it.
artifacts = {
    'mood_classifier.pkl': model,
    'scaler.pkl': scaler,
    'label_encoder.pkl': le,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

print("🎉 Mood classifier, scaler, and label encoder saved successfully.")


🎉 Mood classifier, scaler, and label encoder saved successfully.
