In [7]:
# Project 15: Spotify Mood Classifier (Expanded Data)
# Goal: Predict song mood (Happy, Sad, Chill) based on audio features (Danceability, Energy, Valence).

import pandas as pd
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# STEP 1: EXPANDED TRAINING DATA
# I added a 3rd category "Chill" to make the model smarter.
# - Happy: High Energy, High Valence.
# - Sad: Low Energy, Low Valence.
# - Chill: Low/Mid Energy, Mid/High Valence (Relaxing but not sad).

def get_training_data():
    """Build the hand-labelled training set as a DataFrame.

    Each row describes one song: its title (``song``), three audio
    features (``dance``, ``energy``, ``valence``) and its ``mood`` label.
    Rows are ordered Happy, then Sad, then Chill, five songs per mood.
    """
    columns = ['song', 'dance', 'energy', 'valence', 'mood']

    # (title, dance, energy, valence) tuples, grouped by mood label.
    songs_by_mood = {
        'Happy': [
            ('Happy',                0.85, 0.90, 0.96),
            ('Shake It Off',         0.80, 0.85, 0.90),
            ('Uptown Funk',          0.90, 0.95, 0.93),
            ("Can't Stop Feeling",   0.88, 0.87, 0.95),
            ('Levitating',           0.75, 0.80, 0.85),
        ],
        'Sad': [
            ('Someone Like You',     0.30, 0.40, 0.20),
            ('Yesterday',            0.33, 0.20, 0.30),
            ('Fix You',              0.20, 0.30, 0.15),
            ('The Sound of Silence', 0.25, 0.10, 0.10),
            ('Drivers License',      0.40, 0.40, 0.25),
        ],
        'Chill': [
            ('Banana Pancakes',      0.60, 0.40, 0.65),
            ('Thinking Out Loud',    0.55, 0.50, 0.60),
            ('Perfect',              0.50, 0.45, 0.55),
            ('Lover',                0.45, 0.50, 0.60),
            ('Sunflower',            0.70, 0.50, 0.80),
        ],
    }

    # Flatten the groups into rows, appending the mood label to each song.
    rows = [
        (title, dance, energy, valence, mood)
        for mood, songs in songs_by_mood.items()
        for title, dance, energy, valence in songs
    ]
    return pd.DataFrame(rows, columns=columns)

df = get_training_data()
print(f"Training Data Loaded: {len(df)} songs.")

# STEP 2: TRAIN MODEL (KNN)
# The three audio features are the inputs; the mood label is the target.
X_train = df.loc[:, ['dance', 'energy', 'valence']]
y_train = df.loc[:, 'mood']

# n_neighbors=3: each prediction is a vote among the 3 closest training songs.
# fit() returns the fitted estimator, so construction and training are chained.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# STEP 3: API SIMULATION (Dictionary Lookup)
# Stand-in for the JSON a Spotify audio-features request would return:
# song title -> {'dance', 'energy', 'valence'} for songs not in the training set.
spotify_db = {
    # Expected: Happy
    "Despacito": dict(dance=0.85, energy=0.80, valence=0.88),
    # Expected: Sad
    "All I Want": dict(dance=0.35, energy=0.25, valence=0.20),
    # Expected: Chill (borderline Happy)
    "Lazy Song": dict(dance=0.70, energy=0.40, valence=0.75),
    # Ambiguous: could plausibly be Sad or Chill
    "Bohemian Rhapsody": dict(dance=0.40, energy=0.60, valence=0.40),
    # Expected: Sad
    "Stay With Me": dict(dance=0.40, energy=0.30, valence=0.25),
}

def get_song_features(title):
    """Look up the audio features for *title* in ``spotify_db``.

    Returns the stored feature dict when the title is known. For an
    unknown title it returns a fixed neutral profile (every feature set
    to 0.5) rather than raising.
    """
    neutral_profile = {'dance': 0.5, 'energy': 0.5, 'valence': 0.5}
    return spotify_db.get(title, neutral_profile)

# STEP 4: PREDICT NEW SONGS
# Classify each test title and report the winning mood with its probability.
test_songs = ["Despacito", "All I Want", "Lazy Song", "Stay With Me"]
feature_names = ['dance', 'energy', 'valence']

print("\n--- Spotify Mood Prediction ---")
print(f"{'SONG TITLE':<20} | {'PREDICTED MOOD':<15} | {'CONFIDENCE'}")
print("-" * 55)

for song in test_songs:
    features = get_song_features(song)

    # One-row frame whose columns match, in name and order, the ones the
    # model was fitted on.
    input_data = pd.DataFrame([features], columns=feature_names)

    prediction = knn.predict(input_data)[0]

    # Confidence is the largest class probability, shown as a percentage.
    proba = knn.predict_proba(input_data)
    confidence = proba.max() * 100

    print(f"{song:<20} | {prediction:<15} | {confidence:.0f}%")

Training Data Loaded: 15 songs.

--- Spotify Mood Prediction ---
SONG TITLE           | PREDICTED MOOD  | CONFIDENCE
-------------------------------------------------------
Despacito            | Happy           | 100%
All I Want           | Sad             | 100%
Lazy Song            | Chill           | 100%
Stay With Me         | Sad             | 100%


**EVALUATION RESULTS**

**1. Performance:** The KNN model successfully categorized songs into three distinct moods: 'Happy', 'Sad', and 'Chill'.

**2. Key Predictions:**
 - "Despacito" identified as 'Happy' due to high energy/valence metrics.
   
 - "Lazy Song" correctly classified as 'Chill', validating the new category for relaxed, mid-tempo tracks.

 - "Stay With Me" flagged as 'Sad' with 100% confidence.
   
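**3. Confidence Score:** Achieved 100% confidence on all four test cases. With `n_neighbors=3`, the confidence is the fraction of the three nearest training songs that share the predicted mood, so 100% means all three neighbors agreed. This indicates clear separation between mood clusters in the training data, although four hand-picked songs are too few to measure accuracy in general.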