# Introduction

# 🎵 Music Playlist Recommendation System

This notebook demonstrates a content-based music recommender system built using 
the **Spotify Features Dataset**.  

### Goals:
- Recommend similar songs based on audio features.
- Generate playlists by **seed song**, **genre**, or **mood**.
- Apply popularity weighting to balance similarity with mainstream appeal.

We will preprocess the dataset, build a recommender using 
`Nearest Neighbors`, and test it with different scenarios.


# Import Libraries + Load Dataset

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors

# Load dataset
df = pd.read_csv("../data/SpotifyFeatures.csv")

# Preview
print(df.shape)
df.head()


(232725, 18)


Unnamed: 0,genre,artist_name,track_name,track_id,popularity,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Movie,Henri Salvador,C'est beau de faire un Show,0BRjO6ga9RKCKjfDqeFgWV,0,0.611,0.389,99373,0.91,0.0,C#,0.346,-1.828,Major,0.0525,166.969,4/4,0.814
1,Movie,Martin & les fées,Perdu d'avance (par Gad Elmaleh),0BjC1NfoEOOusryehmNudP,1,0.246,0.59,137373,0.737,0.0,F#,0.151,-5.559,Minor,0.0868,174.003,4/4,0.816
2,Movie,Joseph Williams,Don't Let Me Be Lonely Tonight,0CoSDzoNIKCRs124s9uTVy,3,0.952,0.663,170267,0.131,0.0,C,0.103,-13.879,Minor,0.0362,99.488,5/4,0.368
3,Movie,Henri Salvador,Dis-moi Monsieur Gordon Cooper,0Gc6TVm52BwZD07Ki6tIvf,0,0.703,0.24,152427,0.326,0.0,C#,0.0985,-12.178,Major,0.0395,171.758,4/4,0.227
4,Movie,Fabien Nataf,Ouverture,0IuslXpMROHdEPvSl1fTQK,4,0.95,0.331,82625,0.225,0.123,F,0.202,-21.15,Major,0.0456,140.576,4/4,0.39


# Preprocess (Scaling, Features)

In [2]:
# Drop duplicates & NA
df = df.drop_duplicates().dropna().reset_index(drop=True)

# Select features
features = ['acousticness','danceability','energy','instrumentalness',
            'liveness','loudness','speechiness','tempo','valence','duration_ms','popularity']

music_df = df[['track_name','artist_name','genre'] + features].reset_index(drop=True)

# Scale features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(music_df[features])

# Fit Nearest Neighbors
nn = NearestNeighbors(metric='cosine', algorithm='brute')
nn.fit(scaled_features)


0,1,2
,n_neighbors,5
,radius,1.0
,algorithm,'brute'
,leaf_size,30
,metric,'cosine'
,p,2
,metric_params,
,n_jobs,


# Implement Recommender

In [3]:
def recommend_song(song_name, num_recommendations=5, genre_filter=None, popularity_weight=False):
    idx_list = music_df[music_df['track_name'].str.lower() == song_name.lower()].index.tolist()
    if len(idx_list) == 0:
        return f"❌ Song '{song_name}' not found in dataset."
    idx = idx_list[0]

    distances, indices = nn.kneighbors([scaled_features[idx]], n_neighbors=50)

    recs = music_df.iloc[indices[0][1:]].copy()
    recs["similarity"] = 1 - distances[0][1:]

    if genre_filter:
        recs = recs[recs['genre'].str.lower() == genre_filter.lower()]

    if recs.empty:
        return f"⚠️ No recommendations found for '{song_name}' with genre '{genre_filter}'. Try without filter."

    if popularity_weight:
        recs["popularity_norm"] = recs["popularity"] / 100
        recs["final_score"] = 0.7*recs["similarity"] + 0.3*recs["popularity_norm"]
        recs = recs.sort_values("final_score", ascending=False)
    else:
        recs = recs.sort_values("similarity", ascending=False)

    return recs[['track_name','artist_name','genre','popularity']].head(num_recommendations)


# Test Recommender (Different Cases)

In [4]:
# Normal test
print(recommend_song("C'est beau", num_recommendations=5))

# With genre filter
print(recommend_song("C'est beau", num_recommendations=5, genre_filter="Pop"))

# With popularity weighting
print(recommend_song("C'est beau", num_recommendations=5, popularity_weight=True))

# Non-existent song
print(recommend_song("RandomSong123", num_recommendations=5))


❌ Song 'C'est beau' not found in dataset.
❌ Song 'C'est beau' not found in dataset.
❌ Song 'C'est beau' not found in dataset.
❌ Song 'RandomSong123' not found in dataset.


# Generate Playlists

In [9]:
def generate_playlist(mood=None, genre=None, seed_song=None, playlist_length=10):
    recs = music_df.copy()

    # 🎵 1. Mood filtering
    if mood:
        if mood.lower() == "happy":
            recs = recs[recs['valence'] > 0.7]
        elif mood.lower() == "sad":
            recs = recs[recs['valence'] < 0.3]
        elif mood.lower() == "energetic":
            recs = recs[recs['energy'] > 0.7]

    # 🎶 2. Genre filtering
    if genre:
        recs = recs[recs['genre'].str.lower() == genre.lower()]

    # 🎼 3. Seed song recommendations
    if seed_song:
        result = recommend_song(seed_song, num_recommendations=playlist_length)
        # Ensure it's always a DataFrame
        if isinstance(result, str):  # error message from recommend_song
            return pd.DataFrame([{"track_name": result, "artist_name": "", "genre": ""}])
        return result

    # 🔀 4. Random sample
    if recs.empty:
        return pd.DataFrame([{"track_name": f"No songs found for mood='{mood}', genre='{genre}'",
                              "artist_name": "", "genre": ""}])

    return recs.sample(min(playlist_length, len(recs)))[['track_name','artist_name','genre']]


In [10]:
happy_playlist = generate_playlist(mood="happy", playlist_length=10)
pop_playlist   = generate_playlist(genre="Pop", playlist_length=10)
seed_playlist  = generate_playlist(seed_song="C'est beau", playlist_length=10)

print(happy_playlist)
print(pop_playlist)
print(seed_playlist)


                             track_name            artist_name  \
138380                      Livest Shit           The Movement   
151118  So Close - Michael Calfan Remix                   NOTD   
100474                          The Bay              Metronomy   
229662    Retrograded - Wankelmut Remix             The Knocks   
189093                   Natural Mystic            Los Pericos   
178099                      Kilimanjaro  The Shaolin Afronauts   
166677         Dude (Looks Like A Lady)              Aerosmith   
41730                Ride Like the Wind      Christopher Cross   
99248                              Love                    Zoé   
147709                8 Million Stories   A Tribe Called Quest   

                   genre  
138380            Reggae  
151118               Pop  
100474  Children’s Music  
229662              Soul  
189093               Ska  
178099              Jazz  
166677              Rock  
41730               Folk  
99248   Children’s Music  
147

# Export Playlists as CSV

In [11]:
happy_playlist.to_csv("../outputs/happy_playlist.csv", index=False)
pop_playlist.to_csv("../outputs/pop_playlist.csv", index=False)
seed_playlist.to_csv("../outputs/seed_playlist.csv", index=False)
