# 🎶  Music Recommendation System





In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD


In [9]:
# Load your cleaned dataset
df = pd.read_csv("songs_data_cleaned(1).csv")
print("Shape:", df.shape)
df.head()


Shape: (10722, 15)


Unnamed: 0,Username,artist_name,track_name,genre,track_id,popularity,acousticness,danceability,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence
0,Babs_05,Kacey Musgraves,Slow Burn,Dance,6ET9kf9riLETWs9lePUEAI,73,0.462,0.555,0.43,0.000191,0.0869,-9.105,0.0295,147.613,0.274
1,Babs_05,Salt-N-Pepa,Push It,Dance,0TT7wJiEYD5GAeJfSR1ETX,66,0.00684,0.926,0.6,0.00187,0.0901,-12.485,0.0883,127.247,0.97
2,Babs_05,Fat Joe,"Lean Back (feat. Lil Jon, Eminem, Mase & Remy ...",Dance,2uiEqPjMmIJXOKplzsEASP,63,0.116,0.635,0.751,0.0,0.0951,-5.055,0.357,126.806,0.591
3,Babs_05,Dr. Dre,Big Ego's,Hip-Hop,2BMRUAA1oTc7e9JPlr6xbZ,55,0.582,0.823,0.816,7e-06,0.235,-1.48,0.249,88.688,0.485
4,Babs_05,Rodríguez,Crucify Your Mind,Folk,2Xn7NadvZ56D0B2D7x2CSL,43,0.217,0.505,0.561,0.0,0.0952,-7.772,0.0803,85.102,0.55


## Content-Based Filtering


In [18]:
# ===============================
# Cell 3 - Content-Based Filtering
# ===============================
features = ['acousticness','danceability','energy','instrumentalness',
            'liveness','loudness','speechiness','tempo','valence']

# Drop rows with missing values
df_cb = df.dropna(subset=features)

# Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_cb[features])

# Precompute cosine similarity
similarity = cosine_similarity(X_scaled)

def recommend_content(song_name, n=5):
    """Recommend n similar songs using audio features (deduplicated)."""
    if song_name not in df_cb['track_name'].values:
        return []
    idx = df_cb[df_cb['track_name'] == song_name].index[0]
    sim_scores = list(enumerate(similarity[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    seen = set()
    recs = []
    for i, _ in sim_scores[1:]:
        track = df_cb.iloc[i]['track_name']
        artist = df_cb.iloc[i]['artist_name']
        if track not in seen:   # prevent duplicates
            recs.append((track, artist))
            seen.add(track)
        if len(recs) >= n:
            break
    return recs



## Collaborative Filtering


In [17]:

# Build user-song interaction matrix
user_item_matrix = df.pivot_table(index='Username', columns='track_id', values='popularity').fillna(0)

# Normalize to [0,1]
user_item_matrix_norm = user_item_matrix / 100.0

# Apply Truncated SVD
k = 50
svd = TruncatedSVD(n_components=k, random_state=42)
U_k = svd.fit_transform(user_item_matrix_norm)
sigma_k = svd.singular_values_
Vt_k = svd.components_

# Predicted interaction matrix
pred_matrix = np.dot(U_k, np.dot(np.diag(sigma_k), Vt_k))

pred_df = pd.DataFrame(pred_matrix, index=user_item_matrix.index, columns=user_item_matrix.columns)

def recommend_collab(user_id=None, n=5):
    """Recommend n songs for a user using collaborative filtering (deduplicated)."""
    if user_id is None:
        user_id = pred_df.index[0]  # default to first user

    if user_id not in pred_df.index:
        return []

    user_preds = pred_df.loc[user_id].sort_values(ascending=False)[:n]
    recs = [(df[df['track_id']==tid]['track_name'].iloc[0],
             df[df['track_id']==tid]['artist_name'].iloc[0]) for tid in user_preds.index]
    return recs




## Hybrid
(Content-Based Filtering + Collaborative Filtering
)

In [15]:

def recommend_hybrid(user_id=None, song_name=None, alpha=0.6, n=5):
    """
    Hybrid recommender combining collaborative + content scores.
    alpha = weight for collaborative filtering (0.6 = 60% collab, 40% content).
    """
    if user_id is None:
        user_id = pred_df.index[0]  # default user
    if song_name is None:
        song_name = df_cb['track_name'].iloc[0]  # default song

    # Collab recs
    collab_recs = recommend_collab(user_id, n*2)
    collab_dict = {track: i for i,(track,_) in enumerate(collab_recs)}

    # Content recs
    content_recs = recommend_content(song_name, n*2)
    content_dict = {s[0]: i for i,s in enumerate(content_recs)}

    # Combine
    scores = {}
    all_tracks = set(collab_dict.keys()) | set(content_dict.keys())
    for t in all_tracks:
        c_score = (n - collab_dict.get(t, n)) / n
        f_score = (n - content_dict.get(t, n)) / n
        scores[t] = alpha*c_score + (1-alpha)*f_score

    final = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:n]
    return [(track, df[df['track_name']==track]['artist_name'].iloc[0]) for track,_ in final]




## Output compairing all recommendation systems

In [16]:


def compare_recommenders(song_name="Believer", user_id=None, n=10):
    print(f"\n🎵 Recommendations for: {song_name} (n={n})")
    print("="*60)

    # Content-based
    print("\n📌 Content-Based Filtering:")
    recs_content = recommend_content(song_name, n=n)
    for i, (track, artist) in enumerate(recs_content, 1):
        print(f"{i}. {track} — {artist}")

    # Collaborative
    print("\n📌 Collaborative Filtering:")
    recs_collab = recommend_collab(user_id, n=n)
    for i, (track, artist) in enumerate(recs_collab, 1):
        print(f"{i}. {track} — {artist}")

    # Hybrid
    print("\n📌 Hybrid Recommender (α=0.6):")
    recs_hybrid = recommend_hybrid(user_id, song_name, alpha=0.6, n=n)
    for i, (track, artist) in enumerate(recs_hybrid, 1):
        print(f"{i}. {track} — {artist}")

# Example usage
compare_recommenders(song_name="Believer", n=10)



🎵 Recommendations for: Believer (n=10)

📌 Content-Based Filtering:
1. One Kiss (with Dua Lipa) — Calvin Harris
2. Fuck the Pain Away — Peaches
3. Y.A.L.A. — M.I.A.
4. Greatest Love — Ciara
5. Say It Right — Nelly Furtado
6. Mono Poisoner — DECO*27
7. Sippin On Some Syrup (feat. UGK (Underground Kingz) & Project Pat) — Three 6 Mafia
8. Lose Control (feat. Ciara & Fat Man Scoop) — Missy Elliott
9. Watch Out For This (Bumaye) — Major Lazer
10. Damage — PARTYNEXTDOOR

📌 Collaborative Filtering:
1. 7 rings — Ariana Grande
2. Wow. — Post Malone
3. Sweet but Psycho — Ava Max
4. Sunflower - Spider-Man: Into the Spider-Verse — Post Malone
5. Without Me — Halsey
6. MIDDLE CHILD — J. Cole
7. Shallow — Lady Gaga
8. thank u, next — Ariana Grande
9. SICKO MODE — Travis Scott
10. Nothing Breaks Like a Heart (feat. Miley Cyrus) — Mark Ronson

📌 Hybrid Recommender (α=0.6):
1. 7 rings — Ariana Grande
2. Wow. — Post Malone
3. Sweet but Psycho — Ava Max
4. Sunflower - Spider-Man: Into the Spider-Verse — 