In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import NMF
from sklearn.metrics.pairwise import cosine_similarity
import pickle

In [None]:
# from google.colab import files
# uploaded = files.upload()


In [None]:
# !mkdir -p /musicflow/backend
# !mv Music.csv /musicflow/backend/


Work from the `/musicflow/backend` directory, where `Music.csv` is stored.

In [None]:
# %cd /musicflow/backend


In [None]:

# Load the track catalogue and keep a fixed-seed random subset of 10,000 rows
# so the later steps work on a smaller, repeatable sample.
df = (
    pd.read_csv("/musicflow/backend/Music.csv")
    .sample(n=10000, random_state=42)
)
# One print call, newline-separated: status line first, then a preview of the sample.
print("✅ Data loaded successfully!", df.head(), sep="\n")


✅ Data loaded successfully!
                       name             artist              spotify_id  \
37600               Freedom               Kygo  5Gj1wG8b12VQdEd3hUuSwo   
43556  If I Didn't Have You       Randy Travis  52erNkfUKv34u02CjcHOPj   
13540       A Little Wicked  Valerie Broussard  0ZPMI8Gnz7p6omBBIOwiO9   
15253          Yes Baby Yes       Mo' Horizons  2NxIOFFrMxjeiPogqIkno1   
63822               今夜妳會不會來           Leon Lai  2QyEqtetkZqCNOMkshxuhh   

                                                 preview  \
37600                                                 no   
43556                                                 no   
13540  https://p.scdn.co/mp3-preview/2f44695fc3b2b971...   
15253  https://p.scdn.co/mp3-preview/eea93f7e97cc1c63...   
63822                                                 no   

                                                     img  danceability  \
37600                                                 no         0.715   
43556             

Data preprocessing: drop rows with missing audio features, then standardize those features.

In [None]:
# Audio-feature columns that feed the content-based similarity.
numeric_features = [
    'danceability',
    'energy',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
]

scaler = StandardScaler()

# Discard rows missing any of the features, then standardize the remaining rows.
# scaled_features keeps the row order of the filtered df.
df = df.dropna(subset=numeric_features)
scaled_features = scaler.fit_transform(df[numeric_features])

Train-Test Split for Collaborative Filtering



In [None]:
# Synthetic interaction data: the dataset has no real listeners, so every track
# is assigned to one of 200 made-up users with a random 1-5 rating.
# A seeded generator keeps these assignments (and therefore the split and the
# trained NMF model) identical across Restart & Run All, matching the
# random_state=42 used everywhere else in this notebook.
rng = np.random.default_rng(42)

fake_users = [f"user_{i}" for i in range(1, 201)]
df['user_id'] = rng.choice(fake_users, size=len(df))
df['rating'] = rng.integers(1, 6, size=len(df))  # upper bound is exclusive -> ratings 1-5

# Hold out 20% of the (user, song, rating) rows; only train_df feeds the NMF model.
train_df, test_df = train_test_split(df[['user_id', 'name', 'rating']], test_size=0.2, random_state=42)
print(f"Training data: {len(train_df)}, Testing data: {len(test_df)}")

# --- Collaborative filtering: factorize the user x song rating matrix with NMF ---
# Rows are users, columns are song names. pivot_table's default aggregation (mean)
# combines repeated (user, song) pairs; pairs with no rating are filled with 0.
user_item_matrix = train_df.pivot_table(
    values='rating',
    index='user_id',
    columns='name',
).fillna(0)

# Earlier configuration, kept for reference: NMF(n_components=15, random_state=42)
nmf_model = NMF(random_state=42, max_iter=150, n_components=8)

# fit_transform returns one row of factors per user; components_ holds one column per song.
user_features = nmf_model.fit_transform(user_item_matrix)
item_features = nmf_model.components_
print("✅ NMF Collaborative Filtering model trained")

Training data: 8000, Testing data: 2000
✅ NMF Collaborative Filtering model trained


Content-Based Filtering

In [None]:
# Pairwise cosine similarity between every pair of tracks in scaled_features;
# row/column i corresponds to the i-th row of scaled_features.
content_similarity = cosine_similarity(X=scaled_features)
print("✅ Content similarity computed")

✅ Content similarity computed


Hybrid Recommendation Function

---




In [None]:
def hybrid_recommendation(song_name, top_n=5):
    """Return the names of the songs most similar to ``song_name``.

    Similarity is read from the module-level ``content_similarity`` matrix,
    whose rows and columns follow the row order of the module-level ``df``.
    Despite the function name, only the content-based similarity is used here;
    the NMF factors are not consulted.

    Args:
        song_name: Exact value to look up in ``df['name']``. If several rows
            share the name, the first one is used as the query.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of up to ``top_n`` song names ordered from most to least
        similar, or ``["Song not found"]`` when ``song_name`` is not in ``df``.
    """
    # Work with the row *position*, not the index label: after sampling and
    # dropna the labels are arbitrary, while content_similarity and df.iloc
    # are both addressed by position.
    matches = np.flatnonzero((df['name'] == song_name).to_numpy())
    if matches.size == 0:
        return ["Song not found"]
    pos = int(matches[0])

    scores = np.asarray(content_similarity[pos], dtype=float)
    # Stable descending sort keeps the original row order among tied scores.
    ranked = np.argsort(-scores, kind="stable")
    # Drop the query row explicitly instead of assuming it sorts first
    # (another track with identical features would tie with it).
    ranked = ranked[ranked != pos][:top_n]
    return [df['name'].iloc[int(i)] for i in ranked]

print("✅ Hybrid recommender ready!")

✅ Hybrid recommender ready!


Save Model

In [None]:
import pickle

# Persist everything needed to serve recommendations later in a single pickle:
# the fitted scaler and NMF model, both factor matrices, the sampled track
# table and the precomputed similarity matrix.
with open("/musicflow/backend/music_model.pkl", "wb") as model_file:
    pickle.dump(
        dict(
            scaler=scaler,
            nmf_model=nmf_model,
            user_features=user_features,
            item_features=item_features,
            df=df,
            content_similarity=content_similarity,
        ),
        model_file,
    )
print("✅ Model saved successfully as music_model.pkl")

✅ Model saved successfully as music_model.pkl


In [None]:
# with open("/content/music_model.pkl", "wb") as f:
#     pickle.dump({...}, f)


In [None]:
# Colab only: send the saved model to the browser as a download.
# The path must match the file written by the save cell
# (/musicflow/backend/music_model.pkl); nothing is written under /content.
from google.colab import files

files.download("/musicflow/backend/music_model.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>