<a href="https://colab.research.google.com/github/CamCranda11/MLFA25Project/blob/main/MLProjectModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [64]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Read the Spotify track table from the CSV under /content/sample_data.
song_data = pd.read_csv(filepath_or_buffer='/content/sample_data/spotify_data.csv')

# Plain-text preview of the first rows, followed by a blank spacer line.
print("--- Original Data ---", song_data.head(), "\n", sep="\n")

--- Original Data ---
   Unnamed: 0    artist_name        track_name                track_id  \
0           0     Jason Mraz   I Won't Give Up  53QF56cjZA9RTuuMZDrSA6   
1           1     Jason Mraz  93 Million Miles  1s8tP3jP4GZcyHDsjvw218   
2           2  Joshua Hyslop  Do Not Let Me Go  7BRCa8MPiyuvr2VU3O9W0F   
3           3   Boyce Avenue          Fast Car  63wsZUhUZLlh1OsyrZq7sz   
4           4   Andrew Belle  Sky's Still Blue  6nXIYClvJAfi6ujLiKqEq8   

   popularity  year     genre  danceability  energy  key  loudness  mode  \
0          68  2012  acoustic         0.483   0.303    4   -10.058     1   
1          50  2012  acoustic         0.572   0.454    3   -10.286     1   
2          57  2012  acoustic         0.409   0.234    3   -13.711     1   
3          58  2012  acoustic         0.392   0.251   10    -9.845     1   
4          54  2012  acoustic         0.430   0.791    6    -5.419     0   

   speechiness  acousticness  instrumentalness  liveness  valence    tempo  

In [65]:
# Columns that position each song in the clustering space.
features_to_cluster = ['danceability', 'energy', 'mode']
features_df = song_data.loc[:, features_to_cluster].copy()

# Fill any gaps in 'mode' with that column's most frequent value; the other
# two columns are passed through untouched.
mode_value = features_df['mode'].mode()[0]
features_df = features_df.assign(mode=features_df['mode'].fillna(mode_value))

# Standardise every feature column so they contribute on a comparable scale.
# NOTE(review): despite the "_df" suffix, the printed output below shows a
# plain array rather than a DataFrame.
scaler = StandardScaler()
scaled_features_df = scaler.fit(features_df).transform(features_df)

print("--- Scaled Features (First 5 Rows) ---", scaled_features_df[:5], "\n", sep="\n")

--- Scaled Features (First 5 Rows) ---
[[-0.29509342 -1.24461718  0.75872495]
 [ 0.18734904 -0.68639325  0.75872495]
 [-0.69622536 -1.49969964  0.75872495]
 [-0.78837729 -1.43685324  0.75872495]
 [-0.58239062  0.55944426 -1.31800069]]




In [66]:
# Number of clusters the songs are partitioned into.
optimal_k = 20

# Fit k-means on the scaled features: k-means++ seeding, 10 initialisations,
# and a fixed seed so the cluster assignment is repeatable.
kmeans_model = KMeans(
    n_clusters=optimal_k,
    init='k-means++',
    n_init=10,
    random_state=42,
).fit(scaled_features_df)

# One label per row of the scaled feature matrix.
cluster_labels = kmeans_model.labels_

# Attach the labels to a copy of the original table, leaving song_data as-is.
song_cluster_df = song_data.assign(cluster_id=cluster_labels)

print("--- Data with Cluster IDs ---")
display(song_cluster_df.head())
print("\n")

--- Data with Cluster IDs ---


Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster_id
0,0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,...,1,0.0429,0.694,0.0,0.115,0.139,133.406,240166,3,10
1,1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,...,1,0.0258,0.477,1.4e-05,0.0974,0.515,140.182,216387,4,16
2,2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,...,1,0.0323,0.338,5e-05,0.0895,0.145,139.832,158960,4,10
3,3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,...,1,0.0363,0.807,0.0,0.0797,0.508,204.961,304293,4,10
4,4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.43,0.791,6,...,0,0.0302,0.0726,0.0193,0.11,0.217,171.864,244320,4,14






In [67]:
def recommend_songs(input_song_name, input_artist_name, data_df, num_recs=3, random_state=None):
    """
    Recommend songs that share a cluster with the given song.

    The input song is located by an exact, case-sensitive match on both
    'track_name' and 'artist_name'. If several rows match, the first one
    determines the cluster. Every row carrying that title/artist pair is
    excluded from the candidates, and up to `num_recs` of the remaining rows
    in the same cluster are drawn at random.

    Parameters
    ----------
    input_song_name : str
        Track title to look up.
    input_artist_name : str
        Artist name to look up.
    data_df : pandas.DataFrame
        Table with at least 'track_name', 'artist_name' and 'cluster_id'
        columns. All of its columns (e.g. 'track_id', useful for building
        Spotify links) are carried through to the result.
    num_recs : int, default 3
        Maximum number of recommendations to return.
    random_state : int, optional
        Seed forwarded to `DataFrame.sample`. Pass a fixed value to get the
        same recommendations on every run; the default (None) keeps the
        original non-deterministic behaviour.

    Returns
    -------
    pandas.DataFrame or str
        The sampled recommendation rows, or a human-readable message when the
        song is not in `data_df` or has no cluster neighbours.
    """
    # Build the title/artist mask once; it is reused both to find the song
    # and to exclude it from the candidates.
    is_input_song = (
        (data_df['track_name'] == input_song_name) &
        (data_df['artist_name'] == input_artist_name)
    )

    # Explicit check rather than catching IndexError around the whole body,
    # so unrelated indexing errors are not reported as "not found".
    if not is_input_song.any():
        return f"Song '{input_song_name}' by {input_artist_name} not found in the dataset."

    song_cluster = data_df.loc[is_input_song, 'cluster_id'].iloc[0]

    recommendations = data_df[(data_df['cluster_id'] == song_cluster) & ~is_input_song]
    if recommendations.empty:
        return "No similar songs found."

    return recommendations.sample(
        n=min(num_recs, len(recommendations)),
        random_state=random_state,
    )

In [69]:
def show_recommendations(song_name, artist_name, data_df, num_recs=5):
    """
    Print a header for the given song and show its recommendations.

    Calls `recommend_songs`, prints a header line that includes the song's
    Spotify link when the song is present in `data_df`, then renders the
    recommendations as a table (or prints the message `recommend_songs`
    returned instead of a table).

    Parameters
    ----------
    song_name : str
        Track title to base the recommendations on.
    artist_name : str
        Artist of that track.
    data_df : pandas.DataFrame
        Clustered song table passed through to `recommend_songs`; must also
        contain the 'track_id' column and the columns displayed below.
    num_recs : int, default 5
        Maximum number of recommendations to request.

    Returns
    -------
    tuple
        `(recommendations, track_id)` where `recommendations` is whatever
        `recommend_songs` returned and `track_id` is the input song's
        'track_id' value, or None when the song is not in `data_df`.
    """
    # Sample first, then print: same order of operations as the original cell.
    recs = recommend_songs(song_name, artist_name, data_df, num_recs=num_recs)

    track_ids = data_df.loc[
        (data_df['track_name'] == song_name) &
        (data_df['artist_name'] == artist_name),
        'track_id',
    ]
    if track_ids.empty:
        track_id = None
        print(f"--- Recommendations based on '{song_name}' by {artist_name} (Track ID not found) ---")
    else:
        # First match wins, mirroring the lookup inside recommend_songs.
        track_id = track_ids.iloc[0]
        print(f"--- Recommendations based on '{song_name}' by {artist_name} https://open.spotify.com/track/{track_id} ---")

    if isinstance(recs, pd.DataFrame):
        display(recs[['track_name', 'artist_name', 'track_id', 'genre', 'energy', 'danceability', 'mode', 'cluster_id']])
    else:
        # recommend_songs returned an explanatory message rather than a table.
        print(recs)

    return recs, track_id


my_song = 'Do Everything Now'
my_artist = 'Saint Motel'
recommendations, input_song_track_id = show_recommendations(
    my_song, my_artist, song_cluster_df, num_recs=5
)


print("\n")

my_song_2 = 'm.A.A.d city'
my_artist_2 = 'Kendrick Lamar'
recommendations_2, input_song_2_track_id = show_recommendations(
    my_song_2, my_artist_2, song_cluster_df, num_recs=5
)

--- Recommendations based on 'Do Everything Now' by Saint Motel https://open.spotify.com/track/0Kj5gxd7F3JzTsIqa0MkoU ---


Unnamed: 0,track_name,artist_name,track_id,genre,energy,danceability,mode,cluster_id
534045,Beyond the Grave,Dark Funeral,7qqp0IJiUEpgSUcNhYXtXT,black-metal,0.949,0.291,0,8
711962,Sons of the Morrigan,Primordial,4ySi5utKrzJFT0pqzwX6JO,black-metal,0.931,0.242,0,8
158549,Antes de que cuente diez - Directo Teatro Arriaga,Fito y Fitipaldis,2a4kvaj6fGXhR8USeTOwAD,spanish,0.936,0.31,0,8
932793,Black Widow of La Porte,John 5,5qzba849erpnQeyp8zg135,classical,0.977,0.316,0,8
481474,Ways and Means,The Reverend Peyton's Big Damn Band,3aTKFTz6owUUZ5XMZ0q0tw,blues,0.825,0.33,0,8




--- Recommendations based on 'm.A.A.d city' by Kendrick Lamar https://open.spotify.com/track/439TlnnznSiBbQbgXiBqAd ---


Unnamed: 0,track_name,artist_name,track_id,genre,energy,danceability,mode,cluster_id
1060037,A Vida Do Viajante - Ao Vivo,Sérgio Reis,2JNg0WSj2IQlwUSAY52iz5,sertanejo,0.624,0.472,1,4
674452,Love Will Keep Us Together,Captain & Tennille,56ZPl9gFBZxPfRBOBqHfer,country,0.621,0.675,1,4
265002,Lost In Your Light (feat. Miguel),Dua Lipa,1v3bR4v6SruaE8efrTUqqp,dance,0.772,0.646,1,4
971062,天使の絵の具,Mari Iijima,3D8gdGUjOHEm7Xg1drhVNh,techno,0.728,0.603,1,4
1081520,Skeng - Autechre Remix,The Bug,52QQNYZRem9ejjBFI5erDR,dub,0.752,0.619,1,4
