<a href="https://colab.research.google.com/github/CamCranda11/MLFA25Project/blob/main/MLProjectModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Load the track table from a CSV expected in the notebook's working directory.
song_data = pd.read_csv('spotify_data.csv')

# Preview the first rows under a banner; sep="\n" reproduces the line breaks
# that three separate print() calls would emit.
print("--- Original Data ---", song_data.head(), "\n", sep="\n")

--- Original Data ---
   Unnamed: 0    artist_name        track_name                track_id  \
0           0     Jason Mraz   I Won't Give Up  53QF56cjZA9RTuuMZDrSA6   
1           1     Jason Mraz  93 Million Miles  1s8tP3jP4GZcyHDsjvw218   
2           2  Joshua Hyslop  Do Not Let Me Go  7BRCa8MPiyuvr2VU3O9W0F   
3           3   Boyce Avenue          Fast Car  63wsZUhUZLlh1OsyrZq7sz   
4           4   Andrew Belle  Sky's Still Blue  6nXIYClvJAfi6ujLiKqEq8   

   popularity  year     genre  danceability  energy  key  loudness  mode  \
0          68  2012  acoustic         0.483   0.303    4   -10.058     1   
1          50  2012  acoustic         0.572   0.454    3   -10.286     1   
2          57  2012  acoustic         0.409   0.234    3   -13.711     1   
3          58  2012  acoustic         0.392   0.251   10    -9.845     1   
4          54  2012  acoustic         0.430   0.791    6    -5.419     0   

   speechiness  acousticness  instrumentalness  liveness  valence    tempo  

In [3]:
# Audio attributes used as the clustering feature space.
features_to_cluster = [
    'danceability',
    'energy',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'valence',
]
features_df = song_data.loc[:, features_to_cluster].copy()

# Fill missing 'mode' entries with the column's most frequent value.
# Only 'mode' is imputed here; the other features are passed through as-is.
mode_value = features_df['mode'].mode()[0]
features_df = features_df.fillna({'mode': mode_value})

# Standardize the features so no single attribute dominates the distance metric.
# NOTE: despite the "_df" suffix, the result is the array returned by
# fit_transform (see the printed output), not a DataFrame.
scaler = StandardScaler()
scaled_features_df = scaler.fit_transform(features_df)

print("--- Scaled Features (First 5 Rows) ---")
print(scaled_features_df[:5])
print("\n")

--- Scaled Features (First 5 Rows) ---
[[-0.29509342 -1.24461718  0.75872495 -0.39352278  1.04922991 -0.69122871
  -1.17892497]
 [ 0.18734904 -0.68639325  0.75872495 -0.52833738  0.43794003 -0.69119118
   0.22134908]
 [-0.69622536 -1.49969964  0.75872495 -0.47709206  0.04637646 -0.69109175
  -1.15658017]
 [-0.78837729 -1.43685324  0.75872495 -0.44555648  1.36755137 -0.69122871
   0.19528015]
 [-0.58239062  0.55944426 -1.31800069 -0.49364824 -0.70125643 -0.63836256
  -0.88844259]]




In [4]:
# Number of clusters the songs are partitioned into.
optimal_k = 50

# fit() returns the estimator itself, so construction and fitting can be chained.
# The fixed random_state keeps cluster assignments reproducible between runs.
kmeans_model = KMeans(
    n_clusters=optimal_k,
    init='k-means++',
    n_init=10,
    random_state=42,
).fit(scaled_features_df)

cluster_labels = kmeans_model.labels_

# Build a new table carrying each song's cluster id; song_data itself is not modified.
song_cluster_df = song_data.assign(cluster_id=cluster_labels)

print("--- Data with Cluster IDs ---")
display(song_cluster_df.head())
print("\n")

--- Data with Cluster IDs ---


Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,cluster_id
0,0,Jason Mraz,I Won't Give Up,53QF56cjZA9RTuuMZDrSA6,68,2012,acoustic,0.483,0.303,4,...,1,0.0429,0.694,0.0,0.115,0.139,133.406,240166,3,17
1,1,Jason Mraz,93 Million Miles,1s8tP3jP4GZcyHDsjvw218,50,2012,acoustic,0.572,0.454,3,...,1,0.0258,0.477,1.4e-05,0.0974,0.515,140.182,216387,4,27
2,2,Joshua Hyslop,Do Not Let Me Go,7BRCa8MPiyuvr2VU3O9W0F,57,2012,acoustic,0.409,0.234,3,...,1,0.0323,0.338,5e-05,0.0895,0.145,139.832,158960,4,17
3,3,Boyce Avenue,Fast Car,63wsZUhUZLlh1OsyrZq7sz,58,2012,acoustic,0.392,0.251,10,...,1,0.0363,0.807,0.0,0.0797,0.508,204.961,304293,4,1
4,4,Andrew Belle,Sky's Still Blue,6nXIYClvJAfi6ujLiKqEq8,54,2012,acoustic,0.43,0.791,6,...,0,0.0302,0.0726,0.0193,0.11,0.217,171.864,244320,4,40






In [5]:
def recommend_songs(input_song_name, input_artist_name, data_df, num_recs=3, random_state=None):
    """
    Recommends songs from the same cluster and genre as the input song and artist.

    The input song is located by an exact match on both 'track_name' and
    'artist_name'; when several rows match, the first one supplies the
    reference cluster and genre. Every row matching the input title/artist
    pair is excluded from the candidates. Full rows are returned, so any
    'track_id' column present in data_df is available for building links.

    Parameters
    ----------
    input_song_name : str
        Track title to look up (compared with ==).
    input_artist_name : str
        Artist name to look up (compared with ==).
    data_df : pandas.DataFrame
        Must contain the columns 'track_name', 'artist_name', 'genre'
        and 'cluster_id'.
    num_recs : int, default 3
        Maximum number of recommendations; fewer are returned when the
        candidate pool is smaller.
    random_state : optional, default None
        Forwarded to DataFrame.sample. Pass a fixed value to get the same
        recommendations on every call; None keeps the draw random.

    Returns
    -------
    pandas.DataFrame or str
        A random sample of matching rows, or a message string when the input
        song is not in data_df or no other song shares its cluster and genre.
    """
    is_input_song = (
        (data_df['track_name'] == input_song_name) &
        (data_df['artist_name'] == input_artist_name)
    )
    matches = data_df[is_input_song]

    # Explicit emptiness check instead of catching IndexError around the whole
    # body, so unrelated errors are never misreported as "song not found".
    if matches.empty:
        return f"Song '{input_song_name}' by {input_artist_name} not found in the dataset."

    song_row = matches.iloc[0]

    recommendations = data_df[
        (data_df['cluster_id'] == song_row['cluster_id']) &
        (data_df['genre'] == song_row['genre']) &
        ~is_input_song
    ]

    if recommendations.empty:
        return "No similar songs found in the same genre."

    sample_size = min(num_recs, len(recommendations))
    return recommendations.sample(sample_size, random_state=random_state)

In [34]:
# Song to base the recommendations on.
my_song = 'Tongues'
my_artist = 'Joywave'
recommendations = recommend_songs(my_song, my_artist, song_cluster_df, num_recs=5)

# Columns shown for the input song; the recommendation table adds identifying columns in front.
profile_columns = ['genre', 'danceability', 'energy', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'valence', 'cluster_id']

try:
    is_my_song = (
        (song_cluster_df['track_name'] == my_song) &
        (song_cluster_df['artist_name'] == my_artist)
    )
    # .iloc[0] raises IndexError when nothing matches; handled below.
    input_song_row = song_cluster_df[is_my_song].iloc[0]
    input_song_track_id = input_song_row['track_id']
    print(f"--- Input Song: '{my_song}' by {my_artist} https://open.spotify.com/track/{input_song_track_id} ---")
    display(input_song_row[profile_columns])

    print(f"\n--- Recommendations based on '{my_song}' by {my_artist} ---")
    # recommend_songs returns a plain message string when it has nothing to recommend.
    if not isinstance(recommendations, pd.DataFrame):
        print(recommendations)
    else:
        display(recommendations[['track_name', 'artist_name', 'track_id'] + profile_columns])

except IndexError:
    print(f"Song '{my_song}' by {my_artist} not found in the dataset.")

--- Input Song: 'Tongues' by Joywave https://open.spotify.com/track/5mCprFWOqe0jB96v9RhLBo ---


Unnamed: 0,162989
genre,alt-rock
danceability,0.827
energy,0.682
mode,0
speechiness,0.0435
acousticness,0.00048
instrumentalness,0.0187
valence,0.933
cluster_id,2



--- Recommendations based on 'Tongues' by Joywave ---


Unnamed: 0,track_name,artist_name,track_id,genre,danceability,energy,mode,speechiness,acousticness,instrumentalness,valence,cluster_id
928095,Hablan,Dread Mar I,3aazT0KqFmg8kkJCmXqDJV,alt-rock,0.773,0.412,0,0.0468,0.231,3e-06,0.947,2
667860,El Viejo,La Vela Puerca,7gKqjNMM4ikt7KFTRsgZgL,alt-rock,0.699,0.744,0,0.0275,0.0696,0.0,0.81,2
367931,Chau,No Te Va Gustar,3uP1USIwwDpOA8uw0LjY2P,alt-rock,0.807,0.377,0,0.052,0.151,0.00165,0.917,2
585090,Que Va A Suceder,La Ley,2lSO0InO0iMn3KLa4wDtOr,alt-rock,0.687,0.406,0,0.0337,0.16,0.0553,0.923,2
163254,Make Your Mind Up,Moon Taxi,4EAoAeIwUUvevkPLvhwTpx,alt-rock,0.746,0.839,0,0.0298,0.0046,9.3e-05,0.757,2


In [26]:
import pandas as pd

# Artist whose full catalogue should be listed.
artist_name = "Joywave"

artist_songs = song_data[song_data['artist_name'] == artist_name]

if len(artist_songs) > 0:
    print(f"--- Songs by {artist_name} ---")
    # Lift the row limit only while rendering this table. option_context
    # restores the previous 'display.max_rows' value on exit, even if display()
    # raises, so the setting cannot leak into later cells.
    with pd.option_context('display.max_rows', None):
        display(artist_songs)
else:
    print(f"No songs found for artist: {artist_name}")

--- Songs by Joywave ---


Unnamed: 0.1,Unnamed: 0,artist_name,track_name,track_id,popularity,year,genre,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
2654,2654,Joywave,Golden State,6b6KGKBboxwTgTxtYZYE7I,25,2012,alt-rock,0.483,0.919,6,-5.093,1,0.105,0.0132,0.00296,0.109,0.349,137.932,224667,4
34104,34104,Joywave,True Grit,1H8EZ7uCaKBEX97B7KoHTI,24,2012,indie-pop,0.517,0.945,11,-5.512,0,0.0454,0.0589,0.0026,0.318,0.307,127.945,245907,4
34115,34115,Joywave,Anemone,0geaQiAZN8AhkxLfyAiUXe,24,2012,indie-pop,0.608,0.872,5,-5.872,0,0.0404,0.153,0.00108,0.0507,0.323,106.981,255933,4
162971,162971,Joywave,Destruction,2iLxXSM7AOzB4RCNzk4bjd,53,2015,alt-rock,0.66,0.638,0,-6.827,1,0.0379,0.000438,0.0191,0.107,0.558,108.004,184413,4
162989,162989,Joywave,Tongues,5mCprFWOqe0jB96v9RhLBo,52,2015,alt-rock,0.827,0.682,6,-7.111,0,0.0435,0.00048,0.0187,0.101,0.933,117.967,234773,4
163151,163151,Joywave,Somebody New,2vcfImyMOQjYhnKOVkjoIR,42,2015,alt-rock,0.456,0.94,3,-3.334,0,0.0703,4.9e-05,0.000165,0.328,0.463,174.038,206320,4
163220,163220,Joywave,Traveling at the Speed of Light,5zZOhHw2p7VLPau3A29hJw,39,2015,alt-rock,0.57,0.636,0,-7.161,0,0.0418,0.315,0.00428,0.0784,0.364,135.01,453600,4
163239,163239,Joywave,Nice House,0GAvMukpDYssdZgMLsSuhx,38,2015,alt-rock,0.549,0.754,5,-6.818,1,0.0489,0.188,0.0692,0.124,0.0475,134.988,340040,4
163358,163358,Joywave,Now,2UbzSDEYP8eCg5K2a8d3PM,35,2015,alt-rock,0.575,0.872,1,-6.259,1,0.0854,0.00409,0.00469,0.0944,0.274,125.994,262213,4
163542,163542,Joywave,Bad Dreams,0B7B8DAnYcWHqcwqOpenth,32,2015,alt-rock,0.733,0.327,5,-7.324,0,0.0479,0.132,0.000592,0.132,0.406,118.058,256440,4
