# Moosic
## Spotify project using K-Means to cluster songs and upload them into playlists via the Spotify API

## 1. Import libraries


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans


from sklearn import set_config
set_config(transform_output='pandas')
import spotipy
import pickle
from spotipy.oauth2 import SpotifyOAuth
from google.colab import userdata # only in Google Colab for secret management

from IPython.display import display, HTML

## 2. Read the file from drive and drop columns


In [None]:
# Turn the Google Drive sharing link into a direct-download link:
# the file id is the second-to-last path segment of the sharing URL.
url = "https://drive.google.com/file/d/1TT_enEgswh6NxZyQi2x6hdqpa7k_0tlX/view?usp=sharing"
path = "https://drive.google.com/uc?export=download&id=" + url.split('/')[-2]

# Keep the downloaded frame untouched and do all further work on a copy.
original_cleaned_songs_df = pd.read_csv(path)
songs_df = original_cleaned_songs_df.copy()

# Cleaning steps, currently disabled:
#songs_df.columns = songs_df.columns.str.strip()
#songs_df = songs_df.drop([ 'artist', 'duration_ms', 'time_signature', 'key', 'mode', 'type'], axis=1)

# Preview the first rows as the cell output.
songs_df.head()

Unnamed: 0,name,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,html,cluster
0,Se Eu Quiser Falar Com Deus,658.0,259.0,-13.141,705.0,694.0,5.9e-05,975.0,306.0,110.376,1n7JnwviZ7zf0LR1tcGFq7,https://open.spotify.com/track/1n7JnwviZ7zf0LR...,0.0
1,Saudade De Bahia,742.0,399.0,-12.646,346.0,217.0,2e-06,107.0,693.0,125.039,5QGM1U0eCYrQuwSJwTm5Zq,https://open.spotify.com/track/5QGM1U0eCYrQuwS...,4.0
2,"Canta Canta, Minha Gente ...",851.0,0.73,-11.048,347.0,453.0,6.3e-05,124.0,905.0,93.698,0NLIFSZxPzQhCwnkn5PJYs,https://open.spotify.com/track/0NLIFSZxPzQhCwn...,8.0
3,Mulher Eu Sei,705.0,502.0,-18.115,471.0,879.0,4.1e-05,386.0,524.0,106.802,3mXqOdlLE1k67WsAxryPFs,https://open.spotify.com/track/3mXqOdlLE1k67Ws...,4.0
4,Rosa Morena,651.0,119.0,-19.807,38.0,916.0,343.0,104.0,402.0,120.941,7bSzjzjTkWT2CkIPPdp0eA,https://open.spotify.com/track/7bSzjzjTkWT2CkI...,6.0


In [None]:
!pip install spotipy




# 3. Authentication with Spotify

In [None]:


# OAuth settings shared by every Spotify call in this notebook.
scope = 'playlist-modify-public'
username = 'USER'
redirect_uri = 'xxx'

# Create the auth manager.
# NOTE(review): the 'xxx' values are placeholders. Do not paste real credentials
# into the notebook; load them from a secret store (e.g. Colab `userdata.get(...)`).
sp_oauth = SpotifyOAuth(
    client_id='xxx',
    client_secret='xxx',
    redirect_uri=redirect_uri,
    scope=scope,
    open_browser=False  # no local browser available; the user follows the link manually
)

# Step 1: Get the authorization URL and show it as a clickable link
auth_url = sp_oauth.get_authorize_url()
display(HTML(f'<a href="{auth_url}" target="_blank">Click here to authorize Spotify access</a>'))

# Step 2: After clicking, copy the full redirect URL and paste below.
# Surrounding whitespace from copy/paste is stripped so it cannot end up in the code.
redirect_response = input("Paste the full redirect URL here: ").strip()

# Step 3: Exchange the authorization code for an access token.
# get_access_token raises if the exchange fails, so the success message below
# is only printed after a token has actually been obtained.
code = sp_oauth.parse_response_code(redirect_response)
token_info = sp_oauth.get_access_token(code, as_dict=False)

# Hand the auth manager (not the bare token string) to the client, so the
# token is refreshed automatically instead of expiring mid-session.
spotifyObject = spotipy.Spotify(auth_manager=sp_oauth)

print("✅ Authenticated!")


Paste the full redirect URL here: http://127.0.0.1:8888/callback?code=AQDCDjGcApWssPk-t2fQzsoTh655lG9e6vcuN_vQZVI95TSntMk0GcHrz48-LndgDDhYq3F-lyqZI6UEwcdnXFu7ldqcn0F97eVTwwOEqq7DjGwoH5d0ssARFOsUIN8Azz5vf0V-dOf_5FpTr0fOtx1Ya01Wz8TtYxA5znKf32xY4RrMOzojEX1yk0gsluUfSK7gWFF5ntvMxw
✅ Authenticated!


## 4. Create one playlist per cluster (names taken from the Google Doc) and add the cluster's songs

In [None]:
# Define a dictionary for custom names for each cluster
cluster_names = {
    0: "Classic Pop & Rock",
    1: "Hip-Hop & Pop-Rock Fusion",
    2: "Jazz & Neo-Classical Piano",
    3: "Extreme Death Metal",
    4: "Indie & World-Beat",
    5: "Old-School Death Metal",
    6: "Global Pop & MPB",
    7: "Classical Masters",
    8: "Country & Folk Rock Classics",
    9: "Latin Pop & Reggaeton",
    10: "Electronic & Gospel",
    11: "Vintage Jazz & R&B",
    12: "Deep House & Techno",
    13: "Contemporary Classical & Ambient",
    14: "Urban Pop & Soft Rock",
    15: "Electronic & Hip-Hop Fusion",
    16: "Melodic Death Metal",
    17: "Electro House & Dance",
    18: "Raw Death Metal",
    19: "90s Death Metal",
    20: "Baroque & Jazz Classics",
    21: "Hardstyle",
    22: "Jazz Legends & Piano Masterpieces",
    23: "Vintage Pop & Chanson",
    24: "Dream Pop & Indie"
}

n_clusters = 25
playlist_collection = {}
for i in range(n_clusters):  # This range must match the number of clusters
    # Use the custom name for the cluster or a fallback if the cluster number is missing
    playlist_name = cluster_names.get(i, f"Cluster {i}")
    playlist_description = f'This is a test playlist for cluster {i}'

    # Create the playlist
    playlist_id = spotifyObject.user_playlist_create(user=username,
                                                     name=playlist_name,
                                                     public=True,
                                                     description=playlist_description)['id']

    # Get all song IDs for the current cluster
    id_list = list(songs_df.loc[songs_df["cluster25"] == i].id)

    # Skip to next iteration if there are no songs in this cluster
    if not id_list:
        print(f"⚠️ No tracks to add for cluster {i}. Skipping.")
        continue

    # Spotify needs certain text wrappers around track IDs to identify the target as a track
    uris = [f'spotify:track:{str(id).strip()}' for id in id_list]

    # Add tracks in chunks of 100
    for i in range(0, len(uris), 100):  # Process in chunks of 100
        uris_chunk = uris[i:i+100]
        # Add tracks to the newly created playlist
        spotifyObject.user_playlist_add_tracks(user=username, playlist_id=playlist_id, tracks=uris_chunk)
        print(f"✅ Added {len(uris_chunk)} tracks from cluster {i} to playlist '{playlist_name}'")

    playlist_collection[playlist_name] = playlist_id


✅ Added 100 tracks from cluster 0 to playlist 'Classic Pop & Rock'
✅ Added 100 tracks from cluster 100 to playlist 'Classic Pop & Rock'
✅ Added 34 tracks from cluster 200 to playlist 'Classic Pop & Rock'
✅ Added 100 tracks from cluster 0 to playlist 'Hip-Hop & Pop-Rock Fusion'
✅ Added 100 tracks from cluster 100 to playlist 'Hip-Hop & Pop-Rock Fusion'
✅ Added 100 tracks from cluster 200 to playlist 'Hip-Hop & Pop-Rock Fusion'
✅ Added 54 tracks from cluster 300 to playlist 'Hip-Hop & Pop-Rock Fusion'
✅ Added 100 tracks from cluster 0 to playlist 'Jazz & Neo-Classical Piano'
✅ Added 100 tracks from cluster 100 to playlist 'Jazz & Neo-Classical Piano'
✅ Added 81 tracks from cluster 200 to playlist 'Jazz & Neo-Classical Piano'
✅ Added 100 tracks from cluster 0 to playlist 'Extreme Death Metal'
✅ Added 100 tracks from cluster 100 to playlist 'Extreme Death Metal'
✅ Added 26 tracks from cluster 200 to playlist 'Extreme Death Metal'
✅ Added 100 tracks from cluster 0 to playlist 'Indie & World

In [None]:
# Check how many songs are in each cluster
cluster_sizes = songs_df['cluster25'].value_counts()
print(cluster_sizes)

# To see which clusters have no tracks
empty_clusters = cluster_sizes[cluster_sizes == 0].index.tolist()
print(f"Empty clusters: {empty_clusters}")


cluster25
15    419
14    389
1     354
16    353
18    308
21    304
2     281
4     253
24    237
13    236
0     234
3     226
6     218
5     179
20    178
10    151
11    150
23    142
7     115
19    114
9      96
22     86
17     84
12     70
8      58
Name: count, dtype: int64
Empty clusters: []


## 5. Overwriting an existing playlist

Authentication

In [None]:
# Reuse the auth manager configured in section 3 instead of building a second one.
# The previous version referenced `client_id`, `client_secret` and `redirectUri`,
# none of which are defined anywhere in this notebook, so it raised a NameError
# on a fresh kernel. `sp_oauth` already carries the same scope and redirect URI,
# so this normally needs no second authorization prompt.
token = sp_oauth
spotifyObject = spotipy.Spotify(auth_manager = token)

In [None]:
# Restore the saved playlist-name -> playlist-id mapping from disk.
# NOTE(review): pickle.load can execute arbitrary code; only load a file you created yourself.
with open("./playlists.pkl", "rb") as f:
    playlist_collection = pickle.load(f)

# Build track URIs for 10 randomly sampled songs.
uris = ['spotify:track:' + str(track_id).strip() for track_id in songs_df.sample(10).id]

# Target playlist: the one stored under "my_playlist_cluster_0".
# Its current contents are replaced by the 10 sampled songs.
playlist_id = playlist_collection["my_playlist_cluster_0"]
spotifyObject.user_playlist_replace_tracks(
    user=username,
    playlist_id=playlist_id,
    tracks=uris,
)

## 6. If needed, delete the playlists whose names start with "my_playlist_cluster_"

In [None]:
# Get all playlists of the current user
playlists = spotifyObject.current_user_playlists(limit=50)

for playlist in playlists['items']:
    playlist_name = playlist['name']
    playlist_id = playlist['id']
    owner_id = playlist['owner']['id']

    # Only delete your own playlists that start with the desired prefix
    if playlist_name.startswith("my_playlist_cluster_") and owner_id == username:
        try:
            spotifyObject.current_user_unfollow_playlist(playlist_id)
            print(f"✅ Deleted playlist: {playlist_name}")
        except Exception as e:
            print(f"❌ Failed to delete {playlist_name}: {e}")


✅ Deleted playlist: my_playlist_cluster_22
✅ Deleted playlist: my_playlist_cluster_24
