# Import Packages

In [12]:
!pip install plotly



In [13]:
import spotipy

from sklearn.cluster import KMeans, DBSCAN

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import MinMaxScaler

import creds

## Get User Saved Tracks

In [14]:
# Page through the User's Saved Tracks, 50 at a time
user_tracks = []
offset = 0
while True:
    track_batch = creds.session.current_user_saved_tracks(limit=50, offset=offset)["items"]
    user_tracks.extend(track_batch)

    # A page holding fewer than 50 items is the last one
    if len(track_batch) < 50:
        break

    offset += 50

## Get Track Features of Library

In [15]:
# Alias the Spotify session object exposed by the local `creds` module
# so later cells can refer to it simply as `session`.
session = creds.session

In [16]:
# Pull the Spotify id out of every saved-track entry in the user library
track_ids = [saved["track"]["id"] for saved in user_tracks]

In [17]:
# Get track features of the User's Library, 50 ids per request.
# Stepping over the id list with range() means no request is ever sent with an
# empty id list (the old "short batch" check issued one when the library size
# was an exact multiple of 50, or when the library was empty).
library_features = []
for offset in range(0, len(track_ids), 50):
    id_batch = track_ids[offset: offset + 50]
    feature_batch = session.audio_features(tracks = id_batch)

    # Spotify returns a null entry for any track it has no audio features for;
    # skip those so the later pd.DataFrame(library_features) call does not fail.
    library_features.extend(item for item in feature_batch if item is not None)

In [18]:
# Build the Library audio-feature table, indexed by track id
df = pd.DataFrame(library_features).set_index("id")

df.head()

Unnamed: 0_level_0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0O5DPkLlre5yNwH6Sqm2dD,0.555,0.505,1,-7.334,0,0.0333,0.0519,0.00361,0.148,0.293,117.481,audio_features,spotify:track:0O5DPkLlre5yNwH6Sqm2dD,https://api.spotify.com/v1/tracks/0O5DPkLlre5y...,https://api.spotify.com/v1/audio-analysis/0O5D...,309853,4
1VSuFS7PahCN3SWbOcQ98m,0.654,0.875,1,-4.845,1,0.0744,0.000417,0.0086,0.194,0.522,99.975,audio_features,spotify:track:1VSuFS7PahCN3SWbOcQ98m,https://api.spotify.com/v1/tracks/1VSuFS7PahCN...,https://api.spotify.com/v1/audio-analysis/1VSu...,171621,4
3bGiy58RXst6Cem4VkZSPX,0.872,0.847,1,-3.103,0,0.0749,0.285,0.00872,0.111,0.797,134.995,audio_features,spotify:track:3bGiy58RXst6Cem4VkZSPX,https://api.spotify.com/v1/tracks/3bGiy58RXst6...,https://api.spotify.com/v1/audio-analysis/3bGi...,156578,4
1fah1uAs7HeTYDlNftKr3K,0.399,0.492,6,-10.777,0,0.0538,0.058,0.00101,0.11,0.159,91.066,audio_features,spotify:track:1fah1uAs7HeTYDlNftKr3K,https://api.spotify.com/v1/tracks/1fah1uAs7HeT...,https://api.spotify.com/v1/audio-analysis/1fah...,210463,4
4oNRSiodl43zieCRCVwbeO,0.59,0.632,7,-9.872,1,0.0496,0.0921,1e-05,0.122,0.401,84.753,audio_features,spotify:track:4oNRSiodl43zieCRCVwbeO,https://api.spotify.com/v1/tracks/4oNRSiodl43z...,https://api.spotify.com/v1/audio-analysis/4oNR...,188918,4


In [19]:
# Scale the tempo variable between 0 and 1 so it is comparable with the other
# audio features. The scaler is fitted once; the previous extra fit_transform
# call discarded its result and only repeated the work.
mms = MinMaxScaler()

# Assign scaled tempo value
df.loc[:, "tempo"] = mms.fit_transform(df[["tempo"]])

In [21]:
# 3-D scatter of the library: energy / valence / tempo axes, coloured by danceability
px.scatter_3d(df, x="energy", y="valence", z="tempo", color="danceability")

The 3-D scatter of my Spotify library's audio features is incredibly dense. If I were to build playlists from recommendation clusters, it would be difficult to isolate representative example tracks. I therefore used DBSCAN to identify smaller, denser clusters that are perhaps more indicative of my music taste.

In [22]:
# Feature matrix used for clustering: one row per track, one column per feature
library_array = np.array(
    df[["danceability", "energy", "valence", "tempo", "acousticness"]]
)

In [23]:
# Fit DBSCAN on the library feature matrix
db = DBSCAN(eps=0.08)
db.fit(library_array)

# Store each track's cluster label and show how many tracks fall under each label
df["label"] = db.labels_
df["label"].value_counts()

-1     1689
 1      284
 6       16
 0       15
 12      10
 14      10
 5        9
 2        7
 3        7
 13       6
 4        6
 7        6
 10       6
 16       6
 17       5
 18       5
 8        5
 22       5
 15       5
 24       5
 23       5
 19       5
 25       5
 20       4
 9        4
 11       4
 21       3
Name: label, dtype: int64

In [25]:
# Plot only the tracks that DBSCAN assigned to a cluster.
# DBSCAN numbers clusters from 0 and uses -1 for noise, so the filter must be
# ">= 0"; the previous "> 0" silently dropped every track in cluster 0.
px.scatter_3d(df.loc[df["label"] >= 0],
              x = "valence",
              y = "energy",
              z = "tempo",
              color = "label")

## Get Track Features for Recent Songs

In [26]:
# Collect the ids of the user's short-term top tracks (up to 50)
top_items = session.current_user_top_tracks(limit=50, time_range="short_term")["items"]
recent_tracks = [item["id"] for item in top_items]

In [27]:
# Create DataFrame of Recent Track audio features.
# Spotify returns a null entry for any track it has no audio features for;
# drop those first, because pd.DataFrame cannot build rows from None entries.
recent_features = session.audio_features(tracks = recent_tracks)
df = pd.DataFrame([item for item in recent_features if item is not None])
df.set_index("id", inplace = True)

In [28]:
# Scale the tempo variable between 0 and 1 so it is comparable with the other
# audio features. The scaler is fitted once; the previous extra fit_transform
# call discarded its result and only repeated the work.
mms = MinMaxScaler()

# Assign scaled tempo value
df.loc[:, "tempo"] = mms.fit_transform(df[["tempo"]])

In [30]:
# 3-D scatter of the recent tracks: danceability / valence / energy axes, coloured by tempo
px.scatter_3d(df, x="danceability", y="valence", z="energy", color="tempo")

## Create Clusters for Recent Tracks

In [31]:
# Feature matrix for the recent tracks: one row per track, one column per feature
library_array = np.array(
    df[["danceability", "energy", "valence", "tempo", "acousticness"]]
)

In [32]:
# Fit DBSCAN on the recent-track feature matrix
db = DBSCAN(eps=0.244, min_samples=2)
db.fit(library_array)

# Store each track's cluster label and show how many tracks fall under each label
df["label"] = db.labels_
df["label"].value_counts()

-1    16
 6    11
 3     5
 4     4
 2     4
 1     3
 5     3
 7     2
 0     2
Name: label, dtype: int64

In [33]:
# Plot only the tracks that DBSCAN assigned to a cluster.
# DBSCAN numbers clusters from 0 and uses -1 for noise, so the filter must be
# ">= 0"; the previous "> 0" silently dropped every track in cluster 0.
px.scatter_3d(df.loc[df["label"] >= 0],
              x = "valence",
              y = "energy",
              z = "tempo",
              color = "label")

In [114]:
# Get two random tracks from each cluster as the basis for the recommender.
# Iterate over the labels DBSCAN actually produced instead of a hard-coded
# range(6): that range skipped any cluster numbered 6 or higher and raised when
# fewer than six clusters existed. Label -1 is DBSCAN noise, not a cluster.
playlist_groups = []
for lab in sorted(set(df["label"]) - {-1}):
    group_df = df.loc[df["label"] == lab]
    # Never ask for more samples than the cluster holds
    playlist_groups.append(list(group_df.sample(min(2, len(group_df))).index))

In [130]:
# For every seed group: ask the Spotify Recommender for tracks, then create a
# numbered playlist for the user and fill it with the recommended track ids
for group_number, seed_ids in enumerate(playlist_groups):
    recs = session.recommendations(seed_tracks = seed_ids)
    rec_songs = [rec["id"] for rec in recs["tracks"]]

    res = session.user_playlist_create(creds.user_id, name = f"S.O.N.G. {group_number}")
    session.user_playlist_add_tracks(creds.user_id, res["id"], rec_songs)

The playlists that result from examining and clustering my recently played tracks most closely resemble Spotify's Daily Mix playlists. But you made this one, so that's cool!