In [None]:
pip install spotipy

Collecting spotipy
  Downloading spotipy-2.19.0-py3-none-any.whl (27 kB)
Collecting urllib3>=1.26.0
  Downloading urllib3-1.26.7-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 5.0 MB/s 
[?25hCollecting requests>=2.25.0
  Downloading requests-2.26.0-py2.py3-none-any.whl (62 kB)
[K     |████████████████████████████████| 62 kB 866 kB/s 
Installing collected packages: urllib3, requests, spotipy
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.24.3
    Uninstalling urllib3-1.24.3:
      Successfully uninstalled urllib3-1.24.3
  Attempting uninstall: requests
    Found existing installation: requests 2.23.0
    Uninstalling requests-2.23.0:
      Successfully uninstalled requests-2.23.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests~=2.23.0, but you have requests 2

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import pandas as pd

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor


from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn import tree

In [None]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in the notebook. SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are
# the variable names spotipy itself looks for.
CLIENT_ID     = os.environ.get('SPOTIPY_CLIENT_ID')
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET environment '
        'variables before running this notebook.'
    )
PLAYLIST_ID   = '37i9dQZF1DWYJ5kmTbkZiz' # Spotify playlist id

# The audio-features endpoint accepts at most 100 track ids per request.
AUDIO_FEATURES_BATCH = 100

# API Login
client_credentials_manager = SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Obtain Tracks of Playlist
tracks = sp.user_playlist_tracks(user='spotify', playlist_id=PLAYLIST_ID)
tracks_uri_list = [x['track']['uri'] for x in tracks['items']]

# Obtain Features of Tracks
# Request the features in batches instead of one HTTP call per track; the
# results are appended in the same order as tracks_uri_list.
features = []
for start in range(0, len(tracks_uri_list), AUDIO_FEATURES_BATCH):
    batch = tracks_uri_list[start:start + AUDIO_FEATURES_BATCH]
    features.extend(sp.audio_features(batch))

# Create Feature Dataframe (one row per track, one column per audio feature)
df = pd.DataFrame(features)

In [None]:
# Collect the display metadata (title and artists) of every playlist entry.
playlist_entries = tracks['items']

# Track titles, in playlist order
track_titles = [entry['track']['name'] for entry in playlist_entries]

# All artists of a track, joined into one comma-separated string
artists_names = [
    ', '.join(artist['name'] for artist in entry['track']['artists'])
    for entry in playlist_entries
]

# Assemble the metadata frame column by column
tracks_df = pd.DataFrame()
tracks_df['track_name'] = track_titles
tracks_df['artists_names'] = artists_names

DATA PREPROCESSING

Since none of the features has missing data, no rows will be dropped. However, some features add no value to the modeling and will be dropped: `analysis_url`, `id`, `track_href`, `type` and `uri`.

In [None]:
# Define Features to Drop (identifiers / URLs that carry no audio information)
dropCols = ['analysis_url', 'id', 'track_href', 'type', 'uri']

# Drop Features
# Build a new frame instead of mutating in place, and ignore columns that are
# already gone, so that re-running this cell does not raise a KeyError.
df = df.drop(columns=dropCols, errors='ignore')

# Ensure Features are dropped
df.columns

Index(['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'time_signature'],
      dtype='object')

In [None]:
# Separate the prediction target from the predictors.

dependentVar = 'danceability' # the quantity the models will try to predict

# Dependent variable [y]: the target column as a NumPy array
y = df[dependentVar].to_numpy()

# Independent variables [X]: every remaining column of df
X = df.drop(columns=[dependentVar])

In [None]:
# Names of the predictor columns used to predict danceability
features = list(X.columns)
print(features)

['energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']


In [None]:
#Feature Scaling
# Standardize every predictor column (zero mean, unit variance) so that no
# feature dominates purely because of its units.
# NOTE(review): the scaler is fitted on all rows, before the train/validation
# split performed in a later cell, so validation statistics leak into training.
# Consider fitting on the training rows only.
sc = StandardScaler().fit(X)
X  = sc.transform(X)

Split Data (Train & Test)

The dataset has 50 rows, so we hold out 10 rows for testing and train on the remaining 40. This corresponds to a test size of 0.20 (20% of the data for testing, 80% for training).

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split reproducible across runs. train_test_split is already imported in the
# notebook's import cell, so it is not re-imported here.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.20, random_state=1234)

## <font color='orange'> DATA MODELING - PART 1 - UNSUPERVISED LEARNING </font>

In [32]:
#Feature Scaling
# Rescale every audio feature to the [0, 1] range before K-Means.
# The 'cluster' label column is excluded (when present) so that re-running
# this cell after clustering does not feed the previous labels back in as a
# feature; on a first run the column does not exist and nothing is removed.
minMax    = MinMaxScaler()
clust_df  = minMax.fit_transform(df.drop(columns=['cluster'], errors='ignore'))

In [33]:
#Build a model
# Cluster the scaled tracks into 3 groups. n_init is pinned to 10 (the
# historical scikit-learn default) so that the clustering does not change on
# newer scikit-learn releases where the default differs; random_state makes
# the centroid initialisation reproducible.
kMeansModel = KMeans(n_clusters=3, n_init=10, max_iter=1000, random_state=50)
kMeansModel.fit(clust_df)

# Store each track's cluster label alongside its audio features
df['cluster'] = kMeansModel.labels_

In [34]:
# Group a "Song: <title> by <artists>" label under the cluster id of each
# track. Rows of tracks_df and df are paired by position.
cluster_songs = {}
song_rows = zip(tracks_df['track_name'], tracks_df['artists_names'], df['cluster'])
for title, artists, clust in song_rows:
    cluster_songs.setdefault(clust, []).append('Song: {} by {}'.format(title, artists))


# Print every cluster id followed by its tab-indented songs and a blank gap
for clust in cluster_songs:
    print(clust)
    for song in cluster_songs[clust]:
        print("\t", song)
    print("\n")

0
	 Song: One Dance by Drake, WizKid, Kyla
	 Song: Sunflower - Spider-Man: Into the Spider-Verse by Post Malone, Swae Lee
	 Song: Can't Hold Us - feat. Ray Dalton by Macklemore & Ryan Lewis
	 Song: Somebody That I Used To Know by Gotye, Kimbra
	 Song: Wake Me Up by Avicii
	 Song: Cheerleader - Felix Jaehn Remix Radio Edit by OMI, Felix Jaehn
	 Song: Thinking out Loud by Ed Sheeran
	 Song: Despacito by Luis Fonsi, Daddy Yankee
	 Song: Old Town Road - Remix by Lil Nas X, Billy Ray Cyrus
	 Song: Danza Kuduro by Don Omar, Lucenzo
	 Song: Uptown Funk (feat. Bruno Mars) by Mark Ronson, Bruno Mars
	 Song: Despacito - Remix by Luis Fonsi, Daddy Yankee, Justin Bieber
	 Song: Whistle by Flo Rida
	 Song: In My Feelings by Drake


1
	 Song: Lean On (feat. MØ & DJ Snake) by MØ, DJ Snake, Major Lazer
	 Song: 7 rings by Ariana Grande
	 Song: Shape of You by Ed Sheeran
	 Song: Grenade by Bruno Mars
	 Song: Señorita by Shawn Mendes, Camila Cabello
	 Song: Airplanes (feat. Hayley Williams of Paramore) b