# Assemble the project pipeline

## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import config
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
#Initialize SpotiPy with user credentias
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

## Get features

In [3]:
# Get random song and scale its features (get features)
track = sp.search(q="Oops I did it again", type="track", limit=10 ,market="GB")
features = pd.DataFrame(sp.audio_features(track["tracks"]["items"][0]["id"]))
features = features[['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
        'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms']]
features


Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.751,0.834,-5.444,0.0437,0.3,1.8e-05,0.355,0.894,95.053,211160


In [4]:
#load the scaling function with pickle
def load(filename = "pulpkokiscaler.pickle"): 
    try: 
        with open(filename, "rb") as f: 
            return pickle.load(f) 
        
    except FileNotFoundError: 
        print("File not found!") 
        
pulpscaler = load("pulpkokiscaler.pickle")
pulpscaler

StandardScaler()

In [5]:
#Apply the trained scaler to transform the user’s song audio features

scaledfeat = pulpscaler.transform(features)
print(scaledfeat)

[[ 0.9862179   0.77732441  0.628762   -0.24845721  0.49040464 -0.26965034
   1.04449137  1.31864502 -0.97191509 -0.56855426]]


In [6]:
#load the kmeans function with pickle
def load(filename = "pulpkokikmeans.pickle"): 
    try: 
        with open(filename, "rb") as f: 
            return pickle.load(f) 
        
    except FileNotFoundError: 
        print("File not found!") 
        
pulpkey = load("pulpkokikmeans.pickle")
pulpkey

KMeans(n_clusters=10, random_state=1234)

In [7]:
keyfeat = pulpkey.predict(features)
print(keyfeat)

[5]


In [8]:
df_scaled2 = pd.read_csv('pulpkokicluster.csv')
df_scaled2

Unnamed: 0.1,Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,cluster
0,0,-1.412265,1.388068,0.455862,0.084183,-0.816597,-0.269260,-0.554382,0.078550,0.045338,0.503697,0
1,1,0.714435,0.772149,-1.485355,0.547572,-0.755410,-0.269743,0.718447,0.766603,-0.667191,-0.694534,7
2,2,-2.241202,0.161405,0.654205,-0.258071,-0.794145,-0.267045,0.078897,-1.565575,0.607197,-0.257296,0
3,3,-2.016981,-0.180198,-0.397649,-0.415739,-0.749303,-0.231205,1.119732,-1.009533,1.731555,0.944540,0
4,4,-0.209626,-0.666722,0.035757,-0.217693,-0.368928,-0.269757,-0.485411,-0.241474,-0.960328,-0.230188,2
...,...,...,...,...,...,...,...,...,...,...,...,...
1610,1611,0.816354,-1.215356,-0.458944,-0.538796,1.306116,-0.269757,-0.664736,-1.081538,0.357398,-0.089392,8
1611,1612,-0.230010,-2.457547,-0.807924,-0.486881,2.518779,-0.269766,1.050761,-0.473492,-1.535618,0.042972,8
1612,1613,0.218432,0.761797,1.062457,2.410742,-0.695649,-0.269746,0.555424,0.482581,2.037145,-0.379687,3
1613,1614,1.122109,-0.019748,0.502412,3.218308,0.390076,-0.269689,3.094812,0.974619,-0.035207,0.463679,3


In [9]:
filtered_df = df_scaled2[(df_scaled2['cluster'] == 5)]
filtered_df

Unnamed: 0.1,Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,cluster
15,15,-2.173256,-0.951391,-1.926278,-0.402279,-0.233703,-0.269468,-0.504221,-0.485493,-0.817333,1.539948,5
48,48,-2.1189,-0.221604,-0.490749,-0.529183,-0.747122,1.165669,-0.479141,-1.353559,-1.54728,4.591133,5
57,57,1.088136,0.896368,-0.297321,-0.511878,-0.637633,2.118275,-1.000813,1.610667,0.417365,3.951244,5
81,81,-0.399874,-1.500025,-0.628664,-0.584943,-0.590522,-0.269724,-0.682919,-1.577576,0.808165,2.251395,5
90,90,-1.622896,-1.199829,-0.810526,-0.498418,-0.664242,-0.25489,3.138702,-1.50157,-0.294261,4.348479,5
120,120,-0.54256,-1.650123,-1.441697,-0.550333,-0.480161,-0.269678,-0.7939,-1.245551,0.38994,2.337149,5
127,127,-0.073735,-1.3344,-0.61999,-0.540719,0.45987,-0.269757,-0.109206,-1.185546,-0.40272,1.416509,5
130,130,-2.832329,-1.520728,-1.988731,-0.40997,0.302835,-0.268774,-0.41644,-0.97753,1.831174,1.967674,5
191,191,-0.739602,-1.732936,-1.845033,-0.517646,-0.412985,-0.087074,0.442563,-0.573499,-0.268077,1.88446,5
225,225,-0.624095,-1.779518,-1.492005,-0.05618,-0.81203,-0.156888,0.154138,-1.181546,0.944426,0.855403,5


In [None]:
#Create a function to fit the K-Means clustering method using all the songs 
#contained in the Spotify collected data set.

In [None]:
#Use the trained K-means model to predict the cluster of each song in the internal databases 
#and add this information to the internal databases

# User Input final scenario

## Get the song name from the user as an input

In [None]:
user_song = input("Name of the song ")

In [None]:
track = sp.search(q="Somebody", type="track", limit=5)
n_songs = len(track['tracks']['items'])
for i in range(n_songs):
    artist = track['tracks']['items'][i]['artists'][0]['name']
    album =  track['tracks']['items'][i]['album']['name']
    print(i, "Artist: ", artist, "\n", "Album: ", album, "\n")

## Play the input song in music embed player

['spotify' https://open.spotify.com/track/7sLpSWxQazJzDVG6YGzlVs?si=ad11087c4b534a6c

In [1]:
from IPython.display import IFrame

#track_id = "7sLpSWxQazJzDVG6YGzlVs"

IFrame(src="https://open.spotify.com/embed/track/7sLpSWxQazJzDVG6YGzlVs",
       width="320",
       height="80",
       frameborder="0",
       allowtransparency="true",
       allow="encrypted-media",
      )

## Get the audio features for that song from spotify API. using sp.audio_features(trackid) . pay attention to keep only the audio features columns.

In [25]:
trackid = "7sLpSWxQazJzDVG6YGzlVs"
sp.audio_features(trackid)

[{'danceability': 0.494,
  'energy': 0.887,
  'key': 6,
  'loudness': -4.297,
  'mode': 0,
  'speechiness': 0.398,
  'acousticness': 0.00661,
  'instrumentalness': 0,
  'liveness': 0.103,
  'valence': 0.548,
  'tempo': 89.554,
  'type': 'audio_features',
  'id': '7sLpSWxQazJzDVG6YGzlVs',
  'uri': 'spotify:track:7sLpSWxQazJzDVG6YGzlVs',
  'track_href': 'https://api.spotify.com/v1/tracks/7sLpSWxQazJzDVG6YGzlVs',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7sLpSWxQazJzDVG6YGzlVs',
  'duration_ms': 234627,
  'time_signature': 4}]

In [None]:
track = sp.search(q="Somebody", type="track", limit=5)
n_songs = len(track['tracks']['items'])
for i in range(n_songs):
    artist = track['tracks']['items'][i]['artists'][0]['name']
    album =  track['tracks']['items'][i]['album']['name']
    print(i, "Artist: ", artist, "\n", "Album: ", album, "\n")

## Using mode.predict(new scaled audio record for the new song) to predict the cluster (label) for the new song.

## Return random song from the same cluster   that the new song belongs to from your data frame and suggest it to the user

## Play it using embedded music player.