# GNOD 4

In [1]:
import numpy as np
import pandas as pd

from sklearn import cluster, datasets
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import pickle

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

### Load data


In [2]:
# Song table; its 'title' column is what the recommender matches user input against
all_songs = pd.read_csv("all_songs.csv")

# Numerical feature table, read and stripped of the leftover 'Unnamed: 0' column in one chain
all_songs_numerical = pd.read_csv('all_songs_numerical.csv').drop(columns=['Unnamed: 0'])

### Loading the pickled scaler and KMeans model

In [3]:
# SECURITY NOTE: pickle.load can run arbitrary code while unpickling, so only
# load .pkl files that were produced by this project.
# The `with` blocks close each file handle as soon as its object is loaded
# (the bare open(...) calls used before left both handles open).
with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

with open('kmeans.pkl', 'rb') as kmeans_file:
    kmeans = pickle.load(kmeans_file)

### Connecting to Spotify

In [4]:
def storing_secrets(password):
    """Parse a ``<key> -> <value>`` text file into a dictionary.

    Parameters
    ----------
    password : str
        Path of the secrets file to read (despite the parameter name, this is
        a file path). Each non-blank line is expected to look like
        ``<key> -> <value>``.

    Returns
    -------
    dict
        Maps every key to its value; both are stripped of surrounding
        whitespace. If a value itself contains `` -> ``, everything after the
        first separator is kept.

    Raises
    ------
    IndexError
        If a non-blank line does not contain the `` -> `` separator.
    """
    separator = ' -> '
    secrets_dict = {}

    # The context manager closes the file even if parsing fails part-way.
    with open(password, "r") as secrets_file:
        for line_number, line in enumerate(secrets_file, start=1):
            # Skip empty AND whitespace-only lines (e.g. a trailing "   ").
            if not line.strip():
                continue

            # Split on the first separator only, so the value is never truncated.
            parts = line.split(separator, 1)
            if len(parts) != 2:
                # Exception type kept as IndexError (what the unguarded [1]
                # lookup used to raise); the line content is deliberately not
                # echoed because it may hold a secret.
                raise IndexError(
                    f"line {line_number} of {password!r} has no '{separator}' separator"
                )

            key, value = parts
            secrets_dict[key.strip()] = value.strip()

    return secrets_dict

In [5]:

# Read the API credentials from the local secrets file
secrets_dict = storing_secrets("Spotify_API.txt")

# Create the SpotiPy client, authenticating with the client ID / secret read above
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=secrets_dict['Client ID'],
        client_secret=secrets_dict['Client Secret'],
    )
)

### Function to get features from Spotify

In [6]:
def features (track):
    """Return the Spotify audio features of the first search hit for `track`.

    The title is searched through the module-level `sp` client with a
    ``track:<title>`` query; the id of the first returned item is then passed
    to `sp.audio_features`, whose result is returned unchanged.

    Raises IndexError when the search yields no items (the `[0]` lookup on an
    empty result); get_song_cluster catches exactly that to detect unknown
    titles.
    """
    search_result = sp.search(q='track:' + track, type='track')
    first_hit_id = search_result["tracks"]["items"][0]["id"]
    return sp.audio_features(first_hit_id)

### Recommender

In [52]:
def _random_index_in_cluster(labels, cluster, exclude=None):
    """Return a random row position whose label equals `cluster`.

    `exclude` (a row position, or None) is left out of the draw whenever the
    cluster has at least one other member, so a song is not recommended to
    itself.
    """
    candidates = np.where(labels == cluster)[0]
    if exclude is not None:
        others = candidates[candidates != exclude]
        if others.size > 0:
            candidates = others
    return np.random.choice(candidates)


def get_song_cluster(new_song, spotify_data, n_clusters=6, random_state=1234):
    """Display a random song recommendation from the cluster of `new_song`.

    How it works
    ------------
    1. The numerical features in `spotify_data` are standardised and KMeans is
       fitted on them (k = 6 by default, the value chosen from the elbow plot
       and the second maximum of the silhouette score).
    2. If `new_song` matches a title in the module-level `all_songs` frame
       (case-insensitive), the cluster already assigned to that row is used.
    3. Otherwise the title is sent to the Spotify API through `features`; the
       returned audio features are scaled with the SAME scaler fitted in
       step 1 and assigned to a cluster by the fitted KMeans.
    4. A random song from that cluster is displayed as the recommendation.
    5. If Spotify has no result either, the user is prompted for another
       title and steps 2-4 are repeated (the model is not refitted).

    Parameters
    ----------
    new_song : str
        Title typed by the user.
    spotify_data : pandas.DataFrame
        Numerical features, one row per song.
        Assumes rows are positionally aligned with `all_songs` -- TODO confirm.
    n_clusters : int, default 6
        Number of KMeans clusters.
    random_state : int, default 1234
        Seed handed to KMeans so the clustering is reproducible.

    Returns
    -------
    None
        The result is rendered with IPython's `display`.

    Notes
    -----
    The scaler and the KMeans model are fitted here on `spotify_data`; the
    pickled module-level `scaler` / `kmeans` objects are not used, so the
    scaler and the clustering can never get out of sync.
    """
    from IPython.display import Markdown, display

    # Fit once per call. The fitted scaler is kept so that new songs are
    # transformed with the training statistics; re-fitting a scaler on the
    # already-scaled matrix would leave new features practically unscaled.
    feature_scaler = StandardScaler().fit(spotify_data)
    X_prep = feature_scaler.transform(spotify_data)
    model = KMeans(n_clusters=n_clusters, random_state=random_state).fit(X_prep)
    labels = model.labels_

    # Lower-cased copy used only for matching: the shared `all_songs` frame is
    # not modified by this function.
    titles_lower = all_songs['title'].str.lower()

    # Loop instead of recursing on a failed look-up: no refit per retry and no
    # growing call stack.
    while True:
        new_song = new_song.lower()
        matches = np.where((titles_lower == new_song).values)[0]

        if matches.size > 0:
            # Known song: its cluster is the label of its own (first matching) row.
            song_idx = matches[0]
            song_cluster = int(labels[song_idx])

            random_song_idx = _random_index_in_cluster(labels, song_cluster, exclude=song_idx)
            random_song = all_songs.iloc[random_song_idx]['title']

            display(Markdown(f"The song '**{new_song.capitalize()}**' belongs to cluster '**{song_cluster}**'.\n\n"
                f"Here's a random song from the same cluster: '**{random_song.capitalize()}**'."))
            return

        # Unknown song: ask the Spotify API for its audio features.
        try:
            feature = features(new_song)
        except IndexError:
            # `features` raises IndexError when the search returns no track.
            feature = None

        # Presumably the API can answer with [None] for a track that has no
        # audio features; that case is treated like "not found" as well.
        if feature and feature[0] is not None:
            # Keep only the numerical audio features.
            df_new_song = pd.DataFrame(feature).drop(
                ['id', 'uri', 'track_href', 'type', 'analysis_url'], axis=1
            )

            # Same scaler and same model as the training data.
            std_new_song = feature_scaler.transform(df_new_song)
            new_cluster = model.predict(std_new_song)[0]

            random_song_idx = _random_index_in_cluster(labels, new_cluster)
            random_song = all_songs.iloc[random_song_idx]['title']

            display(Markdown(f"'**{random_song.capitalize()}**': This is your recommendation! Enjoy it!"))
            return

        # Neither the local data nor Spotify knows the title: ask again.
        print ("Sorry, we don´t have a reccomendation for you, try again")
        new_song = input("Introduce the name of a song: ")

In [53]:
# Peek at the title column the recommender matches against
all_songs.loc[:, 'title']

0                                                   diles
1       cayó la noche (feat. cruz cafuné, abhir hathi,...
2                                                   limbo
3                                                 lovumba
4                                          no sigue modas
                              ...                        
5098                                  walking on sunshine
5099                          walking on the chinese wall
5100                                   we built this city
5101                    you spin me round (like a record)
5102                  you're my heart, you're my soul '98
Name: title, Length: 5103, dtype: object

In [54]:
# Input
new_song = input("Introduce the name of a song: ").lower()
# Applying function
get_song_cluster(new_song, all_songs_numerical)

Introduce the name of a song: gfhsdjgsdg
Sorry, we don´t have a reccomendation for you, try again
Introduce the name of a song: america
