# Get a spotify song recommendation

In [3]:
from ScrapeLast10weeksBillboard import time_to_scrape
from CheckHotNot import load_billboard_csv
from CheckHotNot import check_if_hot
from CheckHotNot import get_random_hot_song
from songrecommender import show_if_hot
from songrecommender import get_song_df, showID_in_player
from scalingclustering import load_pkl, predict_kmeans
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import GSA
import config
import pandas as pd
from IPython.display import IFrame
from harmonicneighbours import translate_pc_cam, translate_cam_pc, harmonic_sibblings

## 1. Check if we should scrape the Billboard.

Is it time to scrape the billboard100 songs? 

In [4]:
time_to_scrape()

Our last data is from 2022-04-23


Would you like to get the current Top 100 now?(Y/N) n


## 2. Get song title and check if it is hot right now:

Get song input from user and check if it is in the Billboard Top100:

In [5]:
result, user_input =  check_if_hot()

Give me your song title! woman


No song similar to woman is hot right now.


If the song is hot another hot song is randomly selected. Alternatively we search for the title on spotify.

In [6]:

song_df = show_if_hot(result, user_input['title'])
song_df

    

not hot process


Unnamed: 0,TrackName,TrackID,SampleURL,ReleaseYear,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,key,mode,duration_ms,Popularity
0,Woman,6Uj1ctrBOjOas8xZXGqKk4,https://open.spotify.com/track/6Uj1ctrBOjOas8x...,2021-06-25,0.824,0.764,-4.175,0.0854,0.0888,0.00294,0.117,0.881,107.998,5,0,172627,91


## 3. import pickle model

We load the model to apply it to the song we found on spotify.

In [7]:
scaler = load_pkl(filename='model/scalerKmeans.pickle')
kmeans = load_pkl(filename = 'model/modelKmeans.pickle')
X = load_pkl(filename=  'model/X.pickle')
X_scaled_df = load_pkl(filename ='model/X_scaled_df')
sp_library = load_pkl(filename ='model/sp_library.pickle')

## 4. scale song

We scale the song and predict the cluster: 

In [9]:
song_df['Year'] = pd.to_datetime(song_df['ReleaseYear'], format='%Y-%m-%d')
song_df['Year'] = pd.DatetimeIndex(song_df['Year']).year

In [10]:
song_df_scaled = scaler.transform(song_df[X_scaled_df.columns])

In [11]:
song_label, song_cluster = predict_kmeans(song_df_scaled, X, kmeans)



we add the cluster as a label

In [12]:
song_df['label'] = song_label

## 5. Get song suggestion from same cluster and show spotify player

### Key/mode: Get harmonic neighbours:

In [13]:
song_key = song_df['key'].iloc[0]
song_mode = song_df['mode'].iloc[0]
song_key, song_mode

(5, 0)

Translate key and mode to camelot:

In [14]:
key_cam = translate_pc_cam(song_key, song_mode)
#key_cam = key_cam[0]
key_cam

'07B'

Get neighbours:

In [15]:
neighbours = harmonic_sibblings(key_cam)
neighbours

['08B', '06B', '01B', '12A']

Translate back to key/mode pairs:

In [16]:
pos_key_modes = [translate_cam_pc(neighbour) for neighbour in neighbours]
pos_key_modes

C major
['A#', 'Bb'] major
B major
['C#', 'Db'] minor


[([0], [0]), ([10], [0]), ([11], [0]), ([1], [1])]

Filter cluster for possible key/mode combination:

In [17]:
song_suggest_cluster = sp_library[sp_library['label']==song_df['label'][0]]

In [18]:
filtered_df = pd.DataFrame(columns=song_suggest_cluster.columns) 
for i in pos_key_modes:
    filtered_df = pd.concat([filtered_df, song_suggest_cluster.loc[(song_suggest_cluster['key']==i[0][0]) & (song_suggest_cluster['mode']==i[1][0]), :]])
song_suggest_cluster = filtered_df    

### Filter for tempo:

In [19]:
song_df['tempo'][0]+8, song_df['tempo'][0] - 8 

(115.998, 99.998)

In [20]:
song_suggest_cluster = song_suggest_cluster[(song_suggest_cluster['tempo']<(song_df['tempo'][0] + 8)) & (song_suggest_cluster['tempo']>(song_df['tempo'][0] - 8 ))]

## Select random song from filtered dataframe

In [21]:
song_suggest =  song_suggest_cluster.sample()
song_suggest_TrackID = song_suggest['TrackID'].iloc[0]

In [22]:
song_suggest_TrackID

'0e3yhVeNaTfKIWQRw9U9sY'

## Comparing songs

In [23]:
df_compair = pd.concat([song_df, song_suggest], axis=0)
df_compair.iloc[:,0:9]

Unnamed: 0,TrackName,TrackID,SampleURL,ReleaseYear,danceability,energy,loudness,speechiness,acousticness
0,Woman,6Uj1ctrBOjOas8xZXGqKk4,https://open.spotify.com/track/6Uj1ctrBOjOas8x...,2021-06-25,0.824,0.764,-4.175,0.0854,0.0888
30,Sacrifice,0e3yhVeNaTfKIWQRw9U9sY,https://p.scdn.co/mp3-preview/78a65b01e8e577c3...,1989-08-29,0.756,0.422,-12.967,0.0292,0.0165


In [24]:
df_compair.iloc[:,10:19]

Unnamed: 0,liveness,valence,tempo,key,mode,duration_ms,Popularity,Year,label
0,0.117,0.881,107.998,5,0,172627,91.0,2021.0,0
30,0.0368,0.487,112.751,11,0,304133,71.0,1989.0,0


In [25]:
player_user_choice = showID_in_player(song_df.loc[0, 'TrackID'])
print(f'This is your recommendation:')
player_user_choice

This is your recommendation:


In [26]:
player_recommendation = showID_in_player(song_suggest_TrackID)
print(f'This is your recommendation:')
player_recommendation


This is your recommendation:


## Further Steps to implement:
### 1 Create playlist/put song into user playlist
    - Did you like the song/did the song fit? 
        if yes -> select playlist and save it
        else -> (remove song from cluster?)
                recommend new song
### 2 Audio analysis, graphical representation of songs/clusters
### 3 Implement loops in order to not start from the beginning everytime
### 4 Scrape billboard: Only scrape the weeklycharts that do not yet exist in the data base

### 5 GUI

### 6 Get audio features of the Top 100 (+10weeks) in order to be able to cluster them and give a simmilar song recommendation of songs that are currently in the charts