In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import config
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Build the Spotify client used by every API call below.  Authentication uses
# SpotifyClientCredentials, with the id/secret pair read from the local
# `config` module so that no credentials live in the notebook itself.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

In [3]:
def load(filename = "filename.pickle"):
    """Unpickle and return the object stored in ``filename``.

    Parameters
    ----------
    filename : str
        Path of the pickle file to read.

    Returns
    -------
    object or None
        The unpickled object, or ``None`` when the file does not exist
        (a message naming the missing file is printed in that case).

    Notes
    -----
    ``pickle.load`` can execute arbitrary code while deserialising, so only
    call this on files you created yourself.
    """
    try:
        with open(filename, "rb") as f:
            return pickle.load(f)
    except FileNotFoundError:
        # Name the file: several artefacts are loaded with this helper, and a
        # bare "not found" does not say which one is missing.
        print(f"File not found! ({filename})")
        return None

In [4]:
# Read cluster_id.csv (expected next to this notebook) into a DataFrame.
cluster_id = pd.read_csv("cluster_id.csv")

In [5]:
# Read the list of currently hot songs (music_list.csv) into a DataFrame.
hot_songs = pd.read_csv("music_list.csv")

In [6]:
# Preview the hot-songs table exactly as it was read from music_list.csv.
hot_songs

Unnamed: 0.1,Unnamed: 0,title,artist_name
0,0,We Don't Talk About Bruno,"Carolina Gaitan, Mauro Castillo, Adassa, Rhenz..."
1,1,Do We Have A Problem?,Nicki Minaj X Lil Baby
2,2,Easy On Me,Adele
3,3,Heat Waves,Glass Animals
4,4,Stay,The Kid LAROI & Justin Bieber
...,...,...,...
95,95,Iffy,Chris Brown
96,96,When I'm Gone,Alesso / Katy Perry
97,97,Fair Trade,Drake Featuring Travis Scott
98,98,Megan's Piano,Megan Thee Stallion


In [7]:
# Remove the "Unnamed: 0" column (presumably an index saved into the CSV by
# an earlier export).  Re-binding the name instead of using `inplace=True`,
# together with `errors="ignore"`, keeps this cell safe to re-run: a second
# execution is a no-op rather than a KeyError.
hot_songs = hot_songs.drop(columns=["Unnamed: 0"], errors="ignore")

In [8]:
# Ask the user for a song and check whether it is in the hot list.

song_input=input("Please enter a name of a song: ").title()

# Match case-insensitively against the `title` column only.
#  * `str.title()` capitalises the letter after an apostrophe
#    ("Don't" -> "Don'T"), so an exact comparison with the title-cased input
#    can never find songs such as "We Don't Talk About Bruno".
#  * Searching `hot_songs.values` would also report a hit when the user types
#    an artist name.
wanted_title = song_input.strip().casefold()
is_input_song = hot_songs["title"].astype(str).str.strip().str.casefold() == wanted_title

if is_input_song.any():
    print()
    print("Your song is in the hot songs list!")
    print("Here is one song recommmendation to listen: ")
    print()
    # Recommend a hot song other than the one just entered; fall back to the
    # whole list only if nothing else is left to choose from.
    other_hot_songs = hot_songs[~is_input_song]
    print((hot_songs if other_hot_songs.empty else other_hot_songs).sample())
else:
    print("Please, check if you have any typos in the song name. If not, unfortunately, the song is not in the hot list.")

Please enter a name of a song: I will survive
Please, check if you have any typos in the song name. If not, unfortunately, the song is not in the hot list.


## Get the audio features of the input song

In [9]:
# Search Spotify for the entered title, restricted to the GB market and
# keeping only the first hit.
input_song = sp.search(
    q=song_input,
    market="GB",
    limit=1,
)

In [10]:
# Show the raw search response for inspection (nested dict under "tracks").
input_song

{'tracks': {'href': 'https://api.spotify.com/v1/search?query=I+Will+Survive&type=track&market=GB&offset=0&limit=1',
  'items': [{'album': {'album_type': 'album',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/6V6WCgi7waF55bJmylC4H5'},
       'href': 'https://api.spotify.com/v1/artists/6V6WCgi7waF55bJmylC4H5',
       'id': '6V6WCgi7waF55bJmylC4H5',
       'name': 'Gloria Gaynor',
       'type': 'artist',
       'uri': 'spotify:artist:6V6WCgi7waF55bJmylC4H5'}],
     'external_urls': {'spotify': 'https://open.spotify.com/album/3srNwhDkAdDQ1GwvtToENg'},
     'href': 'https://api.spotify.com/v1/albums/3srNwhDkAdDQ1GwvtToENg',
     'id': '3srNwhDkAdDQ1GwvtToENg',
     'images': [{'height': 640,
       'url': 'https://i.scdn.co/image/ab67616d0000b273bce36707076282a49639d8b5',
       'width': 640},
      {'height': 300,
       'url': 'https://i.scdn.co/image/ab67616d00001e02bce36707076282a49639d8b5',
       'width': 300},
      {'height': 64,
       'url': 'htt

In [11]:
# Fetch the audio features of the first search hit.  Fail with a clear
# message when the search found nothing, or when Spotify has no features for
# the track (it answers with `None` in that position), instead of an opaque
# IndexError here or a row of nulls further down.
found_tracks = input_song["tracks"]["items"]
if not found_tracks:
    raise IndexError(f"Spotify found no track matching {song_input!r}.")

input_song_features = sp.audio_features(found_tracks[0]["id"])
if not input_song_features or input_song_features[0] is None:
    raise ValueError(f"Spotify has no audio features for {song_input!r}.")

In [12]:
# Inspect the audio-feature payload returned for the track.
input_song_features

[{'danceability': 0.777,
  'energy': 0.736,
  'key': 0,
  'loudness': -10.082,
  'mode': 1,
  'speechiness': 0.037,
  'acousticness': 0.00755,
  'instrumentalness': 0.0136,
  'liveness': 0.302,
  'valence': 0.835,
  'tempo': 116.375,
  'type': 'audio_features',
  'id': '5f22DmcDGpsaG5RbUzVRow',
  'uri': 'spotify:track:5f22DmcDGpsaG5RbUzVRow',
  'track_href': 'https://api.spotify.com/v1/tracks/5f22DmcDGpsaG5RbUzVRow',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/5f22DmcDGpsaG5RbUzVRow',
  'duration_ms': 481813,
  'time_signature': 4}]

In [13]:
# Turn the feature payload into a one-row DataFrame and keep only the numeric
# columns.  `select_dtypes` is the public pandas API for this; the private
# `_get_numeric_data()` helper may change without notice.  (None of the
# feature columns is boolean, so both select the same columns here.)
input_song_features=pd.DataFrame(input_song_features)
input_song_features_num=input_song_features.select_dtypes(include="number")

In [14]:
# Full feature row, including the non-numeric type/id/uri/url columns.
input_song_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.777,0.736,0,-10.082,1,0.037,0.00755,0.0136,0.302,0.835,116.375,audio_features,5f22DmcDGpsaG5RbUzVRow,spotify:track:5f22DmcDGpsaG5RbUzVRow,https://api.spotify.com/v1/tracks/5f22DmcDGpsa...,https://api.spotify.com/v1/audio-analysis/5f22...,481813,4


In [15]:
# Numeric-only view of the feature row.
input_song_features_num

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,0.777,0.736,0,-10.082,1,0.037,0.00755,0.0136,0.302,0.835,116.375,481813,4


In [16]:
# Drop key, mode and time_signature before scaling (presumably because the
# scaler/KMeans were trained without them -- confirm against the training
# notebook).  `errors="ignore"` makes the cell idempotent: re-running it after
# the columns are already gone no longer raises a KeyError.
input_song_features_num = input_song_features_num.drop(
    columns=["key", "mode", "time_signature"],
    errors="ignore",
)

In [17]:
# Feature row after removing key, mode and time_signature.
input_song_features_num

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,0.777,0.736,-10.082,0.037,0.00755,0.0136,0.302,0.835,116.375,481813


In [18]:
# Load the pickled scaler.  `load` prints a message and returns None when the
# file is missing, so stop here with a clear error rather than failing later
# with "'NoneType' object has no attribute 'transform'".
model = load("scaler.pickle")
if model is None:
    raise FileNotFoundError("scaler.pickle could not be loaded; it is required to scale the input song.")

In [19]:
# Confirm what was unpickled: despite its name, `model` holds the scaler.
model

StandardScaler()

In [20]:
# Scale the input song's features with the loaded scaler.
input_song_scaled = model.transform(X=input_song_features_num)

In [21]:
# Wrap the scaled array in a DataFrame again.  The column labels are taken
# from the frame that was fed to the scaler instead of a second hand-typed
# list, so the two can never drift apart in name or order.
input_song_scaled = pd.DataFrame(
    input_song_scaled,
    columns=input_song_features_num.columns,
)

In [22]:
# Load the pickled KMeans model.  `load` prints a message and returns None
# when the file is missing, so stop here with a clear error rather than
# failing later with "'NoneType' object has no attribute 'predict'".
model_kmeans = load("kmeans_7.pickle")
if model_kmeans is None:
    raise FileNotFoundError("kmeans_7.pickle could not be loaded; it is required to assign a cluster.")

In [23]:
# Assign the scaled input song to one of the KMeans clusters.
input_song_cluster = model_kmeans.predict(X=input_song_scaled)

In [24]:
# Cluster label predicted for the input song (one-element array).
input_song_cluster

array([2], dtype=int32)

In [25]:
# Show the loaded clustering model and the parameters it was created with.
model_kmeans

KMeans(n_clusters=7, random_state=1234)