# Assemble the project pipeline

## Import Libraries

In [10]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import config
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [11]:
# Initialize Spotipy with the client credentials stored in the local config module.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=config.client_id,
        client_secret=config.client_secret,
    )
)

## Get features

In [None]:
# Search Spotify (GB market) for a sample song and fetch the audio features
# of the first hit returned by the search.
track = sp.search(q="Oops I did it again", type="track", limit=10, market="GB")
top_hit = track["tracks"]["items"][0]
features = pd.DataFrame(sp.audio_features(top_hit["id"]))

# Keep only the audio feature columns; identifiers, URLs and the other
# fields returned by the API are dropped.
feature_columns = [
    'danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
    'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
]
features = features[feature_columns]
features


In [None]:
# Load the pickled scaler from disk.
def load(filename="pulpkokiscaler.pickle"):
    """Unpickle and return the object stored in ``filename``.

    If the file does not exist, "File not found!" is printed and None is
    returned instead of raising.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    loaded = None
    try:
        with open(filename, "rb") as pickle_file:
            loaded = pickle.load(pickle_file)
    except FileNotFoundError:
        print("File not found!")
    return loaded


pulpscaler = load("pulpkokiscaler.pickle")
pulpscaler

In [None]:
# Apply the previously fitted scaler (loaded from pickle) to the song's
# audio features and print the transformed values.

scaledfeat = pulpscaler.transform(features)
print(scaledfeat)

In [None]:
# Load the pickled K-Means model from disk.
def load(filename="pulpkokikmeans.pickle"):
    """Return the object unpickled from ``filename``.

    A missing file is reported by printing "File not found!" and the
    function then returns None rather than raising.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    try:
        with open(filename, "rb") as source:
            model = pickle.load(source)
    except FileNotFoundError:
        print("File not found!")
        return None
    return model


pulpkey = load("pulpkokikmeans.pickle")
pulpkey

In [None]:
# Predict the cluster of the song from its *scaled* features rather than the
# raw Spotify values, so the input passes through the same scaler that is
# applied earlier in this notebook.
# NOTE(review): assumes the pickled K-Means model was fitted on data scaled
# with this scaler -- confirm against the training notebook.
keyfeat = pulpkey.predict(scaledfeat)
print(keyfeat)

In [None]:
# Read the song table from pulpkokicluster.csv into a DataFrame and display it.
df_scaled2 = pd.read_csv('pulpkokicluster.csv')
df_scaled2

In [None]:
# Keep only the songs whose cluster matches the one predicted for the input
# song, instead of a hard-coded cluster number. `keyfeat` comes from a
# prediction on a single song, so its first element is that song's cluster.
filtered_df = df_scaled2[df_scaled2['cluster'] == keyfeat[0]]
filtered_df

In [None]:
#Create a function to fit the K-Means clustering method using all the songs 
#contained in the Spotify collected data set.

In [None]:
#Use the trained K-means model to predict the cluster of each song in the internal databases 
#and add this information to the internal databases

# User Input final scenario

## Get the song name from the user as an input

In [None]:
# Prompt the user for the title of the song to look up.
user_song = input("Name of the song ")

In [None]:
# Search Spotify for the song the user typed in (rather than a fixed title)
# and list the matches with artist and album so the right one can be picked.
track = sp.search(q=user_song, type="track", limit=5)
for i, item in enumerate(track['tracks']['items']):
    artist = item['artists'][0]['name']
    album = item['album']['name']
    print(i, "Artist: ", artist, "\n", "Album: ", album, "\n")

## Play the input song in music embed player

[spotify](https://open.spotify.com/track/7sLpSWxQazJzDVG6YGzlVs?si=ad11087c4b534a6c)

In [1]:
from IPython.display import IFrame

# Track shown in the player: 7sLpSWxQazJzDVG6YGzlVs

# Embed the Spotify player for the track; as the last expression of the cell
# the IFrame object is what gets displayed.
# NOTE(review): IPython's IFrame appears to forward extra keyword arguments
# as URL query parameters rather than HTML attributes -- confirm this is
# the intended effect of frameborder/allowtransparency/allow.
IFrame(
    "https://open.spotify.com/embed/track/7sLpSWxQazJzDVG6YGzlVs",
    "320",
    "80",
    frameborder="0",
    allowtransparency="true",
    allow="encrypted-media",
)

## Get the audio features for that song from the Spotify API using `sp.audio_features(trackid)`. Pay attention to keep only the audio feature columns.

In [25]:
# Request the audio features for one specific track ID; the call's result
# is the last expression of the cell, so it is displayed as the cell output.
trackid = "7sLpSWxQazJzDVG6YGzlVs"
sp.audio_features(trackid)

[{'danceability': 0.494,
  'energy': 0.887,
  'key': 6,
  'loudness': -4.297,
  'mode': 0,
  'speechiness': 0.398,
  'acousticness': 0.00661,
  'instrumentalness': 0,
  'liveness': 0.103,
  'valence': 0.548,
  'tempo': 89.554,
  'type': 'audio_features',
  'id': '7sLpSWxQazJzDVG6YGzlVs',
  'uri': 'spotify:track:7sLpSWxQazJzDVG6YGzlVs',
  'track_href': 'https://api.spotify.com/v1/tracks/7sLpSWxQazJzDVG6YGzlVs',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/7sLpSWxQazJzDVG6YGzlVs',
  'duration_ms': 234627,
  'time_signature': 4}]

In [None]:
# Search Spotify for the title entered by the user (not a fixed string) and
# print each match's index, artist and album.
track = sp.search(q=user_song, type="track", limit=5)
for i, item in enumerate(track['tracks']['items']):
    artist = item['artists'][0]['name']
    album = item['album']['name']
    print(i, "Artist: ", artist, "\n", "Album: ", album, "\n")

## Use `model.predict(new scaled audio record for the new song)` to predict the cluster (label) for the new song.

## Return a random song from the same cluster that the new song belongs to from your data frame and suggest it to the user

## Play it using embedded music player.