## SearchEngine Class

In [67]:
# Data Manipulation
import numpy as np
import pandas as pd

class SearchEngine():
    def __init__(self, data):
        '''
        SearchEngine class
        Input: a songs dataset extracted from spotify API
        '''
        self.data = data

    def target_song(self, title='', artist=''):
        '''
        Search engine function for the target song
        Input: song title and/or artist
        Output: self.target dataset song
        '''
        # transform input strings in lowercase
        title = str(title).lower()
        artist = str(artist).lower()
        # filter self.data on the desired song
        if title != '' and artist != '':
            self.target = self.data[self.data[
                'artists'].str.lower().str.contains(artist)]
            self.target = self.target[self.target[
                'name'].str.lower().str.contains(title)]
        elif title != '':
            self.target = self.data[self.data[
                'name'].str.lower().str.contains(title)]
        elif artist != '':
            self.target = self.data[self.data[
                'artists'].str.lower().str.contains(artist)]
        else:
            print('Please select a song title and artist')
        # keep only the first song result
        self.target = self.target.head(1)
        self.artist = self.target["artists"].to_string(
            index=False).strip("['").strip("']")
        self.title = self.target["name"].to_string(index=False)
        print(f'TITLE: {self.title}')
        print(f'ARTIST: {self.artist}')

## Preprocessor Class

In [68]:
# Data Manipulation
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from sklearn.utils import shuffle

# Pipeline and Column Transformers
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn import set_config
set_config(display = "diagram")

# Scaling
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler

# Package classes
# from music_similarity.search_engine import SearchEngine

class Preprocessor():
    def __init__(self, se):
        '''
        Preprocessor class
        Input: a songs dataset extracted from spotify API
        '''
        self.se = se

    def scale_data(self):
        '''
        Adapting data function
        '''
        # drop non numerical features before scaling
        self.X=self.se.data.drop(columns=['name','artists'])
        self.X_target=self.se.target.drop(columns=['name','artists'])
        # fit and transofrm with MinMaxScaler
        mmscaler = MinMaxScaler().fit(self.X)
        self.X_mmscaled=mmscaler.transform(self.X)
        self.X_target_mmscaled=mmscaler.transform(self.X_target)
        # fit and transofrm with RobustScaler
        roscaler = RobustScaler().fit(self.X)
        self.X_roscaled=roscaler.transform(self.X)
        self.X_target_roscaled=roscaler.transform(self.X_target)

## Playlist Class

In [69]:
# Data Manipulation
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

# Unsupervised Learning
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# K-nn modelling
from sklearn.neighbors import NearestNeighbors

# Package classes
# from music_similarity.search_engine import SearchEngine
# from music_similarity.preprocessor import Preprocessor

class Playlist():
    def __init__(self, preprocessor, se):
        '''
        Extractor class
        Input: preprocessor class dataset extracted from spotify API
        '''
        self.preprocessor = preprocessor
        self.se = se
        self.playlist_songs = 10

    def build_model(self):
        '''
        Model builder function
        '''
        # Define the model
        self.model=NearestNeighbors(
            n_neighbors=self.playlist_songs + 1).fit(
            self.preprocessor.X_mmscaled)
        # Extract index and distance of self.playlist_songs+1
        # number of colest songs
        self.distance, self.index=self.model.kneighbors(
            self.preprocessor.X_target_mmscaled,
            n_neighbors=self.playlist_songs + 1)
        # Copy found index rows from the original not scaled dataset
        self.playlist = self.se.data.iloc[self.index[0],:]
        self.playlist['distance'] = self.distance[0]
        # Remove the target song from the list
        self.playlist = self.playlist.tail(self.playlist_songs)
        # Ordering the playlist on distance, ascending order
        self.playlist = self.playlist.sort_values(
            by=['distance'], ascending=True, ignore_index=True)
        # Drop not necessary columns
        self.playlist = self.playlist[['name', 'artists', 'distance']]
        # Strip square brackets from the artists strings
        self.playlist['artists'] = self.playlist['artists'].apply(
            lambda x: x.strip("['").strip("']"))
        # Set starting index from 0 to 1
        self.playlist.index += 1

In [70]:
if 'se' in globals():
    del se
spotify = pd.read_csv('../raw_data/ML_spotify_data.csv')
se = SearchEngine(spotify)
se.target_song("f", "u2")

if 'preprocessor' in globals():
    del preprocessor
preprocessor = Preprocessor(se)
preprocessor.scale_data()

if 'playlist' in globals():
    del playlist
playlist = Playlist(preprocessor, se)
playlist.build_model()

TITLE: A Sort Of Homecoming - Live
ARTIST: U2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.playlist['distance'] = self.distance[0]


In [71]:
playlist.playlist

Unnamed: 0,name,artists,distance
1,You've Got Another Thing Coming - Live from th...,Judas Priest,0.171471
2,Breaking The Silence - Remastered 2003,Queensrÿche,0.223612
3,"It Ain't Me, Babe - Live at LA Forum, Inglewoo...",Bob Dylan,0.232693
4,"Blasphemous Rumours - Live at Rose Bowl, Pasad...",Depeche Mode,0.236184
5,Feats Don't Fail Me Now - Live at Lisner Audit...,Little Feat,0.238395
6,Shoot Shoot - Live / 2008 Remaster,UFO,0.239608
7,"Nutrocker - Live At Newcastle City Hall, 26.3....","Emerson, Lake & Palmer",0.244874
8,Comin' Atcha Live / Truckin' - Live At The Tro...,Tesla,0.245215
9,Battle Angels,Sanctuary,0.269785
10,Baba O'Riley - Live At Shepperton,The Who,0.270837


In [34]:
for artist in se.data.artists.unique():
    print(artist)

['Badfinger']
['Elton John']
['The Guess Who']
['Joe Cocker']
['Van Morrison']
['Dorothy Ashby']
['Kishore Kumar', 'Rajesh Khanna', 'Asha Parekh']
['Gentle Giant']
['Derek & The Dominos', 'Andy Johns']
['The Doors']
['Linda Perhacs']
['Harry Nilsson']
['The Marbles']
['The Who']
['Andy Williams']
['Steppenwolf']
['Fabrizio De André']
['Faces']
['Maynard Ferguson']
['José Alfredo Jimenez']
['The Orchestra Soledad']
['Yusuf / Cat Stevens']
['Curtis Mayfield']
['Free']
['Ludwig van Beethoven', 'Glenn Gould']
['Cactus']
['Angel Guaraca']
['Exuma']
['The Moody Blues']
['Giuseppe Verdi', 'Leontyne Price', 'Zubin Mehta', 'New Philharmonia Orchestra']
['Jimmy Cliff']
['Savoy Brown']
['Mountain']
['Focus']
['John Williams', 'Boston Pops Orchestra']
['Lucio Battisti']
['Johnny Cash']
['Abdel Halim Hafez']
['Bobby Vinton']
['Tom Zé']
['Frank Zappa']
['Jethro Tull']
['John Cale']
['Lata Mangeshkar', 'Kishore Kumar']
['The Byrds']
['Aretha Franklin']
['Christoph Willibald Gluck', 'Orchestra of the 

In [30]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# TO DO: I'LL TURN THIS INTO A CLASS LATER

class ApiExtractor:
    def __init__(self):
        '''
        Extractor class
        Input: preprocessor class dataset extracted from spotify API
        '''
        # credentials need to be exported via the command line
        self.auth_manager = SpotifyClientCredentials()
        self.sp_connection = spotipy.Spotify(auth_manager=self.auth_manager)
    
    def get_track_attrs(self, artist, title):
        # get track attributes
        self.ta_response = self.sp_connection.search(
            q="artist:" + artist + " track:" + title,
            type="track",
            limit=1)

        # parse attributes from track
        self.track = self.ta_response['tracks']['items'][0]
        self.track_name = self.track['name']
        self.track_uri = self.track['uri']
        self.track_popularity = self.track['popularity']
        self.track_explicit = self.track['explicit']
        self.track_artists = [artist['name'] for artist in self.track['artists']]

        # get track features
        self.track_features = self.sp_connection.audio_features(tracks = self.track_uri)[0]

        return [self.track_name, self.track_artists, self.track_popularity,
                self.track_features['danceability'], self.track_features['valence'],
                self.track_features['energy'], self.track_explicit, self.track_features['key'],
                self.track_features['liveness'], self.track_features['loudness'],
                self.track_features['speechiness'], self.track_features['tempo']]

    def get_artist_song_list(self, artist):
        # get track attributes
        self.asl_response = self.sp_connection.search(
            q="artist:" + artist,
            type="track",
            limit=10)

In [35]:
if 'ae' in globals():
    del ae
ae = ApiExtractor()
ae.get_artist_song_list("['Badfinger']")

In [53]:
dictio = ae.asl_response['tracks']['items'][0]
dictio.keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])