## SearchEngine Class

In [76]:
# Data Manipulation
import numpy as np
import pandas as pd

class SearchEngine():
    def __init__(self, data):
        '''
        SearchEngine class
        Input: a songs dataset extracted from spotify API
        '''
        self.data = data
    
    def target_song(self, title='', artist=''):
        '''
        Search engine function for the target song
        Input: song title and/or artist
        Output: self.target dataset song
        '''
        # transform input strings in lowercase
        title = str(title).lower()
        artist = str(artist).lower()
        # filter self.data on the desired song
        if title != '' and artist != '':
            self.target = self.data[self.data['artists'].str.lower().str.contains(artist)]
            self.target = self.target[self.target['name'].str.lower().str.contains(title)]
        elif title != '':
            self.target = self.data[self.data['name'].str.lower().str.contains(title)]
        elif artist != '':
            self.target = self.data[self.data['artists'].str.lower().str.contains(artist)]
        else:
            print('Please select a song title and artist')
        # keep only the first song result
        self.target = self.target.head(1)
        self.artist = self.target["artists"].to_string(index=False).strip("['").strip("']")
        self.title = self.target["name"].to_string(index=False)
        print(f'TITLE: {self.title}')
        print(f'ARTIST: {self.artist}')

## Preprocessor Class

In [77]:
# Data Manipulation
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
from sklearn.utils import shuffle

# Pipeline and Column Transformers
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn import set_config
set_config(display = "diagram")

# Scaling
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler

# Package classes
# from music_similarity import SearchEngine

class Preprocessor():
    def __init__(self, se):
        '''
        Preprocessor class
        Input: a songs dataset extracted from spotify API
        '''
        self.se = se
        
    def scale_data(self):
        '''
        Adapting data function
        '''
        # drop non numerical features before scaling
        self.X=se.data.drop(columns=['name','artists'])
        self.X_target=se.target.drop(columns=['name','artists'])
        # fit and transofrm with MinMaxScaler
        mmscaler = MinMaxScaler().fit(self.X)
        self.X_mmscaled=mmscaler.transform(self.X)
        self.X_target_mmscaled=mmscaler.transform(self.X_target)
        # fit and transofrm with RobustScaler
        roscaler = RobustScaler().fit(self.X)
        self.X_roscaled=roscaler.transform(self.X)
        self.X_target_roscaled=roscaler.transform(self.X_target)

## Playlist Class

In [78]:
# Data Manipulation
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)

# Unsupervised Learning
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

# K-nn modelling
from sklearn.neighbors import NearestNeighbors

# Package classes
# from music_similarity import SearchEngine, Preprocessor

class Playlist():
    def __init__(self, preprocessor, se):
        '''
        Extractor class
        Input: preprocessor class dataset extracted from spotify API
        '''
        self.preprocessor = preprocessor
        self.se = se
    
    def build_model(self):
        '''
        Model builder function
        '''
        self.model=NearestNeighbors(n_neighbors=10).fit(self.preprocessor.X_mmscaled)
        distance, index=self.model.kneighbors(self.preprocessor.X_target_mmscaled, n_neighbors=10)
        self.playlist = self.se.data.iloc[index[0],:].sort_values(by=['tempo'])
        self.playlist['distance'] = distance[0]

In [79]:
if 'se' in globals():
    del se
spotify = pd.read_csv('../raw_data/ML_spotify_data.csv')
se = SearchEngine(spotify)
se.target_song("f", "u2")

if 'preprocessor' in globals():
    del preprocessor
preprocessor = Preprocessor(se)
preprocessor.scale_data()

if 'playlist' in globals():
    del playlist
playlist = Playlist(preprocessor, se)
playlist.build_model()

TITLE: A Sort Of Homecoming - Live
ARTIST: U2


In [80]:
playlist.playlist

Unnamed: 0,name,artists,popularity,danceability,valence,energy,explicit,key,liveness,loudness,speechiness,tempo,distance
704,"It Ain't Me, Babe - Live at LA Forum, Inglewoo...",['Bob Dylan'],23,0.455,0.308,0.981,0,7,0.995,-6.409,0.183,100.49,0.0
115,"Nutrocker - Live At Newcastle City Hall, 26.3....","['Emerson, Lake & Palmer']",27,0.487,0.525,0.821,0,7,0.89,-12.854,0.0765,118.521,0.171471
3715,"Blasphemous Rumours - Live at Rose Bowl, Pasad...",['Depeche Mode'],28,0.667,0.504,0.901,0,6,0.942,-9.258,0.0384,119.747,0.223612
1509,Feats Don't Fail Me Now - Live at Lisner Audit...,['Little Feat'],20,0.414,0.549,0.89,0,7,0.972,-10.558,0.0736,120.753,0.232693
3570,Breaking The Silence - Remastered 2003,['Queensrÿche'],27,0.55,0.496,0.983,0,7,0.906,-3.194,0.0832,122.912,0.236184
2705,A Sort Of Homecoming - Live,['U2'],22,0.505,0.363,0.883,0,6,0.97,-6.794,0.0578,125.824,0.238395
4048,Comin' Atcha Live / Truckin' - Live At The Tro...,['Tesla'],27,0.501,0.411,0.766,0,4,0.957,-11.938,0.0765,126.806,0.239608
3305,Battle Angels,['Sanctuary'],31,0.364,0.367,0.866,0,7,0.837,-13.331,0.0603,139.403,0.244874
1877,Shoot Shoot - Live / 2008 Remaster,['UFO'],26,0.345,0.299,0.972,0,7,0.965,-5.674,0.102,143.364,0.245215
2289,You've Got Another Thing Coming - Live from th...,['Judas Priest'],27,0.382,0.327,0.929,0,6,0.966,-5.921,0.0568,144.354,0.269785
