In [52]:
best_features = ['acousticness','danceability','duration_ms','energy','instrumentalness',
                            'liveness','loudness','speechiness','tempo','valence','Chroma_1',
                            'Chroma_2','Chroma_3','Chroma_4','Chroma_5','Chroma_6','Chroma_7',
                            'Chroma_8','Chroma_9','Chroma_10','Chroma_11','Chroma_12','MEL_1','MEL_2',
                            'MEL_3','MEL_4','MEL_5','MEL_6','MEL_7','MEL_8','MEL_13','MEL_14',
                            'MEL_16','MEL_17','MEL_18','MEL_19','MEL_20','MEL_22','MEL_23','MEL_24',
                            'MEL_27','MEL_30','MEL_51','MFCC_2','MFCC_3','MFCC_4','MFCC_5','MFCC_6',
                            'MFCC_7','MFCC_8','MFCC_9','MFCC_10','MFCC_11','MFCC_12','MFCC_13',
                            'MFCC_14','MFCC_15','MFCC_16','MFCC_17','MFCC_18','MFCC_19','MFCC_20',
                            'MFCC_21','MFCC_22','MFCC_23','MFCC_24','MFCC_25','MFCC_26','MFCC_27',
                            'MFCC_28','MFCC_29','MFCC_30','MFCC_31','MFCC_32','MFCC_33','MFCC_34',
                            'MFCC_35','MFCC_36','MFCC_37','MFCC_38','MFCC_39','MFCC_40','MFCC_41',
                            'MFCC_42','MFCC_43','MFCC_44','MFCC_45','MFCC_46','MFCC_47','MFCC_48',
                            'Spectral_contrast_1','Spectral_contrast_2','Spectral_contrast_3',
                            'Spectral_contrast_4','Spectral_contrast_5','Spectral_contrast_6',
                            'Spectral_contrast_7','Tonnetz_1','Tonnetz_2','Tonnetz_3','Tonnetz_4',
                            'Tonnetz_5','Tonnetz_6','entropy_energy','spectral_centroid',
                            'spectral_rollOff_min']

In [59]:
import pandas as pd
import numpy as np
import pickle
import time
import os
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn.neighbors import NearestNeighbors


class SpotifyRecommender:
    supported = ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature', 'valence'] + [f'Chroma_{i}' for i in range(1, 13)] + [f'MEL_{i}' for i in range(1, 129)] + [f'MFCC_{i}' for i in range(1, 49)] + [f'Spectral_contrast_{i}' for i in range(1, 8)] + [f'Tonnetz_{i}' for i in range(1, 7)] + ['ZCR', 'entropy_energy', 'spectral_bandwith', 'spectral_centroid', 'spectral_rollOff_max', 'spectral_rollOff_min']
    return_columns = ['name','artists_name','artists_genres','album_name','track_href','preview_url','analysis_url','href','lyrics','playlist','popularity','tempo','time_signature','track_id',
             'artists_followers', 'artists_artist_popularity', 'artists_id','album_release_date','album_images','album_total_tracks','album_external_urls','album_id']
    def __init__(self,load_model="",k=20):
        self.scaler = None
        self.pca = None
        self.sfm = None
        self.knn = None
        self.k = k
        self.filepath = 'saved_modelV2.pkl'
        self.dataset = None
        self.trained_features = []
        if not load_model == "":
            path = os.path.join(load_model)
            self.load(path)
        

    def get_features(self,data,features):
        listed = data.columns.tolist()
        for f in listed:
            if f in self.supported and f in features:
                self.trained_features.append(f)
        return


    def train(self,dataset,n_lines=0,features=supported):
        self.get_features(dataset,features)
        self.dataset = dataset

        if not n_lines == 0:
            dataset = dataset.sample(n=n_lines)

        self.scaler = StandardScaler()
        scaled_data = self.scaler.fit_transform(dataset[self.trained_features])

        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(scaled_data, dataset.index)  # Hier verwende ich data.index anstelle von target

        self.sfm = SelectFromModel(rf, threshold='mean')
        self.sfm.fit(scaled_data, dataset.index)  # Hier verwende ich data.index anstelle von target

        self.pca = PCA(n_components=0.95)
        reduced_data = self.pca.fit_transform(scaled_data[:, self.sfm.get_support()])

        self.knn = NearestNeighbors(n_neighbors=self.k)
        self.knn.fit(reduced_data)

    def predict(self, df_selected,k=None):
        if not k==None:
            self.knn.n_neighbors = k

        new_scaled_data = self.scaler.transform(df_selected[self.trained_features])
        new_reduced_data = self.pca.transform(new_scaled_data[:, self.sfm.get_support()])
        distances, indices = self.knn.kneighbors(new_reduced_data)
        
        self.knn.n_neighbors = self.k
        return self.dataset[self.return_columns].iloc[indices[0]]#, distances, indices

    def save(self, file_path=None):
        if file_path == None:
            file_path = self.filepath
        model_data = {
            'knn': self.knn,
            'scaler': self.scaler,
            'pca': self.pca,
            'sfm': self.sfm,
            'dataset': self.dataset,
            'filepath': file_path,
            'trained_features': self.trained_features
        }
        with open(file_path, 'wb') as f:
            pickle.dump(model_data, f)

    def load(self, file_path):
        with open(file_path, 'rb') as f:
            model_data = pickle.load(f)

        self.knn = model_data['knn']
        self.scaler = model_data['scaler']
        self.pca = model_data['pca']
        self.sfm = model_data['sfm']
        self.filepath = model_data['filepath']
        self.trained_features =  model_data['trained_features']
        self.dataset = model_data['dataset']

In [None]:
import tests.etl_test as etl
data = etl.prepare_dataset()
data

In [53]:
features = ['acousticness', 'danceability', 'duration_ms', 'energy', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode', 'speechiness', 'tempo', 'time_signature']
model = SpotifyRecommender()

model.train(data, 5000, best_features)


In [57]:
model.save('modelV2_5000_best.pkl')

In [60]:
model2 = SpotifyRecommender()
model2.load('modelV2_5000_best.pkl')

In [55]:
pred = data[data['track_id']=='0EYOdF5FCkgOJJla8DI2Md']
pred[['name','artists_name','album_name','artists_genres']]

Unnamed: 0,name,artists_name,album_name,artists_genres
700,B.Y.O.B.,System Of A Down,Mezmerize,"[alternative metal, nu metal, post-grunge, ..."


In [63]:
model2.predict(pred)

Unnamed: 0,name,artists_name,artists_genres,album_name,track_href,preview_url,analysis_url,href,lyrics,playlist,...,time_signature,track_id,artists_followers,artists_artist_popularity,artists_id,album_release_date,album_images,album_total_tracks,album_external_urls,album_id
1820,In My Craft or Sullen Art,Dylan Thomas,[poetry],Pleasure Dome: Audible Modern Poetry Read by i...,https://api.spotify.com/v1/tracks/56NEH1rR3mF8...,https://p.scdn.co/mp3-preview/80d5d34b31abaff0...,https://api.spotify.com/v1/audio-analysis/56NE...,https://api.spotify.com/v1/tracks/56NEH1rR3mF8...,\r\nIn my craft or sullen art\r\nExercised in ...,Modern Poetry,...,4.0,56NEH1rR3mF8bRBChmuXjX,2119,15,33PtzSjT25Ve4MwKu3xNff,1950-01-01,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",25,{'spotify': 'https://open.spotify.com/album/6T...,6TTA2pOc7mXC5Wp2BlZvea
2781,Flores em Vida,Zezé Di Camargo & Luciano,"[axe, pagode, sertanejo, sertanejo universi...",Teorias de Raul,https://api.spotify.com/v1/tracks/0yUFfbx9lqaw...,https://p.scdn.co/mp3-preview/975a4f9b6b1d5f93...,https://api.spotify.com/v1/audio-analysis/0yUF...,https://api.spotify.com/v1/tracks/0yUFfbx9lqaw...,\r\n\r\n(Christmas)\r\nThe snow's coming down\...,This Is Zezé Di Camargo & Luciano,...,4.0,0yUFfbx9lqawz6DOx8eBZB,850354,64,4dyYjqmYDjegbB3F2mbvcT,2014-06-10,"[{'height': 576, 'url': 'https://i.scdn.co/ima...",16,{'spotify': 'https://open.spotify.com/album/5b...,5btZGpiPTfOVX1cmKbrbE4
4453,Benjamin Blümchen Lied,Benjamin Blümchen,[hoerspiel],Folge 116: und die Spaßmaschine,https://api.spotify.com/v1/tracks/3ovv4gzs9Z03...,https://p.scdn.co/mp3-preview/7053a0dcf9199998...,https://api.spotify.com/v1/audio-analysis/3ovv...,https://api.spotify.com/v1/tracks/3ovv4gzs9Z03...,\r\nAuf ner schönen grünen Wiese liegt ein gro...,Spaß mit Freunden: Hörspiele,...,4.0,3ovv4gzs9Z038lJQf6NyCU,40943,72,1l6d0RIxTL3JytlLGvWzYe,2011-01-07,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",39,{'spotify': 'https://open.spotify.com/album/52...,52BYCPzRCmDSRd7mkZkXYU
2329,Symphony No. 5 in B-Flat Major; WAB 105: I. In...,Anton Bruckner,"[classical, late romantic era]",Bruckner: Symphonies 5 & 9,https://api.spotify.com/v1/tracks/1tza7D25joES...,https://p.scdn.co/mp3-preview/b5904b8e43c9db5d...,https://api.spotify.com/v1/audio-analysis/1tza...,https://api.spotify.com/v1/tracks/1tza7D25joES...,-99,Klassische Meisterwerke,...,4.0,1tza7D25joESgnyIzjgQJX,29443,47,2bM3j1JQWBkmzuoZKu4zj2,1990-01-01,"[{'height': 576, 'url': 'https://i.scdn.co/ima...",7,{'spotify': 'https://open.spotify.com/album/0Z...,0Z16Rbsc0amjDVKcSnP4WI
3055,Outro,Hank Schrader,[not defined],Breaking Bad,https://api.spotify.com/v1/tracks/64pYZR9UjaO5...,https://p.scdn.co/mp3-preview/3bf0c90fdc056fea...,https://api.spotify.com/v1/audio-analysis/64pY...,https://api.spotify.com/v1/tracks/64pYZR9UjaO5...,\r\nOut in the West Texas town of El Paso\r\nI...,Breaking Bad,...,0.0,64pYZR9UjaO5QA9PWZBe15,9,0,2zVcNsMkuMGeUJO1pAY0Nw,2018-07-17,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",11,{'spotify': 'https://open.spotify.com/album/6m...,6m50xHFiTuQc9xrJzbueEz
2236,"Divertimento in F Major, K. 138, ""Salzburg Sym...",Wolfgang Amadeus Mozart,"[classical, classical era]",Mozart: Salzburg Symphonies,https://api.spotify.com/v1/tracks/76zEx8txhROE...,https://p.scdn.co/mp3-preview/1a68fce373199233...,https://api.spotify.com/v1/audio-analysis/76zE...,https://api.spotify.com/v1/tracks/76zEx8txhROE...,-99,Classical Feast,...,3.0,76zEx8txhROES6MmvEiDih,2746606,76,4NJhFmfw43RLBLjQvxDuRS,1988-06-30,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",14,{'spotify': 'https://open.spotify.com/album/3l...,3lZKH6rrfJrFwgHmW3B6LX
4528,Benjamin Blümchen Lied,Benjamin Blümchen,[hoerspiel],Folge 141: Nachts in der Erfinderwerkstatt,https://api.spotify.com/v1/tracks/6j3YTWlfxifq...,https://p.scdn.co/mp3-preview/7053a0dcf9199998...,https://api.spotify.com/v1/audio-analysis/6j3Y...,https://api.spotify.com/v1/tracks/6j3YTWlfxifq...,\r\nAuf ner schönen grünen Wiese liegt ein gro...,Hörspielzeit: Benjamin Blümchen,...,4.0,6j3YTWlfxifqNV9LyyZjOp,40943,72,1l6d0RIxTL3JytlLGvWzYe,2019-01-25,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",26,{'spotify': 'https://open.spotify.com/album/4j...,4jxT7kNMkxJTrvI5nTY7RS
4067,Pretty Shining People - Jack Wins Remix,George Ezra,"[neo-singer-songwriter, pop]",Pretty Shining People (Jack Wins Remix),https://api.spotify.com/v1/tracks/5KsdfLLbAknc...,https://p.scdn.co/mp3-preview/26a0145bc376b600...,https://api.spotify.com/v1/audio-analysis/5Ksd...,https://api.spotify.com/v1/tracks/5KsdfLLbAknc...,\r\n\r\nAlright\r\nAlright\r\n\r\nMe and Sam i...,Beats of Tomorrow,...,4.0,5KsdfLLbAkncvkvtHavfLd,1966224,81,2ysnwxxNtSgbb9t1m2Ur4j,2019-03-08,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",1,{'spotify': 'https://open.spotify.com/album/07...,07YRBa8JGxQC3pCTOSQxzE
1018,Feel Good Inc,Gorillaz,"[alternative hip hop, art pop, rock]",Demon Days,https://api.spotify.com/v1/tracks/0d28khcov6Ai...,https://p.scdn.co/mp3-preview/7388c425022ced92...,https://api.spotify.com/v1/audio-analysis/0d28...,https://api.spotify.com/v1/tracks/0d28khcov6Ai...,"\r\n\r\nHahahahahahahahahahahahahahahahaha, Fe...",Top Gaming Tracks,...,4.0,0d28khcov6AiegSCpG5TuT,4074301,79,3AA28KZvwAUcZuOKwyblJQ,2005-05-23,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",15,{'spotify': 'https://open.spotify.com/album/0b...,0bUTHlWbkSQysoM3VsWldT
4593,Kapitel 11: Der Dinosaurierknochen (Folge 139),Benjamin Blümchen,[hoerspiel],Folge 139: Der Dinosaurierknochen,https://api.spotify.com/v1/tracks/1IVwFTXKVqcl...,https://p.scdn.co/mp3-preview/6e551fd4f03c9b5d...,https://api.spotify.com/v1/audio-analysis/1IVw...,https://api.spotify.com/v1/tracks/1IVwFTXKVqcl...,\r\nAuf ner schönen grünen Wiese liegt ein gro...,Hörspielzeit: Benjamin Blümchen,...,3.0,1IVwFTXKVqclXQeL33QagP,40943,72,1l6d0RIxTL3JytlLGvWzYe,2018-05-25,"[{'height': 640, 'url': 'https://i.scdn.co/ima...",29,{'spotify': 'https://open.spotify.com/album/4q...,4qQbfYDSaexEwY7dI95qMi
