In [1]:
import numpy as np
import pandas as pd
import pickle
from sklearn import datasets # sklearn comes with some toy datasets to practise
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from matplotlib import pyplot
from sklearn.metrics import silhouette_score
import os


class ClusterNetwork:
    """Standardise a feature table and cluster it with k-means.

    Typical use: hand a DataFrame to ``set_cluster_data``, call
    ``run_cluster_cycle`` and read the results back through
    ``get_scaled_cluster_data``, ``get_labels`` and ``get_kmean_model``.
    The class also offers small helpers to pickle / unpickle a model below
    ``../data/models/``.

    All working state is kept in name-mangled (``__private``) attributes and
    is only reachable through the set/get methods.
    """

    # Directory (relative to the current working directory) holding pickled models.
    _MODEL_DIR = '../data/models/'

    def __init__(self) -> None:
        """Start with no data, no scaler and no fitted model."""
        self.__cluster_data = None
        self.__scaled_cluster_data = None
        self.__scaler = None
        self.__kmeans = None
        self.__labels = None

    # ------------------------------------------------------------------
    # set/get methods for the work data
    # ------------------------------------------------------------------

    def set_cluster_data(self, data_frame: pd.DataFrame) -> None:
        """Store the raw feature table that should be clustered."""
        self.__cluster_data = data_frame

    def get_cluster_data(self) -> "pd.DataFrame | None":
        """Return the raw feature table, or ``None`` if none was set."""
        return self.__cluster_data

    def set_scaled_cluster_data(self, scaled_cluster_data) -> None:
        """Store an already scaled version of the cluster data."""
        self.__scaled_cluster_data = scaled_cluster_data

    def get_scaled_cluster_data(self) -> "np.ndarray | pd.DataFrame | None":
        """Return the scaled data, or ``None`` if scaling has not happened yet."""
        return self.__scaled_cluster_data

    def get_kmean_model(self) -> "KMeans | None":
        """Return the model fitted by ``create_and_train_cluster`` (``None`` before)."""
        return self.__kmeans

    def get_labels(self):
        """Return the labels predicted by ``create_and_train_cluster`` (``None`` before)."""
        return self.__labels

    # ------------------------------------------------------------------
    # k-means cycle
    # ------------------------------------------------------------------

    def set_scaler_on_cluster_data(self) -> None:
        """Fit a ``StandardScaler`` on the cluster data and store the scaled result."""
        raw = self.get_cluster_data()
        self.__scaler = StandardScaler()
        self.__scaler.fit(raw)
        # Go through the setter: assigning to ``self.set_scaled_cluster_data``
        # would replace the method and leave the scaled data unset.
        self.set_scaled_cluster_data(self.__scaler.transform(raw))

    def create_and_train_cluster(self, n_cluster=8, random_state=42) -> None:
        """Fit ``KMeans`` on the scaled data, store and print the predicted labels.

        Args:
            n_cluster: number of clusters handed to ``KMeans(n_clusters=...)``.
            random_state: seed handed to ``KMeans(random_state=...)``.

        Raises:
            ValueError: if no scaled data is available yet.
        """
        scaled = self.get_scaled_cluster_data()
        if scaled is None:
            raise ValueError(
                'no scaled cluster data available; '
                'call set_scaler_on_cluster_data() first'
            )
        self.__kmeans = KMeans(n_clusters=n_cluster, random_state=random_state)
        self.__kmeans.fit(scaled)
        # Predict on the data itself, not on the bound getter method.
        self.__labels = self.__kmeans.predict(scaled)
        print(self.__labels)

    def run_cluster_cycle(self, n_cluster=8, random_state=42) -> None:
        """Scale the cluster data and train the k-means model in one go.

        Prints a message and does nothing when no cluster data was set.

        Args:
            n_cluster: forwarded to ``create_and_train_cluster``.
            random_state: forwarded to ``create_and_train_cluster``.
        """
        # ``is None`` on the *value*: comparing the bound method with None is
        # always true, and ``DataFrame != None`` is element-wise.
        if self.get_cluster_data() is None:
            print('not possible without cluster data')
            return
        self.set_scaler_on_cluster_data()
        self.create_and_train_cluster(n_cluster=n_cluster, random_state=random_state)

    # ------------------------------------------------------------------
    # Helper functions to store and load models
    # ------------------------------------------------------------------

    def _model_path(self, model_name: str) -> str:
        """Return the pickle path used for ``model_name`` inside the model directory."""
        return os.path.join(self._MODEL_DIR, model_name + '.pickle')

    def load_kmean_model(self, model_name: str) -> "KMeans | None":
        """Unpickle and return the model stored under ``model_name``.

        Prints a message and returns ``None`` when the file does not exist.
        """
        model_file = self._model_path(model_name)
        if not os.path.isfile(model_file):
            print('could not found model or path')
            return None
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(model_file, 'rb') as f:
            return pickle.load(file=f)

    def save_kmean_model(self, model_object: "KMeans", model_name: str) -> None:
        """Pickle ``model_object`` as ``<model_name>.pickle`` in the model directory.

        Prints a message and writes nothing when the directory does not exist.
        """
        if not os.path.isdir(self._MODEL_DIR):
            print('could not found path data/models')
            return
        with open(self._model_path(model_name), 'wb') as f:
            pickle.dump(model_object, file=f)

In [2]:
# Load the song table and discard its 'Unnamed: 0' column
# (presumably a leftover index column from an earlier export -- verify).

data = pd.read_csv('../data/song_data.csv').drop(columns='Unnamed: 0')

In [3]:
# Show the loaded frame as this cell's output.
data

Unnamed: 0.1,Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,...,type,id,uri,track_href,analysis_url,duration_ms,time_signature,track_name,artist_name,artist_id
0,0,0.776,0.378,0,-8.035,1,0.0322,0.4350,0.001870,0.1100,...,audio_features,2EgfLUS0jNiujIWc3ZLEtn,spotify:track:2EgfLUS0jNiujIWc3ZLEtn,https://api.spotify.com/v1/tracks/2EgfLUS0jNiu...,https://api.spotify.com/v1/audio-analysis/2Egf...,175861,4,Tangerine,Tim Atlas,3CiuXDKttPUT0tWGHicFUH
1,1,0.663,0.697,0,-5.503,1,0.0508,0.2720,0.008860,0.1530,...,audio_features,2ngRZDAluwYoJeuqEA4dhK,spotify:track:2ngRZDAluwYoJeuqEA4dhK,https://api.spotify.com/v1/tracks/2ngRZDAluwYo...,https://api.spotify.com/v1/audio-analysis/2ngR...,198384,4,Sidestep,Tim Atlas,3CiuXDKttPUT0tWGHicFUH
2,2,0.596,0.675,9,-7.790,1,0.0517,0.1880,0.739000,0.1020,...,audio_features,3tcJ3yUXKtJpsgpAyVzP7R,spotify:track:3tcJ3yUXKtJpsgpAyVzP7R,https://api.spotify.com/v1/tracks/3tcJ3yUXKtJp...,https://api.spotify.com/v1/audio-analysis/3tcJ...,242163,4,Crime of Passion,Tim Atlas,3CiuXDKttPUT0tWGHicFUH
3,3,0.593,0.274,2,-15.402,1,0.2780,0.9340,0.000569,0.0758,...,audio_features,6W4osAjSVCvUwOlVFBP76n,spotify:track:6W4osAjSVCvUwOlVFBP76n,https://api.spotify.com/v1/tracks/6W4osAjSVCvU...,https://api.spotify.com/v1/audio-analysis/6W4o...,226683,4,Together Lonely,Tim Atlas,3CiuXDKttPUT0tWGHicFUH
4,4,0.871,0.281,9,-10.650,0,0.0466,0.7670,0.000180,0.1470,...,audio_features,1vdpFZ4rsQevl8WC6m3m9y,spotify:track:1vdpFZ4rsQevl8WC6m3m9y,https://api.spotify.com/v1/tracks/1vdpFZ4rsQev...,https://api.spotify.com/v1/audio-analysis/1vdp...,184259,4,Small Talk,Tim Atlas,3CiuXDKttPUT0tWGHicFUH
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222308,222308,0.906,0.724,6,-4.109,0,0.0931,0.4460,0.000000,0.0848,...,audio_features,4dUlJyHgdz6IeCJIYMHHDm,spotify:track:4dUlJyHgdz6IeCJIYMHHDm,https://api.spotify.com/v1/tracks/4dUlJyHgdz6I...,https://api.spotify.com/v1/audio-analysis/4dUl...,164769,4,Me Gusta - Remix (feat. Cardi B & 24kGoldn),Cardi B,4kYSro6naA4h99UJvo89HB
222309,222309,0.934,0.443,1,-7.541,1,0.4100,0.0272,0.000000,0.0889,...,audio_features,051wt8AyLFgYnVuberd3vO,spotify:track:051wt8AyLFgYnVuberd3vO,https://api.spotify.com/v1/tracks/051wt8AyLFgY...,https://api.spotify.com/v1/audio-analysis/051w...,187541,4,WAP (feat. Megan Thee Stallion),Cardi B,4kYSro6naA4h99UJvo89HB
222310,222310,0.903,0.447,6,-11.554,1,0.1160,0.0873,0.000000,0.1360,...,audio_features,3DyiAk1BzIF8rq9rimypG4,spotify:track:3DyiAk1BzIF8rq9rimypG4,https://api.spotify.com/v1/tracks/3DyiAk1BzIF8...,https://api.spotify.com/v1/audio-analysis/3Dyi...,374545,4,La Bebe - Remix,Cardi B,4kYSro6naA4h99UJvo89HB
222311,222311,0.805,0.835,0,-4.603,1,0.0896,0.1300,0.000005,0.3650,...,audio_features,1EJgymgJHcjSOGSHcYaxvW,spotify:track:1EJgymgJHcjSOGSHcYaxvW,https://api.spotify.com/v1/tracks/1EJgymgJHcjS...,https://api.spotify.com/v1/audio-analysis/1EJg...,188230,4,South of the Border (feat. Camila Cabello & Ca...,Cardi B,4kYSro6naA4h99UJvo89HB
