In [1]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [2]:
# Module-level table of audio features; clusterSong() reads this frame.
spotiftySongAuidoFeatures = pd.read_csv(
    filepath_or_buffer="AudioFeaturesFromSpotify.csv",
)

In [3]:
def load(filename="filename.pickle"):
    """Return the object pickled at ``filename``, or a fresh estimator if the file is missing.

    Parameters
    ----------
    filename : str or os.PathLike
        Path of the pickle file to read.

    Returns
    -------
    object
        The unpickled object when the file exists. When it does not exist,
        a new ``StandardScaler()`` if the file's base name is
        ``"scaler.pickle"``, otherwise a new ``KMeans(random_state=1234)``.

    Notes
    -----
    Only ``FileNotFoundError`` is handled; any other error raised while
    opening or unpickling the file propagates to the caller.
    """
    try:
        with open(filename, "rb") as f:
            # SECURITY: unpickling can execute arbitrary code; only load
            # pickle files that this notebook (or a trusted source) produced.
            return pickle.load(f)

    except FileNotFoundError:
        # Compare the base name rather than the raw argument so that paths
        # with a directory prefix (or pathlib.Path objects) still select the
        # scaler fallback instead of silently returning a KMeans.
        if os.path.basename(os.fspath(filename)) == "scaler.pickle":
            return StandardScaler()
        return KMeans(random_state=1234)

In [4]:
def saveScaler(scaler, filename="scaler.pickle"):
    """Pickle ``scaler`` to ``filename``, overwriting any existing file.

    Parameters
    ----------
    scaler : object
        The object to serialize (the scaler used by ``clusterSong`` in this
        notebook); it must be picklable.
    filename : str or os.PathLike, optional
        Destination path. Defaults to ``"scaler.pickle"`` in the current
        working directory, which is the name ``load`` looks for.
    """
    with open(filename, "wb") as f:
        pickle.dump(scaler, f)

In [5]:
def saveKmeans(kmeans, filename="kmeans.pickle"):
    """Pickle ``kmeans`` to ``filename``, overwriting any existing file.

    Parameters
    ----------
    kmeans : object
        The object to serialize (the clustering model used by
        ``clusterSong`` in this notebook); it must be picklable.
    filename : str or os.PathLike, optional
        Destination path. Defaults to ``"kmeans.pickle"`` in the current
        working directory.
    """
    with open(filename, "wb") as f:
        pickle.dump(kmeans, f)

In [6]:
def clusterSong():
    """Cluster the songs in ``spotiftySongAuidoFeatures`` and label each row.

    Steps:
      1. Select the numeric columns of the module-level frame as features,
         skipping any existing ``"cluster"`` column.
      2. Fit the scaler obtained from ``load("scaler.pickle")`` on those
         features and scale them.
      3. Fit the model obtained from ``load("kmeans.pickle")`` on the scaled
         features and predict a label for every row.
      4. Store the labels in the ``"cluster"`` column and persist both
         objects with ``saveScaler`` / ``saveKmeans``.

    Returns
    -------
    pandas.DataFrame
        The module-level ``spotiftySongAuidoFeatures`` frame itself (it is
        modified in place, not copied), now carrying a ``"cluster"`` column.

    Raises
    ------
    ValueError
        If the frame has no numeric feature columns to cluster on.
    """
    numeric_columns = spotiftySongAuidoFeatures.select_dtypes(include=np.number).columns
    # The labels written below are numeric too. Without this filter a second
    # call (e.g. re-running the cell) would feed the previous labels back in
    # as a feature and change the clustering.
    feature_columns = [column for column in numeric_columns if column != "cluster"]
    if not feature_columns:
        raise ValueError("spotiftySongAuidoFeatures has no numeric feature columns to cluster on")
    features = spotiftySongAuidoFeatures[feature_columns]

    # Both estimators are (re)fitted on the current data on every call.
    scaler = load("scaler.pickle")
    scaled_values = scaler.fit_transform(features)
    scaled_features = pd.DataFrame(scaled_values, columns=features.columns)

    kmeans = load("kmeans.pickle")
    kmeans.fit(scaled_features)

    # predict() returns one label per row in row order, so the assignment is
    # positional against the original frame.
    spotiftySongAuidoFeatures["cluster"] = kmeans.predict(scaled_features)

    saveScaler(scaler)
    saveKmeans(kmeans)

    return spotiftySongAuidoFeatures
    
    

In [7]:
# Run the clustering, then export the labelled table without the index column.
clusteredSpotiftySongAuidoFeatures = clusterSong()
clusteredSpotiftySongAuidoFeatures.to_csv(
    path_or_buf="ClusteredAudioFeaturesFromSpotify.csv",
    index=False,
)