In [72]:
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
import pandas as pd

In [73]:
df = pd.read_csv("song_chars.csv")
df = df.iloc[:, 1:]
df.rename({"month": "target"}, axis=1, inplace=True)
df.drop(["song_name", "artist"], axis=1, inplace=True)

In [74]:
def key_to_int(key):
    keys = {
        "C": 0, "C#/Db": 1, "D": 2, "D#/Eb": 3, "E": 4, "F": 5, 
        "F#/Gb": 6, "G": 7, "G#/Ab": 8, "A": 9, "A#/Bb": 10, "B": 11
    }
    return keys[key]

In [75]:
df["key"] = df["key"].apply(key_to_int)

In [76]:
df.drop(["year", "season"], axis=1, inplace=True)

In [77]:
df

Unnamed: 0,target,danceability,energy,key,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms
0,1,0.754,0.819,0,0.1120,0.0684,0.000000,0.3450,0.8180,142.137,215800
1,1,0.691,0.764,9,0.0386,0.1120,0.004190,0.0509,0.8330,115.980,240013
2,1,0.661,0.346,11,0.0300,0.4520,0.000000,0.1310,0.2560,129.752,263667
3,1,0.610,0.497,9,0.0279,0.3200,0.000061,0.0884,0.7390,85.031,250360
4,1,0.590,0.666,4,0.0244,0.1560,0.000000,0.1360,0.0743,104.042,263800
...,...,...,...,...,...,...,...,...,...,...,...
21045,9,0.776,0.808,11,0.1170,0.0297,0.000127,0.1030,0.4940,123.988,184104
21046,9,0.855,0.488,3,0.2080,0.5060,0.000000,0.3470,0.6190,124.097,174253
21047,9,0.797,0.631,3,0.0786,0.0904,0.000004,0.0998,0.2880,129.915,184213
21048,9,0.792,0.548,4,0.0820,0.0784,0.000073,0.1670,0.1970,110.105,198614


In [78]:
target = df["target"]
features = df.iloc[:, 1:]

In [79]:
dimensionalities = {"PCA": PCA(n_components=2), "TSNE": TSNE(n_components=2)}

In [70]:
for name, d_algo in dimensionalities.items():
    features_standardized = StandardScaler().fit_transform(features)
    reduced_data = d_algo.fit_transform(features_standardized)
    reduced_df = pd.DataFrame(reduced_data, columns = ["Component1", "Component2"])
    reduced_df["target"] = target
    graph = px.scatter(reduced_df, x="Component1", y="Component2", color = "target")
    graph.show()


The default initialization in TSNE will change from 'random' to 'pca' in 1.2.


The default learning rate in TSNE will change from 200.0 to 'auto' in 1.2.



KeyboardInterrupt: 

In [71]:
fig = px.scatter_matrix(df,
    dimensions=["danceability", "energy", "key", 
                "speechiness", "acousticness", "instrumentalness", 
                "liveness", "valence", "tempo", "duration_ms"],
                color="target")
fig.show()