In [1]:
import pandas as pd

from sklearn.manifold import TSNE

import altair as alt

# Five-colour palette; the charts further down pick individual entries from it.
data5_palette = [
    "#DE6449",
    "#00D856",
    "#66999B",
    "#52D1DC",
    "#F3A712",
]
# Near-black chart background and a light foreground colour.
BG = "#0c0c0c"
FG = "#eadeda"
# Altair colour scale whose output range is the palette above.
color_scheme = alt.Scale(range=data5_palette)

In [2]:
# Column labels for the header-less representations file: the song identifier
# followed by 3361 positional columns named H0 ... H3360.
colnames = ["song_id", *(f"H{idx}" for idx in range(3361))]

In [3]:
# The CSV carries no header row, so the labels built above are supplied here.
repr_df = pd.read_csv(
    "flattened_representations.csv",
    names=colnames,
    header=None,
)

In [4]:
# Boolean Series indexed by column name: True where the column has any NaN.
na_cols_series = repr_df.isna().any()
# Masking the Series with itself keeps only the True entries; their index
# labels are the affected column names (same order as the columns).
# `Series.iteritems()` was removed in pandas 2.0, so it is not used here.
na_cols_lst = na_cols_series[na_cols_series].index.tolist()
print(len(na_cols_lst))

1


In [5]:
# Remove every column that contains a missing value (mutates repr_df in place).
repr_df.drop(columns=na_cols_lst, inplace=True)

In [6]:
# Everything except the identifier column is used as t-SNE input.
num_cols = repr_df.columns[repr_df.columns != "song_id"].tolist()
to_tsne = repr_df.loc[:, num_cols]

In [7]:
# Project the feature columns down to two dimensions.
# t-SNE is stochastic: without a fixed random_state every kernel restart
# yields a different embedding, so the resulting chart is not reproducible.
tsne = TSNE(n_components=2, random_state=42).fit_transform(to_tsne)

In [8]:
# Wrap the 2-D embedding in a labelled frame and attach the song identifiers;
# the assignment aligns on row index, matching rows of repr_df positionally
# as long as repr_df keeps its default index.
tsne = pd.DataFrame(tsne, columns=["TSNE1", "TSNE2"]).assign(
    song_id=repr_df["song_id"]
)

In [9]:
# Song metadata table; the file is semicolon-delimited.
songs_df = pd.read_csv(
    "../../data/combined_df_final.csv",
    sep=";",
)

In [10]:
# Collapse the metadata to one row per song_id using the first() aggregate;
# song_id becomes the index of the result.
songs_grouped = (
    songs_df
    .groupby("song_id")
    .first()
)

In [11]:
# Join song metadata with the t-SNE coordinates on the song identifier.
# `song_id` is the index level of songs_grouped (from the groupby) and a plain
# column of tsne; pandas resolves the key name against both.
# The key is given once -- the original listed it twice, which is redundant.
combined_df = songs_grouped.merge(tsne, on="song_id")

In [12]:
# Chart date is stored as day.month.year text.
combined_df["date_chart"] = pd.to_datetime(
    combined_df["date_chart"], format="%d.%m.%Y"
)
combined_df["month"] = combined_df["date_chart"].dt.month

# Parse the release date from its own column. The original re-parsed
# `date_chart` here, which made the release date (and the release year used by
# the slider) a copy of the chart date.
# NOTE(review): assumes the songs CSV provides an `album_release_date` column
# in YYYY-MM-DD form -- confirm against the data.
combined_df["album_release_date"] = pd.to_datetime(
    combined_df["album_release_date"], format="%Y-%m-%d"
)
# `year` is the release year; the earlier chart-date-based assignment was
# always overwritten by this one, so it has been removed.
combined_df["year"] = combined_df["album_release_date"].dt.year

In [13]:
# Strip single quotes and square brackets from the artist names
# (presumably the column holds a stringified list -- confirm).
# `regex` is passed explicitly: its default changed between pandas versions,
# and a bare "[" is not a valid regular expression, so relying on the default
# is fragile. One character class removes all three characters in a single pass.
combined_df["artists_names"] = combined_df["artists_names"].str.replace(
    r"['\[\]]", "", regex=True
)

  combined_df["artists_names"] = combined_df["artists_names"].str.replace("'", "")\


In [26]:
# Lift Altair's row-count limit so the whole frame can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Slider widget covering release years 2006-2021 in steps of one.
slider = alt.binding_range(
    name='Release Year',
    min=2006,
    max=2021,
    step=1,
)
# Single-value selection on the `year` field, driven by the slider and
# initialised to 2007.
selection = alt.selection_single(
    fields=['year'],
    init={'year': 2007},
    bind=slider,
)

# Background layer: every song as a small, half-transparent dot in the first
# palette colour. Points matching the slider selection get opacity 0 here.
everything = (
    alt.Chart(combined_df)
    .mark_circle(size=30)
    .encode(
        x=alt.X("TSNE1", title='', axis=None),
        y=alt.Y("TSNE2", title='', axis=None),
        color=alt.value(data5_palette[0]),
        opacity=alt.condition(selection, alt.value(0), alt.value(0.5)),
    )
    # The selection (and its slider) is registered on this layer.
    .add_selection(selection)
    .properties(width=800, height=450)
)

# Highlight layer: larger, nearly opaque dots in the second palette colour,
# restricted to the songs whose `year` matches the slider selection.
current_year = alt.Chart(
    combined_df
).mark_circle(
    size=60, opacity=0.9,
).encode(
    x=alt.X("TSNE1", title='', axis=None),
    # axis=None like every other layer, so no stray y axis is drawn.
    y=alt.Y("TSNE2", title='', axis=None),
    color=alt.value(data5_palette[1]),
).transform_filter(
    # The selection was previously passed as a bare positional argument after
    # keyword arguments inside encode(), which is a SyntaxError. Filtering the
    # data by the selection is the way to show only the chosen year.
    selection
).properties(
    width=800,
    height=450
)

# Invisible layer (opacity 0) over all songs that only carries the hover
# tooltip: song name, artists and album release date.
tooltip = alt.Chart(
    combined_df
).mark_circle(
    opacity=0,
).encode(
    x=alt.X("TSNE1", title='', axis=None),
    y=alt.Y("TSNE2", title='', axis=None),
    tooltip=["song_name", 
             "artists_names", 
             alt.Tooltip("album_release_date:T")],
).properties(
    width=800,
    # Same size as the other two layers (was 500); layers stacked in one
    # view need to agree on their dimensions.
    height=450
)

# Stack the three layers, paint the background in BG and remove the view's
# border stroke.
layered = (
    alt.layer(everything, current_year, tooltip)
    .configure(background=BG)
    .configure_view(strokeWidth=0)
)

# Bare expression so the notebook renders the chart; call
# .save('autoencoder_viz.html') on it instead to export to a file.
layered