## Visualizations

In [222]:
import pandas as pd
import altair as alt
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

In [52]:
# One row per track (ISRC); this frame feeds most of the cells below.
by_isrc_csv_path = 'data/checkpoint/by_isrc_oldest.csv'
by_isrc = pd.read_csv(by_isrc_csv_path)

In [54]:
# Transposed preview: one column per track, one row per field.
by_isrc.head(5).transpose()

Unnamed: 0,0,1,2,3,4
isrc,AEA040700577,AEA040700578,AEA040700579,AEA040700580,AEA040700581
genres,"['j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop', 'pop']"
name,Bala Wala Chi,Houdou Nisbi,Nafs Al Sheghlat,Yalla Kichou Barra,Ma Tfel
artists,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani,Ziad Rahbani
album,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi,Houdou Nisbi
release_date,1985-01-01,1985-01-01,1985-01-01,1985-01-01,1985-01-01
release_date_precision,day,day,day,day,day
uri,spotify:track:0fylgLeNObjVvwhd8caHqX,spotify:track:0yMFpBNCYXqwwOAg23bC8a,spotify:track:6G8l1kI8QlTD0UDIak5F8H,spotify:track:21g76Lq5Jg4QvfTDvi4PlH,spotify:track:0pKxrkFh8fxPKpkO29MYmi
spotify_id,0fylgLeNObjVvwhd8caHqX,0yMFpBNCYXqwwOAg23bC8a,6G8l1kI8QlTD0UDIak5F8H,21g76Lq5Jg4QvfTDvi4PlH,0pKxrkFh8fxPKpkO29MYmi
chart_power,,,,,


## Find the biggest genres

In [55]:
# Load the genre table and discard fully identical rows in one expression.
df = pd.read_csv('data/DE/data-neu.csv').drop_duplicates()

In [56]:
# Track x genre indicator table: one row per ISRC, one column per genre.
# crosstab counts how often each (isrc, genre) pair occurs; clip(upper=1) caps
# repeated pairs at 1 so every cell is a 0/1 membership flag. The vectorised
# clip replaces the element-wise applymap(lambda ...) call, which is deprecated
# since pandas 2.1 and far slower on a table of this size.
ct = (
    pd.crosstab(df['isrc'], df['genres'])
    .clip(upper=1)
    .reset_index()  # expose 'isrc' as a regular column so later cells can merge on it
)

In [65]:
# Column-wise totals of the indicator table, i.e. the number of tracks per genre.
# NOTE(review): `ct` still contains the string column 'isrc' (added by
# reset_index), so that column is summed as well; the resulting 'isrc' entry is
# dropped again two cells further down.
ct_sum = ct.sum()

In [104]:
# Turn the totals Series into a single-column frame (the column is labelled 0).
ct_sum_df = ct_sum.to_frame()

In [105]:
# Remove the 'isrc' row: it is an identifier, not a genre total.
ct_sum_df = ct_sum_df.drop(index='isrc')

In [106]:
# Give the single totals column a readable name.
ct_sum_df = ct_sum_df.rename(columns={0: 'count'})

In [143]:
# Genre names ordered from most to least frequent.
genres_by_count = ct_sum_df.sort_values(by='count', ascending=False)
sorted_genres = list(genres_by_count.index)

In [142]:
# Show the 15 most frequent genres.
ct_sum_df.sort_values(by='count', ascending=False).head(15)

Unnamed: 0_level_0,count
genres,Unnamed: 1_level_1
pop,118762
rock,93935
classical,93628
german,37752
jazz,30068
folk,30053
punk,24275
metal,15761
country,15546
french,14217


The biggest genre is pop. Hard-rock is only in 12th place.

Count occurrences of the top genres per year

In [298]:
# Yearly number of tracks for each of the ten most frequent genres:
# attach the 0/1 genre flags to every track, keep only the top-10 genre
# columns, and add the flags up per release year.
top_genre_columns = sorted_genres[:10]
by_isrc_year = (
    by_isrc[['year', 'isrc']]
    .merge(ct, on=['isrc'], how='left')
    .set_index('year')[top_genre_columns]
    .groupby('year')
    .sum()
)
# Years become strings so they can be matched against string year labels later.
by_isrc_year.index = by_isrc_year.index.astype(str)

## Development of features over the years

In [116]:
# NOTE(review): the frame loaded here is replaced by `df = by_isrc.copy()` in
# the next cell before anything reads it, so this read looks redundant —
# confirm whether data-year.csv is still needed.
df = pd.read_csv('data/DE/data-year.csv')

In [119]:
# Work on an independent copy so `by_isrc` itself stays untouched.
df = by_isrc.copy(deep=True)

In [120]:
# Transposed look at the first two tracks.
df.iloc[:2].transpose()

Unnamed: 0,0,1
isrc,AEA040700577,AEA040700578
genres,"['j-pop', 'jazz', 'pop']","['groove', 'j-pop', 'jazz', 'pop']"
name,Bala Wala Chi,Houdou Nisbi
artists,Ziad Rahbani,Ziad Rahbani
album,Houdou Nisbi,Houdou Nisbi
release_date,1985-01-01,1985-01-01
release_date_precision,day,day
uri,spotify:track:0fylgLeNObjVvwhd8caHqX,spotify:track:0yMFpBNCYXqwwOAg23bC8a
spotify_id,0fylgLeNObjVvwhd8caHqX,0yMFpBNCYXqwwOAg23bC8a
chart_power,,


In [121]:
# Per-year summary of the track table: the mean of every audio feature plus
# the number of tracks released in that year.
yearly_mean_columns = ['popularity', 'danceability', 'energy',
                       'key', 'loudness', 'mode', 'speechiness',
                       'acousticness', 'instrumentalness', 'liveness',
                       'valence', 'tempo', 'duration_ms',
                       'time_signature']

tracks_by_year = df.groupby('year')
songs_per_year = tracks_by_year[yearly_mean_columns].mean()
# size() counts the rows of each year directly; this replaces the helper
# column of -1 values that previously existed only so it could be counted.
songs_per_year['count'] = tracks_by_year.size()
# Return 'year' to a regular column for plotting and merging.
songs_per_year = songs_per_year.reset_index()

In [122]:
# Mean track length in minutes (60,000 ms per minute).
ms_per_minute = 1000 * 60
songs_per_year['duration_min'] = songs_per_year['duration_ms'] / ms_per_minute

In [123]:
# First ten years of the yearly summary.
songs_per_year.iloc[:10]

Unnamed: 0,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,count,duration_min
0,1980,6.932103,0.499812,0.485525,5.110445,-12.356653,0.703678,0.065002,0.521989,0.169238,0.220967,0.543921,118.259726,247998.407916,3.830705,35701,4.133307
1,1981,7.112235,0.501099,0.488608,5.164865,-12.559169,0.699801,0.068562,0.511205,0.175626,0.222056,0.5414,118.658803,244699.429525,3.832266,33118,4.078324
2,1982,7.169674,0.503354,0.486809,5.193071,-12.731032,0.698866,0.077487,0.507867,0.175538,0.221643,0.531725,118.37384,247427.740306,3.835526,31926,4.123796
3,1983,7.047373,0.495283,0.471217,5.186754,-13.219378,0.687359,0.065915,0.51633,0.193409,0.217015,0.519328,117.784345,251536.693328,3.830344,34682,4.192278
4,1984,7.346757,0.491733,0.47795,5.207964,-13.240091,0.674151,0.06466,0.512988,0.18815,0.222164,0.507015,117.703958,249410.156878,3.837688,37545,4.156836
5,1985,7.163784,0.488324,0.470269,5.156586,-13.452114,0.684718,0.061279,0.523986,0.203092,0.215998,0.506548,117.184774,255583.19363,3.833289,37928,4.25972
6,1986,6.717974,0.47406,0.456206,5.176228,-13.903385,0.686777,0.062151,0.543401,0.219038,0.211249,0.486566,116.233584,256471.286957,3.82977,42184,4.274521
7,1987,6.634905,0.469873,0.443336,5.180522,-14.392384,0.692525,0.061195,0.553069,0.238068,0.207147,0.473718,116.127546,259072.147444,3.819021,50365,4.317869
8,1988,6.138412,0.464916,0.428971,5.169508,-14.948866,0.68701,0.062721,0.565367,0.260287,0.210766,0.472932,115.397637,262048.766115,3.810233,58593,4.367479
9,1989,6.067652,0.467819,0.41877,5.19164,-15.100176,0.691717,0.064646,0.572174,0.254533,0.206904,0.466538,115.049823,261012.425156,3.807026,65216,4.350207


In [124]:
# List the columns available in the yearly summary.
songs_per_year.columns

Index(['year', 'popularity', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'duration_ms', 'time_signature', 'count',
       'duration_min'],
      dtype='object')

In [125]:
# Store years as strings so the charts below treat them as discrete labels.
songs_per_year = songs_per_year.assign(year=songs_per_year['year'].astype(str))

In [126]:
# Bar chart: number of tracks per release year.
tracks_per_year_chart = alt.Chart(songs_per_year).mark_bar()
tracks_per_year_chart.encode(x="year", y="count")

In [127]:
# Columns of the yearly summary that describe a track's audio profile
# (duration is taken in minutes here).
features = [
    'popularity',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_min',
    'time_signature',
]

In [283]:
# Quick look at the yearly summary before plotting.
songs_per_year.head(5)

Unnamed: 0,year,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,count,duration_min
0,1980,6.932103,0.499812,0.485525,5.110445,-12.356653,0.703678,0.065002,0.521989,0.169238,0.220967,0.543921,118.259726,247998.407916,3.830705,35701,4.133307
1,1981,7.112235,0.501099,0.488608,5.164865,-12.559169,0.699801,0.068562,0.511205,0.175626,0.222056,0.5414,118.658803,244699.429525,3.832266,33118,4.078324
2,1982,7.169674,0.503354,0.486809,5.193071,-12.731032,0.698866,0.077487,0.507867,0.175538,0.221643,0.531725,118.37384,247427.740306,3.835526,31926,4.123796
3,1983,7.047373,0.495283,0.471217,5.186754,-13.219378,0.687359,0.065915,0.51633,0.193409,0.217015,0.519328,117.784345,251536.693328,3.830344,34682,4.192278
4,1984,7.346757,0.491733,0.47795,5.207964,-13.240091,0.674151,0.06466,0.512988,0.18815,0.222164,0.507015,117.703958,249410.156878,3.837688,37545,4.156836


In [282]:
# Interactive line chart of the yearly feature averages. A dropdown swaps the
# plotted column of `songs_per_year`.

# Dropdown entries: 'label' is shown to the reader, 'value' is the column name.
dropdown_options = [
    {'label': 'Duration (ms)', 'value': 'duration_ms'},
    {'label': 'Popularity', 'value': 'popularity'},
    {'label': 'Danceability', 'value': 'danceability'},
    {'label': 'Energy', 'value': 'energy'},
    {'label': 'Key', 'value': 'key'},
    {'label': 'Loudness', 'value': 'loudness'},
    {'label': 'Mode', 'value': 'mode'},
    {'label': 'Speechiness', 'value': 'speechiness'},
    {'label': 'Acousticness', 'value': 'acousticness'},
    {'label': 'Instrumentalness', 'value': 'instrumentalness'},
    {'label': 'Liveness', 'value': 'liveness'},
    {'label': 'Valence', 'value': 'valence'},
    {'label': 'Tempo', 'value': 'tempo'},
    {'label': 'Time Signature', 'value': 'time_signature'},
    {'label': 'Count', 'value': 'count'},
    {'label': 'Duration (min)', 'value': 'duration_min'}
]

# The first entry is what the chart shows before any selection is made.
initial_option = dropdown_options[0]

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=songs_per_year['year'],
    y=songs_per_year[initial_option['value']],
    name=initial_option['label']
))

# One button per feature. Each button updates the trace (first dict) and the
# layout (second dict).
dropdown_menu = go.layout.Updatemenu(
    buttons=[
        dict(
            label=option['label'],
            method='update',
            args=[
                # Swap the data AND the legend name; previously the legend kept
                # saying "Duration (ms)" whichever feature was displayed.
                {'y': [songs_per_year[option['value']]],
                 'name': [option['label']]},
                # Use the label unchanged so the axis title matches the
                # dropdown entry (capitalize() lower-cased "Time Signature").
                {'yaxis': {'title': option['label']}}
            ],
        )
        for option in dropdown_options
    ],
    direction='down',
    showactive=True,
)

fig.update_layout(
    height=800,
    updatemenus=[dropdown_menu],
    yaxis_title=initial_option['label'],
    xaxis_title='year',
    title='Development of features',
    showlegend=True
)

fig.show()

In [None]:
# Feature development per genre: one line per top-10 genre, a dropdown to pick
# the feature, and the legend to switch genres on and off.
#
# This replaces a draft with four defects:
#   * every genre trace plotted the same overall yearly mean;
#   * the dropdown buttons had no labels;
#   * the final loop overwrote `updatemenus` on each pass, discarding the dropdown;
#   * 'pop' could not be selected again once hidden.

# Retained because it is a notebook-level name; it is not used by this chart.
songs_per_year_genres = songs_per_year.merge(by_isrc_year, on='year', how='right')

genres = ['rock', 'classical', 'german', 'jazz', 'folk', 'punk', 'metal', 'country', 'french']
genre_names = ['pop'] + genres

# Features offered in the dropdown (column names of the per-genre tables).
dropdown_options = ['duration_ms', 'popularity', 'danceability', 'energy', 'key', 'loudness',
                    'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
                    'valence', 'tempo', 'duration_min']

# 'duration_min' does not exist on the track table, so derive it here.
raw_features = [feature for feature in dropdown_options if feature != 'duration_min']
tracks_with_genres = (
    by_isrc[['isrc', 'year'] + raw_features]
    .assign(duration_min=lambda tracks: tracks['duration_ms'] / (1000 * 60))
    # Attach the 0/1 genre flags of every track.
    .merge(ct[['isrc'] + genre_names], on='isrc', how='left')
)

# Mean of every feature per year, computed separately for the tracks of each
# genre. All tables are re-indexed to the same years so the traces share one
# x axis and the dropdown can swap y values position by position.
all_years = sorted(tracks_with_genres['year'].dropna().unique())
genre_year_means = {
    genre: (
        tracks_with_genres.loc[tracks_with_genres[genre] == 1]
        .groupby('year')[dropdown_options]
        .mean()
        .reindex(all_years)
    )
    for genre in genre_names
}
year_labels = [str(year) for year in all_years]

initial_feature = dropdown_options[0]

fig = go.Figure()
for genre in genre_names:
    fig.add_trace(go.Scatter(
        x=year_labels,
        y=genre_year_means[genre][initial_feature],
        name=genre,
        # Only 'pop' is drawn at first; 'legendonly' keeps the other genres in
        # the legend so a click there shows or hides them.
        visible=True if genre == 'pop' else 'legendonly'
    ))

# One button per feature; each supplies a new y series for every genre trace,
# in the same order the traces were added.
dropdown_menu = go.layout.Updatemenu(
    buttons=[
        dict(
            label=feature,
            method='update',
            args=[
                {'y': [genre_year_means[genre][feature] for genre in genre_names]},
                {'yaxis': {'title': feature}}
            ],
        )
        for feature in dropdown_options
    ],
    direction='down',
    showactive=True,
)

# Horizontal legend above the plot area.
legend = dict(
    title='Genre',
    orientation='h',
    y=1.1,
    x=0.5,
    xanchor='center',
    bgcolor='rgba(255, 255, 255, 0.5)'
)

fig.update_layout(
    height=800,
    updatemenus=[dropdown_menu],
    legend=legend,
    yaxis_title=initial_feature,
    title='Development of features by genre',
    xaxis_title='Year',
    showlegend=True
)

fig.show()

## Development of genres over the years

In [303]:
# From here on `features` holds the ten most frequent genre names,
# which the genre development chart below iterates over.
features = sorted_genres[0:10]

In [301]:
# Move 'year' from the index into a regular column for plotting.
dev_genres_df = by_isrc_year.reset_index(drop=False)

In [302]:
# Display the yearly track counts of the top genres (one row per year).
dev_genres_df

Unnamed: 0,year,pop,rock,classical,german,jazz,folk,punk,metal,country,french
0,1980,11523,8459,5302,2248,2601,3065,2165,737,1804,888
1,1981,10277,8302,5424,2471,2117,2177,2455,823,1711,1139
2,1982,10359,7874,5393,2185,2239,41,2417,803,44,962
3,1983,10033,8292,6917,3433,1998,2416,2366,1114,1483,1129
4,1984,12202,8837,7017,3047,2250,2564,2419,1532,1265,1365
5,1985,11372,8721,7751,3443,2450,2779,2251,1612,1640,1193
6,1986,11430,9258,9244,3824,2956,3772,2322,1807,1628,1278
7,1987,12310,10706,12349,4458,3599,4267,2510,2293,1876,1498
8,1988,13809,11045,16545,6465,4639,4272,2736,2380,1930,2167
9,1989,15447,12441,17686,6178,5212,4700,2634,2660,2165,2598


In [305]:
# Interactive line chart of the yearly track count per genre. A dropdown picks
# which of the genres in `features` is shown.
initial_genre = 'pop'

fig = go.Figure()
fig.add_trace(go.Scatter(
    x=dev_genres_df['year'],
    y=dev_genres_df[initial_genre],
    name=initial_genre.capitalize()
))

# One button per genre. Each button updates the trace (first dict) and the
# layout (second dict).
dropdown_menu = go.layout.Updatemenu(
    buttons=[
        dict(
            label=option,
            method='update',
            args=[
                # Swap the data AND the legend name; previously the legend kept
                # saying "Pop" whichever genre was displayed.
                {'y': [dev_genres_df[option]],
                 'name': [option.capitalize()]},
                {'yaxis': {'title': option.capitalize()}}
            ],
        )
        for option in features
    ],
    direction='down',
    showactive=True,
)

fig.update_layout(
    height=800,
    updatemenus=[dropdown_menu],
    # Capitalised like the titles set by the buttons (was the lower-case 'pop').
    yaxis_title=initial_genre.capitalize(),
    xaxis_title='year',
    # The chart shows genres; the old title said "features".
    title='Development of genres'
)

fig.show()

## Add the genres to the features

In [130]:
# Reminder of the track-level columns before scaling the features.
df.head(5)

Unnamed: 0,isrc,genres,name,artists,album,release_date,release_date_precision,uri,spotify_id,chart_power,...,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year
0,AEA040700577,"['j-pop', 'jazz', 'pop']",Bala Wala Chi,Ziad Rahbani,Houdou Nisbi,1985-01-01,day,spotify:track:0fylgLeNObjVvwhd8caHqX,0fylgLeNObjVvwhd8caHqX,,...,1,0.0418,0.952,0.382,0.237,0.371,157.168,207560,3,1985
1,AEA040700578,"['groove', 'j-pop', 'jazz', 'pop']",Houdou Nisbi,Ziad Rahbani,Houdou Nisbi,1985-01-01,day,spotify:track:0yMFpBNCYXqwwOAg23bC8a,0yMFpBNCYXqwwOAg23bC8a,,...,0,0.0308,0.814,0.951,0.0827,0.495,128.386,272240,3,1985
2,AEA040700579,"['groove', 'j-pop', 'jazz', 'pop']",Nafs Al Sheghlat,Ziad Rahbani,Houdou Nisbi,1985-01-01,day,spotify:track:6G8l1kI8QlTD0UDIak5F8H,6G8l1kI8QlTD0UDIak5F8H,,...,1,0.0449,0.954,0.887,0.271,0.304,82.63,160093,4,1985
3,AEA040700580,"['groove', 'j-pop', 'jazz', 'pop']",Yalla Kichou Barra,Ziad Rahbani,Houdou Nisbi,1985-01-01,day,spotify:track:21g76Lq5Jg4QvfTDvi4PlH,21g76Lq5Jg4QvfTDvi4PlH,,...,1,0.0341,0.43,0.838,0.124,0.656,102.485,155667,4,1985
4,AEA040700581,"['groove', 'j-pop', 'jazz', 'pop', 'pop']",Ma Tfel,Ziad Rahbani,Houdou Nisbi,1985-01-01,day,spotify:track:0pKxrkFh8fxPKpkO29MYmi,0pKxrkFh8fxPKpkO29MYmi,,...,0,0.0436,0.749,0.941,0.115,0.752,150.656,265173,4,1985


In [239]:
# Rescale every audio feature with MinMaxScaler so the features share a
# common scale in the radar chart. 'isrc' is parked in the index during
# scaling and restored as a column afterwards.
audio_feature_columns = ['danceability', 'energy',
                         'key', 'loudness', 'mode', 'speechiness',
                         'acousticness', 'instrumentalness', 'liveness',
                         'valence', 'time_signature', 'tempo']

scaler = MinMaxScaler()
unscaled_features = by_isrc.set_index('isrc')[audio_feature_columns]
scaled_values = scaler.fit_transform(unscaled_features)
by_isrc_scaled = pd.DataFrame(
    scaled_values,
    columns=unscaled_features.columns,
    index=unscaled_features.index,
).reset_index()

In [240]:
# Attach the 0/1 genre flags to the scaled features; the right join keeps
# every track present in `by_isrc_scaled`.
by_isrc_genres = pd.merge(ct, by_isrc_scaled, on=['isrc'], how='right')

In [241]:
# Genre flags followed by the scaled features, first five tracks.
by_isrc_genres.head(5)

Unnamed: 0,isrc,acoustic,afrobeat,alt-rock,alternative,ambient,anime,black-metal,bluegrass,blues,...,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,time_signature,tempo
0,AEA040700577,0,0,0,0,0,0,0,0,0,...,0.818182,0.620601,1.0,0.043182,0.955823,0.382,0.237,0.371,0.6,0.638375
1,AEA040700578,0,0,0,0,0,0,0,0,0,...,0.636364,0.618011,0.0,0.031818,0.817269,0.951,0.0827,0.495,0.6,0.52147
2,AEA040700579,0,0,0,0,0,0,0,0,0,...,0.272727,0.578544,1.0,0.046384,0.957831,0.887,0.271,0.304,0.8,0.335621
3,AEA040700580,0,0,0,0,0,0,0,0,0,...,0.454545,0.626845,1.0,0.035227,0.431727,0.838,0.124,0.656,0.8,0.416267
4,AEA040700581,0,0,0,0,0,0,0,0,0,...,0.454545,0.631776,0.0,0.045041,0.752008,0.941,0.115,0.752,0.8,0.611925


## Create Radar Chart for the top 10 genres

In [235]:
# The ten genres that get a radar trace below.
sorted_genres[0:10]

['pop',
 'rock',
 'classical',
 'german',
 'jazz',
 'folk',
 'punk',
 'metal',
 'country',
 'french']

In [242]:
# Radar chart: mean scaled feature profile of each of the ten biggest genres.

# 'danceability' appears again at the end, presumably so the last point
# coincides with the first and each outline is drawn closed.
radar_features = ['danceability', 'energy',
                  'key', 'loudness', 'mode', 'speechiness',
                  'acousticness', 'instrumentalness', 'liveness',
                  'valence', 'time_signature', 'tempo', 'danceability']

fig = go.Figure()
for genre in sorted_genres[:10]:
    # Select the genre's tracks and the radar columns in one step; the wide
    # frame is no longer copied in full once per genre.
    is_in_genre = by_isrc_genres[genre] == 1
    genre_means = by_isrc_genres.loc[is_in_genre, radar_features].mean()

    fig.add_trace(go.Scatterpolar(
        r = genre_means.to_numpy(),
        theta = genre_means.index.tolist(),
        mode = 'lines',
        fill = 'none',
        name = genre
    ))

In [243]:
# Enlarge the radar chart and render it.
radar_layout = dict(height=800)
fig.update_layout(**radar_layout)
fig.show()