## Music Trends During Election Cycles

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
df1 = pd.read_csv('../data/top10s.csv', index_col=0)
df2 = pd.read_csv('../data/spotify_dataset.csv', index_col=0)
df3 = pd.read_csv('../data/universal_top_spotify_songs.csv', index_col=0)

In [None]:
print("DF1 Head:")
df1.head()

In [None]:
print("DF2 Head:")
print(df2.head())

In [None]:
print("DF3 Head:")
df3.head()

In [3]:
# Convert the release date to datetime in df2
df2['Release Date'] = pd.to_datetime(df2['Release Date'], errors='coerce')

# Convert snapshot_date to datetime in df3
df3['snapshot_date'] = pd.to_datetime(df3['snapshot_date'], errors='coerce')

In [None]:
# Check for missing values
print(df1.isnull().sum())
print(df2.isnull().sum())
print(df3.isnull().sum())

In [4]:
# Drop rows with missing release dates in df2
df2.dropna(subset=['Release Date'], inplace=True)

# Drop rows with missing snapshot dates in df3
df3.dropna(subset=['snapshot_date'], inplace=True)

In [5]:
# Fill or drop missing values as appropriate
df1.fillna(0, inplace=True)
df2.fillna(0, inplace=True)
df3.fillna(0, inplace=True)

In [6]:
# Ensure numeric columns are in the correct format
df1['year'] = pd.to_numeric(df1['year'], errors='coerce')
df2['Danceability'] = pd.to_numeric(df2['Danceability'], errors='coerce')
df2['Energy'] = pd.to_numeric(df2['Energy'], errors='coerce')
df2['Loudness'] = pd.to_numeric(df2['Loudness'], errors='coerce')
df3['duration_ms'] = pd.to_numeric(df3['duration_ms'], errors='coerce')
df3['popularity'] = pd.to_numeric(df3['popularity'], errors='coerce')
df3['loudness'] = pd.to_numeric(df3['loudness'], errors='coerce')

In [7]:
# Filter data for the required years
df1_filtered = df1[df1['year'].isin([2015, 2016, 2019])]
df2_filtered = df2[df2['Release Date'].dt.year == 2020]
df3_filtered = df3[df3['snapshot_date'].dt.year == 2023]

In [10]:
# Combine the relevant data from all datasets
df1_genre = df1_filtered[['top genre', 'year']].rename(columns={'top genre': 'genre'})
df2_genre = df2_filtered[['Genre', 'Release Date']].rename(columns={'Genre': 'genre'})
df3_genre = df3_filtered[['name', 'snapshot_date']].copy()

In [15]:
print(df3_filtered.head())

                                         name       artists  daily_rank  \
spotify_id                                                                
3rUGC1vUpkDG9CZFHMur1t                 greedy    Tate McRae           1   
4xhsWYTOGcal8zt0J161CU            Lovin On Me   Jack Harlow           2   
1BxfuPKGuaTgP7aM0Bbdwr           Cruel Summer  Taylor Swift           3   
0R6NfOiLzLj4O5VbYSJAjf              La Diabla          Xavi           4   
3vkCueOmm7xQDoJ17W1Pm3  My Love Mine All Mine        Mitski           5   

                        daily_movement  weekly_movement country snapshot_date  \
spotify_id                                                                      
3rUGC1vUpkDG9CZFHMur1t               0               11       0    2023-12-31   
4xhsWYTOGcal8zt0J161CU               0               13       0    2023-12-31   
1BxfuPKGuaTgP7aM0Bbdwr               0               15       0    2023-12-31   
0R6NfOiLzLj4O5VbYSJAjf               0               15       0    20