# Imports

In [2]:
import pandas as pd
import altair as alt

## Globals

In [3]:
# Dataset year: picks the ./Data/<year>/ folder and the <year>_*.csv file names
# used by the cells that load and save the song list.
year = 2024

## Data loading and cleaning

In [None]:
# Remove extra header lines
def remove_extra_headers(df):
    """Return a copy of ``df`` without rows that repeat the header line.

    A row counts as a repeated header only when all four of 'Year',
    'Artist', 'Song Title' and 'Reason' hold their own column name.
    The frame passed in is not modified.
    """
    is_repeated_header = (
        df['Year'].eq('Year')
        & df['Artist'].eq('Artist')
        & df['Song Title'].eq('Song Title')
        & df['Reason'].eq('Reason')
    )
    # Drop by index label, as before, rather than by boolean filtering.
    header_labels = df.loc[is_repeated_header].index
    return df.drop(header_labels)

# Fill all empty reasons with n/a
def fill_empty_reasons(df):
    """Replace missing 'Reason' values with the string 'n/a'.

    The 'Reason' column of ``df`` is overwritten in place, and the same
    frame object is returned for convenience.
    """
    filled_reasons = df['Reason'].fillna(value='n/a')
    df['Reason'] = filled_reasons
    return df

# Get list of unique reasons
def get_unique_reasons(df):
    """Return the distinct values of the 'Reason' column as a plain list,
    in order of first appearance."""
    reason_column = df['Reason']
    distinct_reasons = reason_column.unique()
    return distinct_reasons.tolist()

# Get list of row numbers with nan reason
def get_nan_row_nums(df):
    """Return the index labels of every row whose 'Reason' is missing, as a list."""
    reason_is_missing = df['Reason'].isna()
    missing_labels = df.index[reason_is_missing.to_numpy()]
    return missing_labels.tolist()

In [50]:
# Load this year's song list from its csv file
songs_df = pd.read_csv(filepath_or_buffer="./Data/%s/%s_clean.csv" % (year, year))

In [51]:
# Combine redundant reasons and standardize naming scheme.
# Every raw spelling maps straight to its final label in one pass, so no value
# can be left at an intermediate spelling that a later rule fails to match.
# (Previously the truncated 'drinking / drugs / p' became
# 'drinking/drugs/partying', which the next rule did not recognise, leaving
# those rows in a category separate from 'drinking, drugs, partying'.)
reason_aliases = {
    # Presumably left-over header rows whose 'Reason' cell holds the column name.
    'reason': 'n/a',
    'drinking / drugs / p': 'drinking, drugs, partying',
    'drinking / drugs / partying': 'drinking, drugs, partying',
    'content - sexual': 'sexual content',
    'language / content': 'language',
    'language/content - sad': 'sad content',
    'content - sad': 'sad content',
    'content - sensitive topic': 'sensitive content',
}

# Reassign instead of renaming in place so the cell has no hidden mutation.
songs_df = songs_df.rename(columns={'Song Title': 'Title'})
songs_df['Reason'] = (
    songs_df['Reason']
    .str.lower()
    # Strip both ends: trailing whitespace would otherwise defeat the
    # exact-match replacements below just as leading whitespace would.
    .str.strip()
    .replace(reason_aliases)
)

# Text-Based Analysis

In [None]:
# Tally how many rows carry each distinct reason
print(songs_df.loc[:, 'Reason'].value_counts())

Reason
language                     886
sexual content               248
drinking, drugs, partying    156
sad content                   82
violence                      19
sensitive content              8
religion                       8
Name: count, dtype: int64


In [64]:
# Tally rows per year, listed in ascending year order
print(songs_df.loc[:, 'Year'].value_counts().sort_index())

Year
1978      1
1984      1
2005      1
2008      1
2009      7
2010     22
2011     92
2012     77
2013     55
2014    113
2015    113
2016    127
2017      8
2018     49
2019    156
2020     78
2021    506
Name: count, dtype: int64


In [65]:
# Tally rows per artist and show the 20 most frequent
print(songs_df.loc[:, 'Artist'].value_counts().iloc[:20])

Artist
Drake            44
Juice WRLD       35
Pop Smoke        29
Ariana Grande    28
Nicki Minaj      22
Future           22
The Weeknd       17
Eminem           16
Doja Cat         16
Lil Baby         16
Lil Uzi Vert     15
J. Cole          15
21 Savage        14
DaBaby           14
Rihanna          14
Kanye West       14
Polo G           13
Beyonce          12
Chris Brown      12
Pitbull          12
Name: count, dtype: int64


In [13]:
# Cross-tabulate artists (rows) against reasons (columns), then rank the
# artists by their 'violence' count, highest first, and preview the top rows
result = pd.crosstab(index=songs_df['Artist'], columns=songs_df['Reason'])
sorted_df = result.sort_values('violence', ascending=False)
sorted_df.head(5)

Reason,"drinking, drugs, partying",language,religion,sad content,sensitive content,sexual content,violence
Artist,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Taylor Swift,1,1,0,5,1,3,0
Rednex,0,0,0,0,1,0,0
BTS,2,0,0,2,1,1,1
Red Velvet,0,0,0,0,1,1,0
Hippie Sabotage,0,0,0,0,1,0,0


In [67]:
# Write the cleaned frame (without the row index) to csv; the file can be used
# with the CSV to Spotify app
songs_df.to_csv(path_or_buf='./Data/%s/%s_post_clean.csv' % (year, year), index=False)

# Graphical Analysis

In [4]:
# Import the cleaned song list from csv.
# Only empty fields are treated as missing here. pandas' default NA markers
# include 'n/a', which is the placeholder this notebook writes into 'Reason',
# so default parsing would silently turn those rows back into NaN on reload.
songs_df = pd.read_csv(
    "./Data/%s/%s_post_clean.csv" % (year, year),
    keep_default_na=False,
    na_values=[''],
)

In [74]:
# Bar chart of how often each reason occurs, built with Altair
chart = alt.Chart(songs_df).properties(
    title='Counts of Reasons',
    width=600,
    height=400
)
chart = chart.mark_bar()
chart = chart.encode(
    # One bar per reason, ordered by descending count
    x=alt.X('Reason:N', title='Reason', sort='-y'),
    # Bar height is the number of rows for that reason
    y=alt.Y('count():Q', title='Count'),
    # Distinct colour per reason; legend hidden
    color=alt.Color('Reason:N', legend=None),
    # Hovering a bar shows the reason and its count
    tooltip=['Reason', 'count()']
)

# Render the chart as the cell output
chart