# INST414 Module 1 Analysis:
## Billboard Hot 100 Dataset


In [12]:
import pandas as pd
import requests
import time


In [14]:
billboard = pd.read_csv("hot-100-current.csv")
billboard.head()

Unnamed: 0,chart_week,current_week,title,performer,last_week,peak_pos,wks_on_chart
0,2022-01-01,1,All I Want For Christmas Is You,Mariah Carey,1.0,1,50
1,2022-01-01,2,Rockin' Around The Christmas Tree,Brenda Lee,2.0,2,44
2,2022-01-01,3,Jingle Bell Rock,Bobby Helms,4.0,3,41
3,2022-01-01,4,A Holly Jolly Christmas,Burl Ives,5.0,4,25
4,2022-01-01,5,Easy On Me,Adele,3.0,1,11


In [16]:
#group by song to get single entries with important stats
billboard['chart_week'] = pd.to_datetime(billboard['chart_week'])
agg_bb = billboard.groupby(['title', 'performer']).agg(
    first_week=('chart_week', 'min'),
    total_weeks=('wks_on_chart', 'max'),
    peak_pos=('current_week', 'min')  
).reset_index()
agg_bb.sort_values(by='first_week', ascending = False, inplace = True)

In [18]:
def get_genre_data(song):
    try:
        song_url = f'https://api.deezer.com/search/track?q={song}'
        response = requests.get(song_url)
        
        if response.status_code != 200:
            print(f"Error retrieving song data for {song}. Status code: {response.status_code}")
            return None
        song_data = response.json()
        
        if not song_data['data']:
            print(f"No results found for song: {song}")
            return None
        album_id = song_data['data'][0]['album']['id']
        album_request = requests.get(f'https://api.deezer.com/album/{album_id}')
        
        if album_request.status_code != 200:
            print(f"Error retrieving album data for {song}. Status code: {album_request.status_code}")
            return None
        album_data = album_request.json()
        
        if 'genre_id' not in album_data:
            print(f"Genre data not available for album {album_id}")
            return None
        genre_id = album_data['genre_id']
        genre_request = requests.get(f'https://api.deezer.com/genre/{genre_id}')
        
        if genre_request.status_code != 200:
            print(f"Error retrieving genre data for {song}. Status code: {genre_request.status_code}")
            return None
        genre_data = genre_request.json()
        
        return genre_data.get('name', 'Unknown Genre')
    except Exception as e:
        print(f"Something went wrong. Song was '{song}'. Error: {e}")
        return None

In [20]:
agg_bb = agg_bb[:7000]
agg_bb['genre'] = agg_bb['title'].map(get_genre_data)

Something went wrong. Song was '#SELFIE'. Error: 'data'
Something went wrong. Song was '#Beautiful'. Error: 'data'
Something went wrong. Song was '#thatPOWER'. Error: 'data'
No results found for song: Riff Off: Mickey / Like A Virgin / Hit Me With Your Best Shot...
No results found for song: 9 AM In Dallas
No results found for song: My City Of Ruins (Live From The Kennedy Center Honors)
No results found for song: Stranded (Haiti Mon Amour)


In [22]:
agg_bb['genre'].value_counts()

genre
Rap/Hip Hop             1093
Pop                     1024
Country                  550
Alternative              334
Rock                     264
R&B                      236
Electro                  110
Dance                    104
Films/Games               99
Unknown Genre             38
Asian Music               22
Christian                 21
Latin Music               15
Singer & Songwriter       11
Reggaeton                 10
Traditional Mexicano      10
Reggae                    10
Jazz                       9
Kids                       7
Folk                       7
Classical                  6
Metal                      6
Salsa                      2
Ranchera                   1
Banda/Grupero              1
Soul & Funk                1
Cumbia                     1
Brazilian Music            1
Name: count, dtype: int64

In [24]:
agg_bb.to_csv('songs_with_genres.csv')

In [41]:
df.head()

Unnamed: 0.1,Unnamed: 0,title,performer,first_week,total_weeks,peak_pos,genre
0,21406,Reflections Laughing,"The Weeknd, Travis Scott & Florence + The Machine",2025-02-15,1,53,R&B
1,7174,Enjoy The Show,The Weeknd & Future,2025-02-15,1,60,R&B
2,18811,Niagara Falls,The Weeknd,2025-02-15,1,65,R&B
3,13085,I'm The Problem,Morgan Wallen,2025-02-15,1,2,Country
4,470,7 Dias,Gabito Ballesteros & Tito Double P,2025-02-15,1,84,Traditional Mexicano


In [43]:
df.describe()

Unnamed: 0.1,Unnamed: 0,total_weeks,peak_pos
count,8000.0,8000.0,8000.0
mean,15334.340375,10.134625,51.056875
std,9189.350832,11.566394,29.254228
min,9.0,1.0,1.0
25%,7095.5,1.0,26.0
50%,15760.5,4.0,53.0
75%,22845.5,19.0,76.0
max,31764.0,91.0,100.0


In [45]:
df.to_csv('songs_with_genres_complete.csv')