In [82]:
# Install the DuckDB SQLAlchemy driver (duckdb-engine) and the %sql magic
# (ipython-sql). Invoking pip through sys.executable installs into the
# interpreter that runs this kernel rather than whichever `pip` is first on PATH.
# NOTE(review): versions are unpinned, so a fresh run may resolve different
# releases than the ones this notebook was developed against.
import sys
!{sys.executable} -m pip install duckdb-engine
!{sys.executable} -m pip install ipython-sql



In [83]:
import time
from io import StringIO

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

import seaborn
from matplotlib import pyplot

from sklearn import preprocessing
from sklearn import utils
from sklearn.linear_model import LogisticRegression

In [84]:
# Load the ipython-sql extension that provides the %sql / %%sql magics.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext sql

# Return query results as pandas DataFrames, and silence the per-query
# feedback message and connection-string echo.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect to a DuckDB database held in memory.
%sql duckdb:///:memory:

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


Research Question: Is there a correlation between the popularity of Pitbull’s music (measured by Billboard awards and appearances on the Billboard Hot 100) and American unemployment, from his breakthrough in 2009 to the present day?

Source datasets:
https://www.kaggle.com/datasets/dhruvildave/billboard-the-hot-100-songs
https://en.wikipedia.org/wiki/List_of_awards_and_nominations_received_by_Pitbull
https://www.bls.gov/cps/tables.htm

Billboard Hot 100 Data Cleaning:

In [85]:
# Read the Billboard Hot 100 chart history from the local CSV
# (presumably the Kaggle export listed under "Source datasets")
# and preview the first five rows.
billboard_df = pd.read_csv('charts.csv')
print(billboard_df.head(5))

         date  rank           song                         artist  last-week  \
0  2021-11-06     1     Easy On Me                          Adele        1.0   
1  2021-11-06     2           Stay  The Kid LAROI & Justin Bieber        2.0   
2  2021-11-06     3  Industry Baby        Lil Nas X & Jack Harlow        3.0   
3  2021-11-06     4     Fancy Like                   Walker Hayes        4.0   
4  2021-11-06     5     Bad Habits                     Ed Sheeran        5.0   

   peak-rank  weeks-on-board  
0          1               3  
1          1              16  
2          1              14  
3          3              19  
4          2              18  


In [86]:
# Keep only chart rows whose artist credit is exactly 'Pitbull'.
# NOTE(review): exact equality drops any row where the credit contains other
# names (e.g. a "Pitbull Featuring ..." style credit) - confirm that solo
# entries only are what the research question intends.
pitbull_df = billboard_df[billboard_df['artist'].eq('Pitbull')]

In [87]:
# Display the Pitbull rows dated within the study window (both ends inclusive).
# 'date' is compared as text here, which relies on the YYYY-MM-DD format.
# NOTE(review): the filtered frame is only displayed, not assigned, so
# pitbull_df itself is left unfiltered - confirm that is intended.
pitbull_df.loc[lambda frame: frame['date'].between('2009-01-01', '2022-10-15')]

Unnamed: 0,date,rank,song,artist,last-week,peak-rank,weeks-on-board
47978,2012-09-01,79,Back In Time,Pitbull,75.0,11,20
48074,2012-08-25,75,Back In Time,Pitbull,66.0,11,19
48165,2012-08-18,66,Back In Time,Pitbull,55.0,11,18
48254,2012-08-11,55,Back In Time,Pitbull,50.0,11,17
48349,2012-08-04,50,Back In Time,Pitbull,38.0,11,16
...,...,...,...,...,...,...,...
65636,2009-04-11,37,I Know You Want Me (Calle Ocho),Pitbull,44.0,37,5
65743,2009-04-04,44,I Know You Want Me (Calle Ocho),Pitbull,50.0,44,4
65849,2009-03-28,50,I Know You Want Me (Calle Ocho),Pitbull,54.0,50,3
65953,2009-03-21,54,I Know You Want Me (Calle Ocho),Pitbull,66.0,54,2


Web scraping and cleaning Pitbull's awards data

In [88]:
# Download the Wikipedia awards page. The timeout stops the cell from hanging
# on a stalled connection, and raise_for_status() fails loudly on an HTTP
# error instead of silently parsing an error page.
page = requests.get(
    'https://en.wikipedia.org/wiki/List_of_awards_and_nominations_received_by_Pitbull',
    timeout=30,
)
page.raise_for_status()

# Keep a local copy of the raw HTML. The encoding is explicit on both the
# write and the read so that non-ASCII text (accented names, typographic
# quotes) round-trips unchanged instead of being replaced with '?' on
# platforms whose default encoding is not UTF-8.
with open('pitbull_awards.html', 'w', encoding='utf-8', errors='replace') as writer:
    writer.write(page.text)
with open('pitbull_awards.html', 'r', encoding='utf-8') as reader:
    pitbull_awards_source = reader.read()

# Collect every award table on the page.
pitbull_awards_page = BeautifulSoup(pitbull_awards_source, 'html.parser')
table = pitbull_awards_page.find_all(['table'], class_='wikitable plainrowheaders')

# pandas deprecates passing literal HTML to read_html, so wrap it in StringIO.
# The tables are joined explicitly rather than relying on the list repr of the
# BeautifulSoup result set. Despite the "_df" suffix, this is a list with one
# DataFrame per table.
pitbull_awards_df = pd.read_html(StringIO('\n'.join(str(tag) for tag in table)))

# Positions 2 and 3 are hard-coded; presumably they are the Billboard Music
# Awards and Billboard Latin Music Awards tables. They depend on the page
# layout at download time, so verify them if the article is restructured.
pitbull_billboard_awards_df = pitbull_awards_df[2].copy()
print(pitbull_billboard_awards_df)
pitbull_latin_billboard_awards_df = pitbull_awards_df[3].copy()
print(pitbull_latin_billboard_awards_df)

       Year                             Nominee / work             Award  \
0   2011[4]                                    Pitbull  Top Latin Artist   
1   2011[4]                                 "Bon, Bon"    Top Latin Song   
2   2012[5]                                    Pitbull  Top Latin Artist   
3   2012[5]                       "Give Me Everything"    Top Radio Song   
4   2012[5]                       "Give Me Everything"      Top Rap Song   
5   2012[5]                       "Give Me Everything"  Top Hot 100 Song   
6   2012[5]                                 "Bon, Bon"    Top Latin Song   
7      2013                                    Pitbull    Top Rap Artist   
8      2013  "Bailando Por El Mundo" (with Juan Magan)    Top Latin Song   
9      2014                                    Pitbull    Top Rap Artist   
10     2014                                   "Timber"      Top Rap Song   

       Result  
0   Nominated  
1         Won  
2   Nominated  
3         Won  
4   Nom

In [89]:
# Tag each table with its origin so the rows stay distinguishable once combined.
pitbull_latin_billboard_awards_df['Latin'] = 'Yes'
pitbull_billboard_awards_df['Latin'] = 'No'

# Stack the two tables. The previous outer merge joined on every shared
# column, including 'Latin', which differs between the frames, so no row could
# ever match and the merge only acted as a concatenation. concat states that
# intent directly, keeps the rows in left-then-right order with a fresh
# 0..n-1 index, and does not depend on how merge orders outer-join keys.
pitbull_billboard_all_awards_df = pd.concat(
    [pitbull_billboard_awards_df, pitbull_latin_billboard_awards_df],
    ignore_index=True,
)
print(pitbull_billboard_all_awards_df)

        Year                                     Nominee / work  \
0    2011[4]                                            Pitbull   
1    2011[4]                                         "Bon, Bon"   
2    2012[5]                                            Pitbull   
3    2012[5]                               "Give Me Everything"   
4    2012[5]                               "Give Me Everything"   
5    2012[5]                               "Give Me Everything"   
6    2012[5]                                         "Bon, Bon"   
7       2013                                            Pitbull   
8       2013          "Bailando Por El Mundo" (with Juan Magan)   
9       2014                                            Pitbull   
10      2014                                           "Timber"   
11   2009[6]                                            Pitbull   
12   2010[7]                                            Pitbull   
13   2010[7]                  "I Know You Want Me (Calle Ocho)