In [None]:
from bs4 import BeautifulSoup
import json
import pandas as pd
import pandas_datareader.data as web
import re
import requests
from sodapy import Socrata

# COVID-19 Data - Socrata

In [None]:
def covid_data(limit=20000):
    """Download CDC COVID-19 records from data.cdc.gov into a DataFrame.

    Queries Socrata dataset ``9mfq-cb36`` with no app token and converts the
    ``submission_date`` column to pandas datetimes.

    Parameters
    ----------
    limit : int, optional
        Maximum number of rows to request from the API. Defaults to 20000.
        NOTE(review): rows beyond ``limit`` are not fetched -- confirm the
        dataset fits, or pass a larger value.

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the API. ``submission_date`` is parsed
        to datetimes; every other column is left as returned. If the API
        returns no rows, the empty frame is returned as is.
    """
    client = Socrata("data.cdc.gov", None)
    try:
        results = client.get("9mfq-cb36", limit=limit)
    finally:
        # Release the client's HTTP session even when the request fails.
        client.close()

    df = pd.DataFrame.from_records(results)
    if "submission_date" in df.columns:
        # Let pandas infer the timestamp layout instead of pinning a strptime
        # pattern, so values with or without fractional seconds both parse.
        df["submission_date"] = pd.to_datetime(df["submission_date"])
    return df

# Weekly Unemployment Claims - FRED

In [None]:
def get_icsa_data_fred(key):
    """Fetch the weekly unemployment claims series (FRED series ``ICSA``).

    Parameters
    ----------
    key : str
        FRED API key.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date`` and ``Claims``, one row per observation, holding the
        ``date`` and ``value`` fields exactly as the API returned them (no
        type conversion is applied).

    Raises
    ------
    requests.HTTPError
        If FRED answers with an error status.
    """
    response = requests.get(
        "https://api.stlouisfed.org/fred/series/observations",
        # Passing the query as params lets requests URL-encode the key.
        params={"series_id": "ICSA", "api_key": key, "file_type": "json"},
        timeout=30,
    )
    # Fail on an HTTP error here rather than on a missing 'observations' key.
    response.raise_for_status()
    observations = response.json()["observations"]

    return pd.DataFrame(
        {
            "Date": [obs["date"] for obs in observations],
            "Claims": [obs["value"] for obs in observations],
        }
    )

# Stock Data - Pandas DataReader

In [None]:
def get_stock_data(start, end, ticker):
    """Download price history for one ticker through pandas-datareader.

    Parameters
    ----------
    start, end
        Date range, forwarded unchanged to ``web.DataReader``.
    ticker : str
        Symbol requested from the ``'yahoo'`` data source.

    Returns
    -------
    pandas.DataFrame
        The reader's output with its index moved into a regular column, plus
        a leading ``Ticker`` column holding the symbol with any ``^`` removed.
    """
    prices = web.DataReader(name=ticker, data_source='yahoo', start=start, end=end)
    prices = prices.reset_index(level=0)
    prices['Ticker'] = str(ticker).replace("^", "")

    # Move Ticker to the front by name rather than by fixed positions, so the
    # reorder neither fails nor drops columns when the reader's column count
    # differs from the one originally assumed.
    ordered = ['Ticker'] + [col for col in prices.columns if col != 'Ticker']
    return prices[ordered]

# Presidential Election 2020 Results - USA Today

In [None]:
# Column order of the frame returned by scrape_presidential_results().
_RESULT_COLUMNS = ['State', 'Candidates', 'Party', 'Votes',
                   'Vote Percentage', 'Electoral Votes']


def _split_candidate_party(label):
    """Split a ``"Name (PARTY)"`` label into ``(name, party)``."""
    open_idx = label.find("(")
    close_idx = label.find(")", open_idx + 1)
    if open_idx == -1 or close_idx == -1:
        # No party tag present: keep the whole label as the name.
        return label.rstrip(), ""
    # Cut the name at the parenthesis itself so party codes of any length
    # (not just a single letter) are removed cleanly.
    return label[:open_idx].rstrip(), label[open_idx + 1:close_idx]


def _parse_state_block(block):
    """Turn one ``result-table-block`` element into a per-candidate DataFrame."""
    state = block.find(attrs={"class": "result-table-header"}).text

    # Candidate label = first-name span text + the node that follows it, with
    # newlines and the " *" marker stripped.
    name_spans = block.find_all("span", attrs={"class": "result-table-col-candidate-first-name"})
    labels = [
        f'{span.text} {span.next_sibling}'.replace("\n", "").replace(" *", "")
        for span in name_spans
    ]
    split_labels = [_split_candidate_party(label) for label in labels]
    names = [name for name, _ in split_labels]
    parties = [party for _, party in split_labels]

    # Vote counts, thousands separators removed (kept as strings).
    vote_cells = block.find_all("td", attrs={"class": "result-table-col-votes"})
    votes = [cell.contents[0].replace(",", "") for cell in vote_cells]

    # Percentages converted to fractions, rounded to 2 decimals, as strings.
    # NOTE(review): rounding after dividing by 100 discards tenths of a
    # percent -- confirm this precision is intended.
    percent_cells = block.find_all("td", attrs={"class": "result-table-col-percent"})
    vote_share = [
        str(round(float(cell.contents[0].replace("%", "")) / 100, 2))
        for cell in percent_cells
    ]

    # The first matching <th> is skipped (presumably the column header --
    # verify against the page markup); "-" is rewritten to "0".
    ev_cells = block.find_all("th", attrs={"class": "result-table-col-ev"})
    electoral_votes = [cell.contents[0].replace("-", "0") for cell in ev_cells[1:]]

    frame = pd.DataFrame({
        'State': [state] * len(names),
        'Candidates': names,
        'Party': parties,
        'Votes': votes,
        'Vote Percentage': vote_share,
        'Electoral Votes': electoral_votes,
    })
    return frame[_RESULT_COLUMNS]


def scrape_presidential_results():
    """Scrape USA Today's 2020 presidential results into one DataFrame.

    Returns
    -------
    pandas.DataFrame
        One row per candidate per ``result-table-block`` on the page, with
        columns ``State``, ``Candidates``, ``Party``, ``Votes``,
        ``Vote Percentage`` and ``Electoral Votes`` (all values are strings).
        If the page contains no result blocks, an empty frame with those
        columns is returned.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    ValueError
        If a block yields column lists of different lengths.
    """
    url = 'https://www.usatoday.com/elections/results/2020-11-03/presidential/'
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    state_frames = [
        _parse_state_block(block)
        for block in soup.find_all("div", attrs={"class": "result-table-block"})
    ]
    if not state_frames:
        # pd.concat rejects an empty list; return a typed-empty result instead.
        return pd.DataFrame(columns=_RESULT_COLUMNS)
    return pd.concat(state_frames, ignore_index=True)

# Billboard Year-End Charts

In [None]:
def get_year_end_song_charts(year=2020):
    """List the song-chart slugs linked from Billboard's year-end charts page.

    Parameters
    ----------
    year : int or str, optional
        Year whose chart links are collected from the page. Defaults to 2020.

    Returns
    -------
    list of str
        The part of each matching link after ``charts/year-end/<year>/``,
        kept only when it ends in ``-songs``. Page order is preserved and
        repeated links are not de-duplicated.

    Raises
    ------
    requests.HTTPError
        If the page request returns an error status.
    """
    url = 'https://www.billboard.com/charts/year-end'
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # The match must include the trailing slash that the split relies on;
    # otherwise a link to the bare year page passes the filter and the
    # split has no second element to return.
    marker = f'charts/year-end/{year}/'
    charts = []
    for anchor in soup.find_all("a", href=True):
        href = anchor['href']
        if f'/{marker}' not in href:
            continue
        slug = href.split(marker)[1]
        if slug.endswith("-songs"):
            charts.append(slug)

    return charts

In [None]:
def scrape_year_end_chart(year, chart):
    """Scrape one Billboard year-end chart page into a list of song records.

    Parameters
    ----------
    year
        Year segment of the chart URL.
    chart : str
        Chart slug segment of the chart URL; also copied into every record.

    Returns
    -------
    list of dict
        One dict per ``ye-chart-item`` article, with keys ``rank``, ``title``,
        ``artists`` and ``chart``. Text fields have newlines removed.
    """
    page = requests.get(f'https://www.billboard.com/charts/year-end/{year}/{chart}')
    parsed = BeautifulSoup(page.text, 'html.parser')

    def field_text(entry, css_class):
        # Text of the entry's <div> with the given class, newlines stripped.
        return entry.find("div", attrs={"class": css_class}).text.replace("\n", "")

    return [
        {
            "rank": field_text(entry, "ye-chart-item__rank"),
            "title": field_text(entry, "ye-chart-item__title"),
            "artists": field_text(entry, "ye-chart-item__artist"),
            "chart": chart,
        }
        for entry in parsed.find_all("article", attrs={"class": "ye-chart-item"})
    ]