## Scrape Worldometer with BeautifulSoup (bs4)

In [9]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import timedelta, date


In [25]:
def worldometer_scrape(days_from_today, timeout=30):
    """Scrape the COVID-19 statistics table from worldometers.info.

    Parameters
    ----------
    days_from_today : int
        Position of the ``<table>`` element to read, in page order.
        Use 0 for today's table and 1 for yesterday's.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` of the table body, with the columns ``country``,
        ``TotalCases``, ``NewCases``, ``TotalDeaths``, ``NewDeaths``,
        ``TotalRecovered``, ``NewRecovered``, ``ActiveCases``,
        ``Serious,Critical``, ``TotalTests`` and ``Population``.
        ``TotalDeaths`` is numeric; every other column holds the stripped
        cell text exactly as scraped. The frame is an independent copy.

    Raises
    ------
    requests.exceptions.RequestException
        If the request fails, times out, or returns an HTTP error status.
    IndexError
        If the page has no table at position ``days_from_today``.
    KeyError
        If the table header no longer contains the expected column names.
    ValueError
        If a cell in a converted column is not parseable as a number.
    """
    url = 'https://www.worldometers.info/coronavirus/'

    # Without a timeout a stalled connection blocks the kernel forever, and
    # without the status check an HTTP error page would be parsed as data.
    page = requests.get(url, timeout=timeout)
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    tables = soup.find_all('table')
    try:
        soup_table = tables[days_from_today]
    except IndexError:
        raise IndexError(
            "no table at index {}; the page contains {} table(s)".format(
                days_from_today, len(tables))
        ) from None

    header = soup_table.find_all('thead')[0]
    colnames = [cell.get_text() for cell in header.find_all('th')]

    # <tbody> holds the data: each <tr> is a row, each <td> a cell in that row.
    body = soup_table.find_all('tbody')[0]
    rows = [
        [cell.get_text().strip() for cell in table_row.find_all('td')]
        for table_row in body.find_all('tr')
    ]

    df = pd.DataFrame(rows, columns=colnames)
    # errors="raise" doubles as a schema check: it fails loudly if the site
    # renames any of these headers.
    df = df.rename(columns={"Country,Other": "country",
                            "Tests/\n1M pop\n": "Test/1M",
                            "Tot\xa0Cases/1M pop": "Tot Cases/1M",
                            "Deaths/1M pop": "Deaths/1M"}, errors="raise")

    # Strip thousands separators before converting to numbers.
    # NOTE(review): 'Deaths/1M' and 'Test/1M' are converted but not part of
    # the returned columns; kept so that existing failure behavior is unchanged.
    for column in ('TotalDeaths', 'Deaths/1M', 'Test/1M'):
        df[column] = pd.to_numeric(df[column].str.replace(",", "", regex=False))

    # .copy() detaches the result from `df`, so callers can assign to it
    # without pandas raising SettingWithCopyWarning.
    return df[['country', 'TotalCases', 'NewCases', 'TotalDeaths', 'NewDeaths',
               'TotalRecovered', 'NewRecovered', 'ActiveCases', 'Serious,Critical',
               'TotalTests', 'Population']].copy()

In [27]:
# Fetch today's table (index 0) from the Worldometer page.
df_today = worldometer_scrape(days_from_today=0)

#
Country,Other
TotalCases
NewCases
TotalDeaths
NewDeaths
TotalRecovered
NewRecovered
ActiveCases
Serious,Critical
Tot Cases/1M pop
Deaths/1M pop
TotalTests
Tests/
1M pop

Population
Continent
1 Caseevery X ppl
1 Deathevery X ppl
1 Testevery X ppl
New Cases/1M pop
New Deaths/1M pop
Active Cases/1M pop


In [28]:
# Preview the first 15 rows of the scraped table.
df_today.head(n=15)

Unnamed: 0,country,TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",TotalTests,Population
0,North America,55053428,2005.0,1119945.0,60.0,43344194,3931.0,10589289,22307,,
1,Asia,77959744,26734.0,1149476.0,304.0,74907639,47106.0,1902629,28863,,
2,South America,38147667,177.0,1164243.0,2.0,36282162,511.0,701262,12569,,
3,Europe,61599969,55809.0,1262492.0,1278.0,56192814,36603.0,4144663,12003,,
4,Africa,8505243,,216054.0,,7826953,,462236,2536,,
5,Oceania,276844,2336.0,3383.0,12.0,216557,54.0,56904,373,,
6,,721,,15.0,,706,,0,0,,
7,World,241543616,87061.0,4915608.0,1656.0,218771025,88205.0,17856983,78651,,
8,USA,45792532,,744546.0,,35374595,,9673391,15133,668769691.0,333511317.0
9,India,34081315,266.0,452321.0,,33439331,7615.0,189663,8944,591924874.0,1397531610.0
