In [13]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from datetime import date, timedelta, datetime
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as py
import gc 
import warnings
warnings.filterwarnings("ignore")
from ydata_profiling import ProfileReport

In [16]:
today = datetime.now()
yesterday_str = "%s %d,%d" %(date.today().strftime("%b"), today.day-1, today.year)
yesterday_str

'Mar 6,2025'

In [17]:
url = "https://www.worldometers.info/coronavirus/"
req = Request(url, headers={'user_agent':"Mozila/5.0"})

webpage = urlopen(req)
page_soup = soup(webpage, "html.parser")
table = page_soup.findAll("table", {"id":"main_table_countries_yesterday"})

containers = table[0].findAll("tr",{"style":""})
title = containers[0]
all_data = []
clean = True

for country in containers:
    country_data = []
    country_container = country.findAll("td")

    
    for i in range (1, len(country_container)):
        final_feature = country_container[i].text
        if clean:
            if i != 1 and i != len(country_container)-1:
                final_feature = final_feature.replace(",","")

                if final_feature.find('+') !=-1:
                    final_feature = final_feature.replace("-","")
                    final_feature = float(final_feature)*-1
        if final_feature == 'N/A' :
            final_feature = 0
        elif final_feature == "" or final_feature == " ":
            final_feature = -1

        country_data.append(final_feature)

    all_data.append(country_data)

In [18]:
df = pd.DataFrame(all_data)
df.drop([15,16,17,18,19,20],inplace = True , axis = 1)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,All
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471400.0,Asia
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186851502.0,3544901.0,334805269.0,North America
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879495.0,665334.0,1406631776.0,Asia


In [19]:
coulumn_labels = ["Country","Total Cases", "New Cases" , "Total Deaths" , "New Deaths",  "Total Recovered" ,"New Recovered", "Active Cases", "Serious/Critical", "Total Cases/ 1M pop" , "Deaths/ 1M pop", "Total Tests", "Test/1M pop","Population", "Continents"]
df.columns = coulumn_labels
df

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents
0,,,,,,,,,,,,,,,
1,World,704753890,-3959.0,7010681,-57.0,675619021,-14953.0,22124188,34791,90413,899.4,-1,-1,-1,All
2,China,503302,-1.0,5272,-1.0,379053,-1.0,118977,7557,347,4,160000000,110461,1448471400,Asia
3,USA,111820082,-1151.0,1219487,-7.0,109814428,-10109.0,786167,940,333985,3642,1186851502,3544901,334805269,North America
4,India,45035393,-68.0,533570,-1.0,0,0.0,0,0,32016,379,935879495,665334,1406631776,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,Saint Helena,2166,-1.0,-1,-1.0,2,-1.0,2164,-1,354211,-1,-1,-1,6115,Africa
202,Montserrat,1403,-1.0,8,-1.0,1376,-1.0,19,-1,282578,1611,17762,3577442,4965,North America
203,Niue,1059,-1.0,-1,-1.0,1056,-1.0,3,-1,652898,-1,-1,-1,1622,Australia/Oceania
204,Tokelau,80,-1.0,-1,-1.0,-1,-1.0,80,-1,58055,-1,-1,-1,1378,Australia/Oceania


In [20]:
for label in df.columns:
    if label not in ['Country', 'Continent']:
        df[label] = pd.to_numeric(df[label], errors='coerce') 

In [21]:
df["%Inc Cases"] = df["New Cases"]/df["Total Cases"]*100
df["%Inc Deaths"] = df["New Deaths"]/df["Total Deaths"]*100
df["%Inc Recovered"] = df["New Recovered"]/df["Total Recovered"]*100

In [22]:
df.head()

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents,%Inc Cases,%Inc Deaths,%Inc Recovered
0,,,,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,,-0.000562,-0.000813,-0.002213
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471000.0,,-0.000199,-0.018968,-0.000264
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186852000.0,3544901.0,334805300.0,,-0.001029,-0.000574,-0.009206
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879500.0,665334.0,1406632000.0,,-0.000151,-0.000187,


In [23]:
cases = df[["Total Recovered", "Active Cases", "Total Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
fig.show()

In [9]:
cases = df[["New Cases", "New Recovered", "New Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
# fig.write_image("graph.png") 
fig.show()

In [24]:
per = np.round(df[["%Inc Cases", "%Inc Deaths"	, "%Inc Recovered"]].loc[0],2)
per_df = pd.DataFrame(per)
per_df.columns = ["Percentage"]
fig = go.Figure()


fig.add_trace(go.Bar(x = per_df.index, y = per_df['Percentage'], marker_color = ["Yellow", "blue", "red"]))
fig.show()

In [11]:
continent_df = df.groupby("Continent").sum().drop("All")

KeyError: 'Continent'