In [None]:
! pip install ydata_profiling mplcursors
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from datetime import date, timedelta, datetime
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as py
import gc 
import warnings
warnings.filterwarnings("ignore")
from ydata_profiling import ProfileReport

In [2]:
today = datetime.now()
yesterday_str = "%s %d,%d" %(date.today().strftime("%b"), today.day-1, today.year)
yesterday_str

'Mar 8,2025'

### Scraping Data

In [3]:
url = "https://www.worldometers.info/coronavirus/"
req = Request(url, headers={'user_agent':"Mozila/5.0"})

webpage = urlopen(req)
page_soup = soup(webpage, "html.parser")
table = page_soup.findAll("table", {"id":"main_table_countries_yesterday"})

containers = table[0].findAll("tr",{"style":""})
title = containers[0]
all_data = []
clean = True

for country in containers:
    country_data = []
    country_container = country.findAll("td")

    
    for i in range (1, len(country_container)):
        final_feature = country_container[i].text
        if clean:
            if i != 1 and i != len(country_container)-1:
                final_feature = final_feature.replace(",","")

                if final_feature.find('+') !=-1:
                    final_feature = final_feature.replace("-","")
                    final_feature = float(final_feature)*-1
        if final_feature == 'N/A' :
            final_feature = 0
        elif final_feature == "" or final_feature == " ":
            final_feature = -1

        country_data.append(final_feature)

    all_data.append(country_data)

In [4]:
df = pd.DataFrame(all_data)
df.drop([15,16,17,18,19,20],inplace = True , axis = 1)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,All
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471400.0,Asia
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186851502.0,3544901.0,334805269.0,North America
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879495.0,665334.0,1406631776.0,Asia


In [5]:
coulumn_labels = ["Country","Total Cases", "New Cases" , "Total Deaths" , "New Deaths",  "Total Recovered" ,"New Recovered", "Active Cases", "Serious/Critical", "Total Cases/ 1M pop" , "Deaths/ 1M pop", "Total Tests", "Test/1M pop","Population", "Continents"]
df.columns = coulumn_labels
df

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents
0,,,,,,,,,,,,,,,
1,World,704753890,-3959.0,7010681,-57.0,675619021,-14953.0,22124188,34791,90413,899.4,-1,-1,-1,All
2,China,503302,-1.0,5272,-1.0,379053,-1.0,118977,7557,347,4,160000000,110461,1448471400,Asia
3,USA,111820082,-1151.0,1219487,-7.0,109814428,-10109.0,786167,940,333985,3642,1186851502,3544901,334805269,North America
4,India,45035393,-68.0,533570,-1.0,0,0.0,0,0,32016,379,935879495,665334,1406631776,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,Saint Helena,2166,-1.0,-1,-1.0,2,-1.0,2164,-1,354211,-1,-1,-1,6115,Africa
202,Montserrat,1403,-1.0,8,-1.0,1376,-1.0,19,-1,282578,1611,17762,3577442,4965,North America
203,Niue,1059,-1.0,-1,-1.0,1056,-1.0,3,-1,652898,-1,-1,-1,1622,Australia/Oceania
204,Tokelau,80,-1.0,-1,-1.0,-1,-1.0,80,-1,58055,-1,-1,-1,1378,Australia/Oceania


In [6]:
for label in df.columns:
    if label not in ['Country', 'Continents']:
        df[label] = pd.to_numeric(df[label], errors='coerce') 

In [7]:
df["%Inc Cases"] = df["New Cases"]/df["Total Cases"]*100
df["%Inc Deaths"] = df["New Deaths"]/df["Total Deaths"]*100
df["%Inc Recovered"] = df["New Recovered"]/df["Total Recovered"]*100

In [8]:
df.head()

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents,%Inc Cases,%Inc Deaths,%Inc Recovered
0,,,,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,All,-0.000562,-0.000813,-0.002213
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471000.0,Asia,-0.000199,-0.018968,-0.000264
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186852000.0,3544901.0,334805300.0,North America,-0.001029,-0.000574,-0.009206
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879500.0,665334.0,1406632000.0,Asia,-0.000151,-0.000187,


### Active Cases till now and Total Recoveries

In [None]:
cases = df[["Total Recovered", "Active Cases", "Total Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
fig.show()
fig.write_html("Plot_Total_Stats.html")


https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Plot_Total_Stats.html

### New Cases report and New Recoveries

In [37]:
cases = df[["New Cases", "New Recovered", "New Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
# fig.write_image("graph.png") 
fig.show()
fig.write_html("Plot_New_Stats.html")


https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Plot_New_Stats.html

### Continent-wise Analysis of Total Cases, Active Cases, New Cases, and Critical/Serious Cases 

In [12]:
continent_df = df.groupby("Continents").sum().drop("All")
continent_df = continent_df.reset_index()
continent_df

Unnamed: 0,Continents,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,%Inc Cases,%Inc Deaths,%Inc Recovered
0,Africa,South AfricaMoroccoTunisiaEgyptLibyaEthiopiaRé...,12735512.0,-54.0,253569.0,-54.0,9091424.0,-47.0,331499.0,356.0,1991266.0,16684.0,110445900.0,10904366.0,1356349000.0,-0.217847,85.570998,-50.170643
1,Asia,ChinaIndiaJapanTurkeyVietnamIranIndonesiaMalay...,167157552.0,-805.0,1489811.0,-76.0,54245201.0,-32.0,1543030.0,7897.0,5580685.0,31427.0,2319533000.0,78437369.0,4580238000.0,-0.053209,-8.866938,-0.056534
2,Australia/Oceania,AustraliaNew ZealandNew CaledoniaFrench Polyne...,14879633.0,-682.0,32996.0,-19.0,14599387.0,-4312.0,33714.0,-5.0,4734517.0,9415.0,91668190.0,21293233.0,43410350.0,-1.514018,-97.524827,99.474262
3,Europe,GermanyItalyRussiaSpainNetherlandsPolandAustri...,170126755.0,-201.0,1515107.0,-55.0,159167484.0,-129.0,875695.0,363.0,16304245.0,110677.0,1771414000.0,169925554.0,543145300.0,-0.035971,-10.580335,-0.012499
4,North America,USAMexicoCanadaGuatemalaCosta RicaCubaPanamaDo...,131836272.0,-1186.0,1695409.0,-42.0,126430561.0,-10483.0,1333140.0,1070.0,8402531.0,55808.0,1325143000.0,95149170.0,597829600.0,-0.263313,-108.845664,-0.280051
5,South America,BrazilArgentinaColombiaChileBoliviaUruguayPara...,64556094.0,-1225.0,1109128.0,-11.0,60548914.0,-9.0,1999425.0,621.0,1880239.0,24629.0,203294100.0,10262729.0,385893300.0,-0.02665,-0.428744,-0.010687


In [None]:
def continent_visualization(v_list):
    for label in v_list:
        c_df = continent_df[['Continents', label]]
        c_df['Percentage'] = np.round(100 * c_df[label] / np.sum(c_df[label]), 2)
        c_df['virus'] = ['Covid-19' for _ in range(len(c_df))]

        fig = px.bar(c_df, x="virus", y="Percentage", color="Continents", hover_data=[label])
        fig.update_layout(title={"text": f"{label}"}) 
        fig.show()
        
        
        filename = f"plot_{label.replace('/', '_').replace(' ', '_')}.html"
        fig.write_html(filename)
        
        gc.collect()

cases_list = ["Total Cases", "Active Cases", "New Cases", "Serious/Critical"]
continent_visualization(cases_list)

https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Active_Cases.html
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_New_Cases.html
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Serious_Critical.html
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Total_Cases.html


In [34]:
# cases_list = ["Total Cases" , "Active Cases", "New Cases" , "Serious/Critical"]
# death_list = ["Total Deaths", "New Deaths", "Deaths/1M"]
# recovered_list = ["Total Recovered" , "New Recovered" , "%Inc Recovered"]

In [35]:
# continent_visulaization(cases_list)

In [36]:
def continent_visualization(v_list):
    c_df = continent_df[['Continents']].copy()

    # Create a single figure with all case types plotted
    fig = px.line(title="COVID-19 Cases by Continent", markers=True)

    for label in v_list:
        c_df[label] = np.round(100 * continent_df[label] / np.sum(continent_df[label]), 2)
        fig.add_scatter(x=continent_df["Continents"], y=c_df[label], mode="lines+markers", name=label)

    # Update layout for better readability
    fig.update_layout(
        xaxis_title="Continents",
        yaxis_title="Percentage (%)",
        legend_title="Case Type"
    )

    fig.show()
    fig.write_html("SpreadAnalysisInContinents.html")
    gc.collect()

# List of cases to plot
cases_list = ["Total Cases", "Active Cases", "New Cases", "Serious/Critical"]
continent_visualization(cases_list)

https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/SpreadAnalysisInContinents.html