In [None]:
! pip install ydata_profiling mplcursors
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdate
from datetime import date, timedelta, datetime
from bs4 import BeautifulSoup as soup
from urllib.request import Request, urlopen
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px
import plotly.offline as py
import gc 
import warnings
warnings.filterwarnings("ignore")
from ydata_profiling import ProfileReport

In [2]:
today = datetime.now()
yesterday_str = "%s %d,%d" %(date.today().strftime("%b"), today.day-1, today.year)
yesterday_str

'Mar 9,2025'

### Scraping Data

In [3]:
url = "https://www.worldometers.info/coronavirus/"
req = Request(url, headers={'user_agent':"Mozila/5.0"})

webpage = urlopen(req)
page_soup = soup(webpage, "html.parser")
table = page_soup.findAll("table", {"id":"main_table_countries_yesterday"})

containers = table[0].findAll("tr",{"style":""})
title = containers[0]
all_data = []
clean = True

for country in containers:
    country_data = []
    country_container = country.findAll("td")

    
    for i in range (1, len(country_container)):
        final_feature = country_container[i].text
        if clean:
            if i != 1 and i != len(country_container)-1:
                final_feature = final_feature.replace(",","")

                if final_feature.find('+') !=-1:
                    final_feature = final_feature.replace("-","")
                    final_feature = float(final_feature)*-1
        if final_feature == 'N/A' :
            final_feature = 0
        elif final_feature == "" or final_feature == " ":
            final_feature = -1

        country_data.append(final_feature)

    all_data.append(country_data)

In [4]:
df = pd.DataFrame(all_data)
df.drop([15,16,17,18,19,20],inplace = True , axis = 1)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,All
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471400.0,Asia
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186851502.0,3544901.0,334805269.0,North America
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879495.0,665334.0,1406631776.0,Asia


In [5]:
coulumn_labels = ["Country","Total Cases", "New Cases" , "Total Deaths" , "New Deaths",  "Total Recovered" ,"New Recovered", "Active Cases", "Serious/Critical", "Total Cases/ 1M pop" , "Deaths/ 1M pop", "Total Tests", "Test/1M pop","Population", "Continents"]
df.columns = coulumn_labels
df

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents
0,,,,,,,,,,,,,,,
1,World,704753890,-3959.0,7010681,-57.0,675619021,-14953.0,22124188,34791,90413,899.4,-1,-1,-1,All
2,China,503302,-1.0,5272,-1.0,379053,-1.0,118977,7557,347,4,160000000,110461,1448471400,Asia
3,USA,111820082,-1151.0,1219487,-7.0,109814428,-10109.0,786167,940,333985,3642,1186851502,3544901,334805269,North America
4,India,45035393,-68.0,533570,-1.0,0,0.0,0,0,32016,379,935879495,665334,1406631776,Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201,Saint Helena,2166,-1.0,-1,-1.0,2,-1.0,2164,-1,354211,-1,-1,-1,6115,Africa
202,Montserrat,1403,-1.0,8,-1.0,1376,-1.0,19,-1,282578,1611,17762,3577442,4965,North America
203,Niue,1059,-1.0,-1,-1.0,1056,-1.0,3,-1,652898,-1,-1,-1,1622,Australia/Oceania
204,Tokelau,80,-1.0,-1,-1.0,-1,-1.0,80,-1,58055,-1,-1,-1,1378,Australia/Oceania


In [6]:
for label in df.columns:
    if label not in ['Country', 'Continents']:
        df[label] = pd.to_numeric(df[label], errors='coerce') 

In [7]:
df["%Inc Cases"] = df["New Cases"]/df["Total Cases"]*100
df["%Inc Deaths"] = df["New Deaths"]/df["Total Deaths"]*100
df["%Inc Recovered"] = df["New Recovered"]/df["Total Recovered"]*100

In [8]:
df.head()

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents,%Inc Cases,%Inc Deaths,%Inc Recovered
0,,,,,,,,,,,,,,,,,,
1,World,704753890.0,-3959.0,7010681.0,-57.0,675619021.0,-14953.0,22124188.0,34791.0,90413.0,899.4,-1.0,-1.0,-1.0,All,-0.000562,-0.000813,-0.002213
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,160000000.0,110461.0,1448471000.0,Asia,-0.000199,-0.018968,-0.000264
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1186852000.0,3544901.0,334805300.0,North America,-0.001029,-0.000574,-0.009206
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,935879500.0,665334.0,1406632000.0,Asia,-0.000151,-0.000187,


### Active Cases till now and Total Recoveries

In [9]:
cases = df[["Total Recovered", "Active Cases", "Total Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
fig.show()
fig.write_html("Plot_Total_Stats.html")


See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Plot_Total_Stats.html

### New Cases report and New Recoveries

In [10]:
cases = df[["New Cases", "New Recovered", "New Deaths"]].loc[1]
cases
cases_df = pd.DataFrame(cases).reset_index()
cases_df.columns = ["Type", "Total"]

cases_df["Percentage"] = np.round(100*cases_df["Total"]/np.sum(cases_df["Total"]),2)
cases_df["virus"] = ["COVID-19" for i in range(len(cases_df))]

fig = px.bar(cases_df, x = "virus" , y = "Percentage", color = "Type", hover_data = ["Total"])
# fig.write_image("graph.png") 
fig.show()
fig.write_html("Plot_New_Stats.html")


See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Plot_New_Stats.html

### Continent-wise Analysis of Total Cases, Active Cases, New Cases, and Critical/Serious Cases 

In [11]:
continent_df = df.groupby("Continents").sum().drop("All")
continent_df = continent_df.reset_index()
continent_df

Unnamed: 0,Continents,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,%Inc Cases,%Inc Deaths,%Inc Recovered
0,Africa,South AfricaMoroccoTunisiaEgyptLibyaEthiopiaRé...,12735512.0,-54.0,253569.0,-54.0,9091424.0,-47.0,331499.0,356.0,1991266.0,16684.0,110445900.0,10904366.0,1356349000.0,-0.217847,85.570998,-50.170643
1,Asia,ChinaIndiaJapanTurkeyVietnamIranIndonesiaMalay...,167157552.0,-805.0,1489811.0,-76.0,54245201.0,-32.0,1543030.0,7897.0,5580685.0,31427.0,2319533000.0,78437369.0,4580238000.0,-0.053209,-8.866938,-0.056534
2,Australia/Oceania,AustraliaNew ZealandNew CaledoniaFrench Polyne...,14879633.0,-682.0,32996.0,-19.0,14599387.0,-4312.0,33714.0,-5.0,4734517.0,9415.0,91668190.0,21293233.0,43410350.0,-1.514018,-97.524827,99.474262
3,Europe,GermanyItalyRussiaSpainNetherlandsPolandAustri...,170126755.0,-201.0,1515107.0,-55.0,159167484.0,-129.0,875695.0,363.0,16304245.0,110677.0,1771414000.0,169925554.0,543145300.0,-0.035971,-10.580335,-0.012499
4,North America,USAMexicoCanadaGuatemalaCosta RicaCubaPanamaDo...,131836272.0,-1186.0,1695409.0,-42.0,126430561.0,-10483.0,1333140.0,1070.0,8402531.0,55808.0,1325143000.0,95149170.0,597829600.0,-0.263313,-108.845664,-0.280051
5,South America,BrazilArgentinaColombiaChileBoliviaUruguayPara...,64556094.0,-1225.0,1109128.0,-11.0,60548914.0,-9.0,1999425.0,621.0,1880239.0,24629.0,203294100.0,10262729.0,385893300.0,-0.02665,-0.428744,-0.010687


In [12]:
def continent_visualization(v_list):
    for label in v_list:
        c_df = continent_df[['Continents', label]]
        c_df['Percentage'] = np.round(100 * c_df[label] / np.sum(c_df[label]), 2)
        c_df['virus'] = ['Covid-19' for _ in range(len(c_df))]

        fig = px.bar(c_df, x="virus", y="Percentage", color="Continents", hover_data=[label])
        fig.update_layout(title={"text": f"{label}"}) 
        fig.show()
        
        
        filename = f"plot_{label.replace('/', '_').replace(' ', '_')}.html"
        fig.write_html(filename)
        
        gc.collect()

cases_list = ["Total Cases", "Active Cases", "New Cases", "Serious/Critical"]
continent_visualization(cases_list)

See visuals here :  
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Active_Cases.html  
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_New_Cases.html  
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Serious_Critical.html  
https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/plot_Total_Cases.html


In [13]:
# cases_list = ["Total Cases" , "Active Cases", "New Cases" , "Serious/Critical"]
# death_list = ["Total Deaths", "New Deaths", "Deaths/1M"]
# recovered_list = ["Total Recovered" , "New Recovered" , "%Inc Recovered"]

In [14]:
# continent_visulaization(cases_list)

In [15]:
def continent_visualization(v_list):
    c_df = continent_df[['Continents']].copy()

    # Create a single figure with all case types plotted
    fig = px.line(title="COVID-19 Cases by Continent", markers=True)

    for label in v_list:
        c_df[label] = np.round(100 * continent_df[label] / np.sum(continent_df[label]), 2)
        fig.add_scatter(x=continent_df["Continents"], y=c_df[label], mode="lines+markers", name=label)

    # Update layout for better readability
    fig.update_layout(
        xaxis_title="Continents",
        yaxis_title="Percentage (%)",
        legend_title="Case Type"
    )

    fig.show()
    fig.write_html("SpreadAnalysisInContinents.html")
    gc.collect()

# List of cases to plot
cases_list = ["Total Cases", "Active Cases", "New Cases", "Serious/Critical"]
continent_visualization(cases_list)

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/SpreadAnalysisInContinents.html

### Top Countries Effected

In [16]:
df = df.drop([len(df)-1])
country_df= df.drop({0,1})
country_df

Unnamed: 0,Country,Total Cases,New Cases,Total Deaths,New Deaths,Total Recovered,New Recovered,Active Cases,Serious/Critical,Total Cases/ 1M pop,Deaths/ 1M pop,Total Tests,Test/1M pop,Population,Continents,%Inc Cases,%Inc Deaths,%Inc Recovered
2,China,503302.0,-1.0,5272.0,-1.0,379053.0,-1.0,118977.0,7557.0,347.0,4.0,1.600000e+08,110461.0,1.448471e+09,Asia,-0.000199,-0.018968,-0.000264
3,USA,111820082.0,-1151.0,1219487.0,-7.0,109814428.0,-10109.0,786167.0,940.0,333985.0,3642.0,1.186852e+09,3544901.0,3.348053e+08,North America,-0.001029,-0.000574,-0.009206
4,India,45035393.0,-68.0,533570.0,-1.0,0.0,0.0,0.0,0.0,32016.0,379.0,9.358795e+08,665334.0,1.406632e+09,Asia,-0.000151,-0.000187,
5,Germany,38828995.0,-73.0,183027.0,-16.0,38240600.0,-1.0,405368.0,0.0,462891.0,2182.0,1.223324e+08,1458359.0,8.388360e+07,Europe,-0.000188,-0.008742,-0.000003
6,Brazil,38743918.0,-1.0,711380.0,-1.0,36249161.0,-1.0,1783377.0,0.0,179908.0,3303.0,6.377617e+07,296146.0,2.153536e+08,South America,-0.000003,-0.000141,-0.000003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,Tuvalu,2943.0,-1.0,1.0,-1.0,0.0,0.0,0.0,0.0,243909.0,83.0,-1.000000e+00,-1.0,1.206600e+04,Australia/Oceania,-0.033979,-100.000000,
201,Saint Helena,2166.0,-1.0,-1.0,-1.0,2.0,-1.0,2164.0,-1.0,354211.0,-1.0,-1.000000e+00,-1.0,6.115000e+03,Africa,-0.046168,100.000000,-50.000000
202,Montserrat,1403.0,-1.0,8.0,-1.0,1376.0,-1.0,19.0,-1.0,282578.0,1611.0,1.776200e+04,3577442.0,4.965000e+03,North America,-0.071276,-12.500000,-0.072674
203,Niue,1059.0,-1.0,-1.0,-1.0,1056.0,-1.0,3.0,-1.0,652898.0,-1.0,-1.000000e+00,-1.0,1.622000e+03,Australia/Oceania,-0.094429,100.000000,-0.094697


In [17]:
LOOK_AT = 20
country = country_df.columns[1:14]
fig = go.Figure()
c=0
for i in country_df.index:
    if c < LOOK_AT:
        fig.add_trace(go.Bar(name = country_df['Country'][i], x = country, y = country_df.loc[i][1:14]))
    else:
        break
    c +=1
fig.update_layout(title = {"text" : f' top {LOOK_AT} countries effected'}, yaxis_type = "log")
fig.show()
fig.write_html("TopCountriesEffected.html")

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/TopCountriesEffected.html

### Vaccination Analysis

In [18]:
df=pd.read_csv("country_vaccinations.csv")
df.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,34.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/


In [19]:
df.isnull().sum()

country                                    0
iso_code                                   0
date                                       0
total_vaccinations                     42905
people_vaccinated                      45218
people_fully_vaccinated                47710
daily_vaccinations_raw                 51150
daily_vaccinations                       299
total_vaccinations_per_hundred         42905
people_vaccinated_per_hundred          45218
people_fully_vaccinated_per_hundred    47710
daily_vaccinations_per_million           299
vaccines                                   0
source_name                                0
source_website                             0
dtype: int64

In [20]:
#Fill NaNs with 0 and then drop all countries with iso_code = 0. This is key information that we need so dropping unknowns is the best way to handle. 
df.fillna(0, inplace = True)
df.drop(df.index[df['iso_code'] == 0], inplace = True)

In [21]:
#Check how many nulls we have. SHould be none. 
df.isnull().sum()

country                                0
iso_code                               0
date                                   0
total_vaccinations                     0
people_vaccinated                      0
people_fully_vaccinated                0
daily_vaccinations_raw                 0
daily_vaccinations                     0
total_vaccinations_per_hundred         0
people_vaccinated_per_hundred          0
people_fully_vaccinated_per_hundred    0
daily_vaccinations_per_million         0
vaccines                               0
source_name                            0
source_website                         0
dtype: int64

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86512 entries, 0 to 86511
Data columns (total 15 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   country                              86512 non-null  object 
 1   iso_code                             86512 non-null  object 
 2   date                                 86512 non-null  object 
 3   total_vaccinations                   86512 non-null  float64
 4   people_vaccinated                    86512 non-null  float64
 5   people_fully_vaccinated              86512 non-null  float64
 6   daily_vaccinations_raw               86512 non-null  float64
 7   daily_vaccinations                   86512 non-null  float64
 8   total_vaccinations_per_hundred       86512 non-null  float64
 9   people_vaccinated_per_hundred        86512 non-null  float64
 10  people_fully_vaccinated_per_hundred  86512 non-null  float64
 11  daily_vaccinations_per_milli

In [23]:
df['date'] =  pd.to_datetime(df['date'], format='%Y-%m-%d')


In [24]:
df.columns

Index(['country', 'iso_code', 'date', 'total_vaccinations',
       'people_vaccinated', 'people_fully_vaccinated',
       'daily_vaccinations_raw', 'daily_vaccinations',
       'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
       'people_fully_vaccinated_per_hundred', 'daily_vaccinations_per_million',
       'vaccines', 'source_name', 'source_website'],
      dtype='object')

In [25]:
df.drop(["people_fully_vaccinated","daily_vaccinations_raw","people_fully_vaccinated_per_hundred",
         "daily_vaccinations_per_million","people_vaccinated_per_hundred", "source_name","source_website"],axis=1, inplace=True)

### China (The Origin)

In [None]:
#Save CHINA data into a separate dataframe
df_CHINA = df[df["iso_code"] == 'CHN'].copy()
df_CHINA

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,vaccines
15756,China,CHN,2020-12-15,1.500000e+06,0.0,0.0,0.10,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
15757,China,CHN,2020-12-16,0.000000e+00,0.0,187500.0,0.00,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
15758,China,CHN,2020-12-17,0.000000e+00,0.0,187500.0,0.00,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
15759,China,CHN,2020-12-18,0.000000e+00,0.0,187500.0,0.00,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
15760,China,CHN,2020-12-19,0.000000e+00,0.0,187500.0,0.00,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
...,...,...,...,...,...,...,...,...
16221,China,CHN,2022-03-25,3.247624e+09,0.0,4129714.0,224.87,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
16222,China,CHN,2022-03-26,3.251412e+09,0.0,4077714.0,225.13,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
16223,China,CHN,2022-03-27,3.254984e+09,0.0,4092857.0,225.38,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
16224,China,CHN,2022-03-28,3.259042e+09,0.0,4096429.0,225.66,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."


In [56]:
df_CHINA.drop(df_CHINA.index[df_CHINA['total_vaccinations'] == 0], inplace = True)
fig = px.line(df_CHINA, x="date", y="total_vaccinations", title="Total vaccinations in the CHINA")
fig.update_layout(xaxis_tickangle=-45, width=900, height=400)
fig.show()
fig.write_html("China_Total_Vaccinations.html")


See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/China_Total_Vaccinations.html

In [57]:
#Plot daily vaccinations as a function of date
fig = px.line(df_CHINA, x="date", y="daily_vaccinations", title="Daily vaccinations in China")
fig.update_layout(xaxis_tickangle=-90, width=900, height=400)
fig.show()
fig.write_html('China_Daily_Vaccination.html')

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/CHINA_DAILY_VACCINATION.html

### PAKISTAN

In [42]:
df_PAK = df[df["iso_code"] == 'PAK'].copy()
df_PAK

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,vaccines
59449,Pakistan,PAK,2021-02-02,0.0,0.0,0.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59450,Pakistan,PAK,2021-02-03,0.0,0.0,3404.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59451,Pakistan,PAK,2021-02-04,0.0,0.0,3404.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59452,Pakistan,PAK,2021-02-05,0.0,0.0,3404.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59453,Pakistan,PAK,2021-02-06,0.0,0.0,3404.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
...,...,...,...,...,...,...,...,...
59846,Pakistan,PAK,2022-03-06,0.0,0.0,467655.0,0.00,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59847,Pakistan,PAK,2022-03-07,217444622.0,127697360.0,397642.0,96.56,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59848,Pakistan,PAK,2022-03-08,218051079.0,127811729.0,419145.0,96.83,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
59849,Pakistan,PAK,2022-03-09,218689769.0,127942647.0,449967.0,97.11,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."


In [58]:
df_CHINA.drop(df_CHINA.index[df_CHINA['total_vaccinations'] == 0], inplace = True)
fig = px.line(df_CHINA, x="date", y="total_vaccinations", title="Total vaccinations in the PAKISTAN")
fig.update_layout(xaxis_tickangle=-45, width=900, height=400)
fig.show()
fig.write_html("Pakistan_Total_Vaccinations.html")

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Pakistan_Total_Vaccinations.html

In [59]:
fig = px.line(df_PAK, x="date", y="daily_vaccinations", title="Daily vaccinations in PAKISTAN")
fig.update_layout(xaxis_tickangle=-90, width=900, height=400)
fig.show()
fig.write_html('Pakistan_Daily_Vaccination.html')

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Pakistan_Daily_Vaccinations.html

### Top 10 most Vaccinated Countries

In [53]:
vacc_by_country = df.groupby('country').max().sort_values('total_vaccinations', ascending=False)
vacc_by_country = vacc_by_country.iloc[:10]
vacc_by_country

Unnamed: 0_level_0,iso_code,date,total_vaccinations,people_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,vaccines
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
China,CHN,2022-03-29,3263129000.0,1275541000.0,22424286.0,225.94,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
India,IND,2022-03-29,1834501000.0,984838100.0,10037995.0,131.66,"Covaxin, Oxford/AstraZeneca, Sputnik V"
United States,USA,2022-03-28,560181800.0,255362400.0,3506960.0,168.72,"Johnson&Johnson, Moderna, Pfizer/BioNTech"
Brazil,BRA,2022-03-29,413559600.0,181078100.0,1941268.0,193.26,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
Indonesia,IDN,2022-03-29,377108900.0,196240900.0,1897011.0,136.45,"Johnson&Johnson, Moderna, Novavax, Oxford/Astr..."
Japan,JPN,2022-03-29,254345600.0,102467500.0,1997542.0,201.78,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
Bangladesh,BGD,2022-03-29,243642700.0,127544100.0,3758404.0,146.5,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
Pakistan,PAK,2022-03-10,219368600.0,128074100.0,2175773.0,97.41,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."
Vietnam,VNM,2022-03-22,203144400.0,79947190.0,1675471.0,206.93,"Abdala, Moderna, Oxford/AstraZeneca, Pfizer/Bi..."
Mexico,MEX,2022-03-29,191907900.0,85580290.0,1648223.0,147.32,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr..."


In [54]:
vacc_by_country = vacc_by_country.sort_values('total_vaccinations_per_hundred', ascending=False)
vacc_by_country

Unnamed: 0_level_0,iso_code,date,total_vaccinations,people_vaccinated,daily_vaccinations,total_vaccinations_per_hundred,vaccines
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
China,CHN,2022-03-29,3263129000.0,1275541000.0,22424286.0,225.94,"CanSino, Sinopharm/Beijing, Sinopharm/Wuhan, S..."
Vietnam,VNM,2022-03-22,203144400.0,79947190.0,1675471.0,206.93,"Abdala, Moderna, Oxford/AstraZeneca, Pfizer/Bi..."
Japan,JPN,2022-03-29,254345600.0,102467500.0,1997542.0,201.78,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech"
Brazil,BRA,2022-03-29,413559600.0,181078100.0,1941268.0,193.26,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi..."
United States,USA,2022-03-28,560181800.0,255362400.0,3506960.0,168.72,"Johnson&Johnson, Moderna, Pfizer/BioNTech"
Mexico,MEX,2022-03-29,191907900.0,85580290.0,1648223.0,147.32,"CanSino, Johnson&Johnson, Moderna, Oxford/Astr..."
Bangladesh,BGD,2022-03-29,243642700.0,127544100.0,3758404.0,146.5,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ..."
Indonesia,IDN,2022-03-29,377108900.0,196240900.0,1897011.0,136.45,"Johnson&Johnson, Moderna, Novavax, Oxford/Astr..."
India,IND,2022-03-29,1834501000.0,984838100.0,10037995.0,131.66,"Covaxin, Oxford/AstraZeneca, Sputnik V"
Pakistan,PAK,2022-03-10,219368600.0,128074100.0,2175773.0,97.41,"CanSino, Covaxin, Moderna, Oxford/AstraZeneca,..."


In [60]:
fig = px.bar(vacc_by_country, 
             x=vacc_by_country.index, 
             y="total_vaccinations_per_hundred", 
             labels={"total_vaccinations_per_hundred": "Vaccinations per 100", "index": "Country"},
             title="Vaccinations per 100 by Country")

fig.update_layout(xaxis_tickangle=-90, width=900, height=400)
fig.show()
fig.write_html("Vaccination_in_top_effected_countries.html")

See visuals here : https://tayyab5733.github.io/COVID-19-Dataset-and-Vaccine-Impact-Analyzer/Vaccination_in_top_effected_countries.html