# Importing Libraries

In [5]:
import pandas as pd 
from bs4 import BeautifulSoup
import requests as r 
import json

# First source is a JSON file containing data about vaccinations

In [6]:
# Load the vaccination dataset from the local JSON file.
# The encoding is stated explicitly so the read does not depend on the
# platform's default text encoding (JSON interchange files are UTF-8).
with open("vaccination.json", "r", encoding="utf-8") as file:
    data_vacc = json.load(file)

# Convert it to DF

In [7]:
# Per-country running sum of the "daily_people_vaccinated" field.
total_vaccinations = {}

for entry in data_vacc:
    name = entry["country"]

    for day in entry["data"]:
        # Days that lack the field contribute 0.
        people = day.get("daily_people_vaccinated", 0)

        # First value seen for this country starts its total.
        if name not in total_vaccinations:
            total_vaccinations[name] = people
            continue

        total_vaccinations[name] += people


# One row per country; the summed value is stored under "Total Vaccinations".
dataframe_vacc = pd.DataFrame(
    total_vaccinations.items(),
    columns=["Country", "Total Vaccinations"],
)
dataframe_vacc

Unnamed: 0,Country,Total Vaccinations
0,Afghanistan,15627466
1,Africa,521437145
2,Albania,1348540
3,Algeria,7845452
4,Andorra,57497
...,...,...
230,Wallis and Futuna,6658
231,World,4956692301
232,Yemen,1026880
233,Zambia,11597447


# Access the page (Worldometers - Coronavirus cases)

In [8]:
# Download the Worldometers coronavirus page.
# The timeout stops the cell from hanging indefinitely on a stalled connection.
response = r.get("https://www.worldometers.info/coronavirus/", timeout=30)
# Fail here on an HTTP error status (4xx/5xx) instead of failing later with a
# confusing error when the expected table is missing from the parsed page.
response.raise_for_status()

# Getting the content of the web page in raw (unparsed) form

In [9]:
# Raw body of the HTTP response, not yet parsed.
content = response.content

# Using the Beautiful Soup library to parse it into a navigable HTML tree

In [10]:
# Parse the downloaded page into a searchable tree.
# The parser is named explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed, emits GuessedAtParserWarning, and can build
# a different tree on another machine. "html.parser" ships with Python.
soup = BeautifulSoup(content, "html.parser")

# Grabbing all the table content into a variable

In [11]:
# Locate the statistics table by its element id.
var = soup.find("table", id="main_table_countries_today")

# Split the content of the table into headers and data 

In [12]:
# Every <tr> of the table: the first row holds the column titles,
# the remaining rows hold the data.
lines = var.find_all("tr")

# Column titles are the stripped texts of the <th> cells in the first row.
headers = [th.text.strip() for th in lines[0].find_all("th")]

# One list of stripped <td> texts per remaining row.
data = [
    [td.text.strip() for td in row.find_all("td")]
    for row in lines[1:]
]

Covid19_cases = pd.DataFrame(data, columns=headers)

# Data preprocessing

In [13]:
# Preview the first 10 rows of the scraped table.
Covid19_cases.head(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
0,,North America,126822971,,1635327,,122775364,4343.0,2412280,6014.0,...,,,,North America,,,,,,
1,,Asia,217733979,7.0,1547006,,201468079,23437.0,14718894,15211.0,...,,,,Asia,,,,,,
2,,Europe,249540259,,2063819,,245545084,2869.0,1931356,5640.0,...,,,,Europe,,,,,,
3,,South America,68766612,,1356746,,66479577,511.0,930289,10104.0,...,,,,South America,,,,,,
4,,Oceania,14458069,,28511,,14291244,,138314,99.0,...,,,,Australia/Oceania,,,,,,
5,,Africa,12825765,,258782,,12086419,,480564,548.0,...,,,,Africa,,,,,,
6,,,721,,15,,706,,0,0.0,...,,,,,,,,,,
7,,World,690148376,7.0,6890206,0.0,662646473,31160.0,20611697,37616.0,...,,,,All,,,,,,
8,1.0,USA,107186441,,1166663,,105316655,,703123,848.0,...,1180527728.0,3526013.0,334805269.0,North America,3.0,287.0,0.0,,,2100.0
9,2.0,India,44992788,,531891,,44458447,,2450,,...,930797975.0,661721.0,1406631776.0,Asia,31.0,2645.0,2.0,,,2.0


In [14]:
# Preview the last 10 rows of the scraped table.
Covid19_cases.tail(10)

Unnamed: 0,#,"Country,Other",TotalCases,NewCases,TotalDeaths,NewDeaths,TotalRecovered,NewRecovered,ActiveCases,"Serious,Critical",...,TotalTests,Tests/\n1M pop,Population,Continent,1 Caseevery X ppl,1 Deathevery X ppl,1 Testevery X ppl,New Cases/1M pop,New Deaths/1M pop,Active Cases/1M pop
237,230.0,Tokelau,5,,,,,,5,,...,,,1378.0,Australia/Oceania,276.0,,,,,3628.0
238,231.0,China,503302,,5272.0,,379053.0,,118977,,...,160000000.0,110461.0,1448471400.0,Asia,2878.0,274748.0,9.0,,,82.0
239,,Total:,126822971,,1635327.0,,122775364.0,,2412280,6014.0,...,,,,North America,,,,,,
240,,Total:,217733979,7.0,1547006.0,,201468079.0,,14718894,15211.0,...,,,,Asia,,,,,,
241,,Total:,249540259,,2063819.0,,245545084.0,,1931356,5640.0,...,,,,Europe,,,,,,
242,,Total:,68766612,,1356746.0,,66479577.0,,930289,10104.0,...,,,,South America,,,,,,
243,,Total:,14458069,,28511.0,,14291244.0,,138314,99.0,...,,,,Australia/Oceania,,,,,,
244,,Total:,12825765,,258782.0,,12086419.0,,480564,548.0,...,,,,Africa,,,,,,
245,,Total:,721,,15.0,,706.0,,0,0.0,...,,,,,,,,,,
246,,Total:,690148376,7.0,6890206.0,0.0,662646473.0,31160.0,20611697,37616.0,...,,,,All,,,,,,


# Removing Unnecessary Rows

In [15]:
# Keep only the numbered country rows.
# In the scraped table the continent/world summary rows at the top and the
# "Total:" rows at the bottom have an empty "#" cell, while every country row
# carries its rank there. Filtering on that column replaces the hard-coded
# positions (first 8 rows, rows from 239 on), which stop matching as soon as
# the live page gains or loses a row.
is_country_row = Covid19_cases["#"].fillna("").str.strip().ne("")
df_covid = Covid19_cases.loc[is_country_row].copy()

# Removing Unnecessary Columns

In [16]:
# Keep the country name and the three cumulative counters.
# Selecting by column name yields the same four columns the positional drops
# produced (everything from column 7 on, the "#" column, NewCases and
# NewDeaths removed) without depending on the page's column order.
df_covid = df_covid[["Country,Other", "TotalCases", "TotalDeaths", "TotalRecovered"]]

# Adjusting Index

In [17]:
# Renumber the rows from 0 and discard the old index labels.
df_covid = df_covid.reset_index(drop=True)

# Adjusting Columns

In [18]:
# Rename the four remaining columns in order; "Country" matches the key
# column name used in dataframe_vacc.
df_covid.columns=["Country" , "TotalCases" ,"TotalDeaths","TotalRecovered"]

In [19]:
# Display the cleaned cases table.
df_covid

Unnamed: 0,Country,TotalCases,TotalDeaths,TotalRecovered
0,USA,107186441,1166663,105316655
1,India,44992788,531891,44458447
2,France,40112303,167480,39887383
3,Germany,38428685,174352,38240600
4,Brazil,37625916,703291,36249161
...,...,...,...,...
226,Vatican City,29,,29
227,Western Sahara,10,1,9
228,MS Zaandam,9,2,7
229,Tokelau,5,,


# Combining the 2 DataFrames into one using the merge method

In [20]:
# Outer-join the vaccination totals with the case counts on the country name,
# keeping countries that appear in only one of the two tables.
merged_df = dataframe_vacc.merge(df_covid, how="outer", on="Country")

In [21]:
# Drop rows with a missing value in any column and renumber the rows. The
# renumbered frame is stored, so the variable matches what the cell displays.
merged_df = merged_df.dropna().reset_index(drop=True)

# Render the vaccination totals as whole-number strings (no trailing ".0").
# pd.to_numeric first makes the cell safe to re-run: on a second run the column
# already holds strings, which "{:.0f}" cannot format directly.
merged_df["Total Vaccinations"] = pd.to_numeric(
    merged_df["Total Vaccinations"]
).map("{:.0f}".format)

merged_df

Unnamed: 0,Country,Total Vaccinations,TotalCases,TotalDeaths,TotalRecovered
0,Afghanistan,15627466,222457,7920,200464
1,Albania,1348540,334726,3602,329428
2,Algeria,7845452,271847,6881,183058
3,Andorra,57497,48015,165,47563
4,Angola,15748384,105384,1934,103419
...,...,...,...,...,...
194,Vietnam,90267291,11617699,43206,10639339
195,Wallis and Futuna,6658,3550,8,438
196,Yemen,1026880,11945,2159,9124
197,Zambia,11597447,345058,4058,340454
