Project 1: What factors have contributed to the spread of COVID-19 in different regions of the world

In [18]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
import requests
import json 

In [19]:
# Retrieve Datasets

path_covid_country = r"Dataset\Covid\country_wise_latest.csv"
path_covid_complete = r"Dataset\Covid\covid_19_clean_complete.csv"
path_air_quality = r"Dataset\Air Quality\World_cities_air_quality_and_water_pollution_2020.csv"
path_air_pollution=r"Dataset\Air Quality\World_most_polluted_countries_regions_historical_data _2018-2021.csv"
path_government_resp = r"Dataset\Government_response\COVID_gov_complete_29_03.csv"
path_vaccination_cases = r"Dataset\Vaccination\df_covid19_countries_cases.csv"
path_vaccination_rate = r"Dataset\Vaccination\df_covid19_countries_vaccination.csv"
path_weather = r"Dataset\Weather\training_data_with_weather_info_week_4.csv"


covid_country_df = pd.read_csv(path_covid_country)
covid_complete_df = pd.read_csv(path_covid_complete)
air_quality_df = pd.read_csv(path_air_quality)
air_pollution_df=pd.read_csv(path_air_pollution)
government_resp_df = pd.read_csv(path_government_resp)
vaccination_cases_df = pd.read_csv(path_vaccination_cases)
vaccination_rate_df = pd.read_csv(path_vaccination_rate)
weather_df = pd.read_csv(path_weather)

Factor 1: Impact of climate on the spread of Covid-19 (Kevin Guillemette)

Factor 2: Impact of Government Response on the spread of Covid-19 (Helena Fedorenko)

Factor 3: Impact of Vaccination Rate on the spread of Covid-19 (Brecht Nys)

In [20]:
# Merge two datasets

vaccination_df = pd.merge(vaccination_cases_df, vaccination_rate_df,on=["location", "date"])

# Remove unnecessary columns

vaccination_df = vaccination_df[["location",
                                 "date",
                                 "total_deaths",
                                 "new_deaths",
                                 "reproduction_rate",
                                 "people_vaccinated",
                                 "people_fully_vaccinated",
                                 "population"]]

# Add necessary columns

vaccination_rate = []
fully_vaccination_rate = []
death_rate = []

for index, row in vaccination_df.iterrows():
    current_population = row["population"]
    current_vaccinated = row["people_vaccinated"]
    current__fully_vaccinated = row["people_fully_vaccinated"]
    current_new_deaths = row["new_deaths"]

    vaccination_rate.append(current_vaccinated/current_population)
    fully_vaccination_rate.append(current__fully_vaccinated/current_population)
    death_rate.append(current_new_deaths/current_population)

vaccination_df["Vaccination Rate"] = vaccination_rate
vaccination_df["Fully Vaccinated Rate"] = fully_vaccination_rate
vaccination_df["Death Rate"] = death_rate

# Show Dataframe

vaccination_df.head()

Unnamed: 0,location,date,total_deaths,new_deaths,reproduction_rate,people_vaccinated,people_fully_vaccinated,population,Vaccination Rate,Fully Vaccinated Rate,Death Rate
0,Afghanistan,24/02/2020,0,0,0.0,0,0,41128772,0.0,0.0,0.0
1,Afghanistan,25/02/2020,0,0,0.0,0,0,41128772,0.0,0.0,0.0
2,Afghanistan,26/02/2020,0,0,0.0,0,0,41128772,0.0,0.0,0.0
3,Afghanistan,27/02/2020,0,0,0.0,0,0,41128772,0.0,0.0,0.0
4,Afghanistan,28/02/2020,0,0,0.0,0,0,41128772,0.0,0.0,0.0


- reproduction_rate
    The R number is a way of rating coronavirus or any disease's ability to spread.
    R is the number of people that one infected person will pass on a virus to, on average.
    If the R value is higher than one, then the number of cases keeps increasing.
- people_vaccinated: at least 1 vaccination
- people_fully_vaccinate: all required vaccinations
- vaccination_ratio: people_vaccinated / population


Factor 4: Impact of Air Pollution Rate on the spread of Covid-19 (Xing Ying Chen)

In [21]:
air_pollution_df

Unnamed: 0.1,Unnamed: 0,Rank,Country/Region,2021,2020,2019,2018,Population
0,0,1,Bangladesh,76.9,77.1,83.3,97.1,164689383
1,1,2,Chad,75.9,-,-,-,16425859
2,2,3,Pakistan,66.8,59,65.8,74.3,220892331
3,3,4,Tajikistan,59.4,30.9,-,-,9537642
4,4,5,India,58.1,51.9,58.1,72.5,1380004385
...,...,...,...,...,...,...,...,...
112,112,113,"Bonaire, Saint Eustatius and Saba",5.1,-,-,-,26221
113,113,114,Cape Verde,5.1,-,-,-,555988
114,114,115,Puerto Rico,4.8,3.7,10.2,13.7,2860840
115,115,116,U.S. Virgin Islands,4.5,3.7,3.5,-,104423


In [22]:
covid_country_df

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.50,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.00,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.60,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182,West Bank and Gaza,10621,78,3752,6791,152,2,0,0.73,35.33,2.08,8916,1705,19.12,Eastern Mediterranean
183,Western Sahara,10,1,8,1,0,0,0,10.00,80.00,12.50,10,0,0.00,Africa
184,Yemen,1691,483,833,375,10,4,36,28.56,49.26,57.98,1619,72,4.45,Eastern Mediterranean
185,Zambia,4552,140,2815,1597,71,1,465,3.08,61.84,4.97,3326,1226,36.86,Africa


In [23]:
air_pollution_df=pd.merge(air_pollution_df,covid_country_df,on="Country/Region",how="inner")
air_pollution_df

Unnamed: 0.1,Unnamed: 0,Rank,Country/Region,2021,2020,2019,2018,Population,Confirmed,Deaths,...,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,0,1,Bangladesh,76.9,77.1,83.3,97.1,164689383,226225,2965,...,2772,37,1801,1.31,55.56,2.36,207453,18772,9.05,South-East Asia
1,1,2,Chad,75.9,-,-,-,16425859,922,75,...,7,0,0,8.13,87.85,9.26,889,33,3.71,Africa
2,2,3,Pakistan,66.8,59,65.8,74.3,220892331,274289,5842,...,1176,20,3592,2.13,87.87,2.42,266096,8193,3.08,Eastern Mediterranean
3,3,4,Tajikistan,59.4,30.9,-,-,9537642,7235,60,...,43,1,58,0.83,83.32,1.00,6921,314,4.54,Europe
4,4,5,India,58.1,51.9,58.1,72.5,1380004385,1480073,33408,...,44457,637,33598,2.26,64.26,3.51,1155338,324735,28.11,South-East Asia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,107,108,Estonia,5.9,5.9,6.2,7.2,1326539,2034,69,...,0,0,1,3.39,94.54,3.59,2021,13,0.64,Europe
100,108,109,Australia,5.7,7.6,8,6.8,25499881,15303,167,...,368,6,137,1.09,60.84,1.79,12428,2875,23.13,Western Pacific
101,109,110,Bahamas,5.5,-,-,-,393248,382,11,...,40,0,0,2.88,23.82,12.09,174,208,119.54,Americas
102,110,111,Grenada,5.5,-,-,-,112519,23,0,...,0,0,0,0.00,100.00,0.00,23,0,0.00,Americas


In [31]:
#rename and drop columns

covid_air_pollution_df=air_pollution_df.drop(columns=['New cases','New deaths','New recovered','Deaths / 100 Cases','Active','Recovered / 100 Cases','Deaths / 100 Recovered','Confirmed last week','1 week change','1 week % increase'])
covid_air_pollution_df.rename(columns={"Rank":"Pollution Rank",
                                 "2021":"2021 Pollution Rank",
                                "2020":"2020 Pollution Rank",
                                "2019":"2019 Pollution Rank",
                                "2018":"2018 Pollution Rank",
                                "Confirmed":"Confirmed_Covid_Cases",
                                "Death":"Death_Covid_Cases",
                                "Recovered":"Recovered_Covid_Cases"})


Unnamed: 0.1,Unnamed: 0,Pollution Rank,Country/Region,2021 Pollution Rank,2020 Pollution Rank,2019 Pollution Rank,2018 Pollution Rank,Population,Confirmed_Covid_Cases,Deaths,Recovered_Covid_Cases,WHO Region
0,0,1,Bangladesh,76.9,77.1,83.3,97.1,164689383,226225,2965,125683,South-East Asia
1,1,2,Chad,75.9,-,-,-,16425859,922,75,810,Africa
2,2,3,Pakistan,66.8,59,65.8,74.3,220892331,274289,5842,241026,Eastern Mediterranean
3,3,4,Tajikistan,59.4,30.9,-,-,9537642,7235,60,6028,Europe
4,4,5,India,58.1,51.9,58.1,72.5,1380004385,1480073,33408,951166,South-East Asia
...,...,...,...,...,...,...,...,...,...,...,...,...
99,107,108,Estonia,5.9,5.9,6.2,7.2,1326539,2034,69,1923,Europe
100,108,109,Australia,5.7,7.6,8,6.8,25499881,15303,167,9311,Western Pacific
101,109,110,Bahamas,5.5,-,-,-,393248,382,11,91,Americas
102,110,111,Grenada,5.5,-,-,-,112519,23,0,23,Americas


In [25]:
covid_complete_df.rename(columns={"Country/Region":"Country"},inplace=True)
summary_covid_complete_df = covid_complete_df.groupby(["Country"]).sum()
summary_air_quality_df=air_quality_df.groupby(["Country"]).sum()
merge_df=pd.merge(summary_covid_complete_df,summary_air_quality_df,on="Country")
merge_df

Unnamed: 0_level_0,Lat,Long,Confirmed,Deaths,Recovered,Active,AirQuality,WaterPollution
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
