In [None]:
#Adding dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

In [None]:
# Read the Worldometer daily coronavirus dataset; every country section below
# filters its rows out of this frame.
covid_data = pd.read_csv(filepath_or_buffer="data/worldometer_coronavirus_daily_data.csv")

<h1>Analysis of Covid situation in UK</h1>

In [None]:
# Keep only the rows whose country is "UK" and write that subset to disk.
uk_covid = covid_data.loc[covid_data["country"].eq("UK")]
uk_covid.to_csv("cleaned_data/UK_covid_data.csv", header=True, index=False)
# Show the first rows as the cell output.
uk_covid.head()

<h3>Daily new Covid cases in UK</h3>

In [None]:
# Thin the UK data to roughly one observation per month by keeping the rows
# whose row label is a multiple of 30.
# NOTE: uk_covid is a filtered slice of covid_data, so the labels tested here
# are the ones inherited from the full dataset, not positions within the UK
# subset.
uk_covid2 = uk_covid.loc[uk_covid.index % 30 == 0]
daily_new_cases = uk_covid2["daily_new_cases"]
date = uk_covid2["date"]

# Line chart of the sampled daily case counts; saved before it is shown.
plt.plot(date, daily_new_cases)
plt.xticks(rotation=45)
plt.title("Daily New Covid Cases in UK from March 2020 to November 2021")
plt.savefig("images/UK_daily_cases.png")
plt.show()

<h3>Daily Deaths in UK</h3>

In [None]:
# Line chart of daily deaths, using the every-30th-row UK sample (uk_covid2).
date = uk_covid2["date"]
daily_new_deaths = uk_covid2["daily_new_deaths"]
plt.plot(date, daily_new_deaths)
plt.xticks(rotation=45)
plt.title("Daily New Deaths in UK from March 2020 to November 2021")
plt.savefig("images/UK_daily_deaths.png")
plt.show()

<h3>Daily new cases vs daily deaths in UK</h3>

In [None]:
# Scatter the sampled daily deaths (y axis) against the sampled daily cases (x axis).
plt.scatter(
    x=daily_new_cases,
    y=daily_new_deaths,
    alpha=0.75,
    facecolors="blue",
    marker="o",
)
plt.ylabel("Daily New Deaths")
plt.xlabel("Daily New Cases")
plt.title("UK Daily New Cases vs Deaths")

In [None]:
# Fit a least-squares line to the UK cases-vs-deaths sample and draw it over
# the scatter plot.
# A missing value in either column would propagate into the fit and turn the
# slope, intercept and r-squared into NaN, so keep only the sampled rows where
# both figures are present. The rows are dropped jointly to keep each case
# count paired with the death count from the same row.
uk_pairs = pd.concat({"cases": daily_new_cases, "deaths": daily_new_deaths}, axis=1).dropna()
(slope, intercept, rvalue, pvalue, stderr) = linregress(uk_pairs["cases"], uk_pairs["deaths"])
# Predicted deaths for each observed case count, used to draw the fitted line.
regress_values = uk_pairs["cases"] * slope + intercept
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
print(f"The r-squared is: {rvalue**2}")

plt.scatter(uk_pairs["cases"], uk_pairs["deaths"], marker="o", facecolors="blue", alpha=0.75)
plt.plot(uk_pairs["cases"], regress_values, "r-")
# The equation label is placed at a fixed data coordinate.
plt.annotate(line_eq, (27000, 600), fontsize=15, color="red")
plt.xlabel("Daily New cases")
plt.ylabel("Daily new deaths")
plt.title("UK Daily New Cases vs Deaths")
# Save before showing so the file is written from the fully drawn figure.
plt.savefig("images/UK_daily_cases_vs_deaths.png")
plt.show()

The R-squared value is really low, indicating no strong correlation between daily new cases and daily deaths in the UK.

<h1>Analysis of Covid situation in Kenya</h1>

In [None]:
# Keep only the rows whose country is "Kenya" and write that subset to disk.
kenya_covid = covid_data.loc[covid_data["country"].eq("Kenya")]
kenya_covid.to_csv("cleaned_data/Kenya_covid_data.csv", header=True, index=False)
# Show the first rows as the cell output.
kenya_covid.head()

<h3>Daily Covid Cases in Kenya</h3>

In [None]:
# Thin the Kenya data to roughly one observation per month by keeping the rows
# whose row label is a multiple of 30.
# NOTE: kenya_covid is a filtered slice of covid_data, so the labels tested
# here are the ones inherited from the full dataset, not positions within the
# Kenya subset.
kenya_covid2 = kenya_covid.loc[kenya_covid.index % 30 == 0]
daily_new_cases_kenya = kenya_covid2["daily_new_cases"]
date2 = kenya_covid2["date"]

# Line chart of the sampled daily case counts; saved before it is shown.
plt.plot(date2, daily_new_cases_kenya)
plt.xticks(rotation=45)
plt.title("Daily New Covid Cases in Kenya from March 2020 to November 2021")
plt.savefig("images/kenya_daily_cases.png")
plt.show()

<h3>Daily Deaths in Kenya</h3>

In [None]:
# Line chart of daily deaths in Kenya, using the every-30th-row sample (kenya_covid2).
date2 = kenya_covid2["date"]
daily_new_deaths_kenya = kenya_covid2["daily_new_deaths"]
plt.plot(date2, daily_new_deaths_kenya)
plt.xticks(rotation=45)
plt.title("Daily New Deaths in Kenya from March 2020 to November 2021")
plt.savefig("images/kenya_daily_deaths.png")
plt.show()

<h3>Daily Cases vs Deaths in Kenya</h3>

In [None]:
# Remove missing values before plotting and fitting.
# The two series are cleaned together: dropping NaNs from each one separately
# can leave them with different lengths, so a case count could end up paired
# with the death count of a different row (or the regression could fail on
# unequal lengths). Aligning on the row label and dropping incomplete rows
# keeps every remaining pair intact. Re-running this cell is harmless.
kenya_pairs = pd.concat([daily_new_cases_kenya, daily_new_deaths_kenya], axis=1).dropna()
daily_new_cases_kenya = kenya_pairs.iloc[:, 0]
daily_new_deaths_kenya = kenya_pairs.iloc[:, 1]

# Scatter plot of daily deaths (y axis) against daily cases (x axis).
plt.scatter(daily_new_cases_kenya, daily_new_deaths_kenya, marker="o", facecolors="blue", alpha=0.75)
plt.xlabel("Daily New Cases")
plt.ylabel("Daily New Deaths")
plt.title("Kenya Daily New Cases vs Deaths")

In [None]:
# Fit a least-squares line to the Kenya cases-vs-deaths sample and draw it over
# the scatter plot.
# Align the two series on their row labels and keep only complete pairs, so the
# fit always receives equally long, correctly paired inputs.
kenya_fit = pd.concat(
    {"cases": daily_new_cases_kenya, "deaths": daily_new_deaths_kenya}, axis=1
).dropna()
(slope, intercept, rvalue, pvalue, stderr) = linregress(kenya_fit["cases"], kenya_fit["deaths"])
# Predicted deaths for each observed case count, used to draw the fitted line.
regress_values = kenya_fit["cases"] * slope + intercept
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
print(f"The r-squared is: {rvalue**2}")

plt.scatter(kenya_fit["cases"], kenya_fit["deaths"], marker="o", facecolors="blue", alpha=0.75)
plt.plot(kenya_fit["cases"], regress_values, "r-")
# The equation label is placed at a fixed data coordinate.
plt.annotate(line_eq, (600, 2.5), fontsize=15, color="red")
plt.xlabel("Daily New cases")
plt.ylabel("Daily new deaths")
plt.title("Kenya Daily New Cases vs Deaths")
# Save before showing so the file is written from the fully drawn figure.
plt.savefig("images/kenya_daily_cases_vs_deaths.png")
# plt.show must be called; the bare name only echoes the function object.
plt.show()

The r-squared value is relatively low, suggesting no strong correlation between the variables.

<h2> Looking into number of cases vs vaccination rate in Kenya and UK</h2>

<h3> Kenya vaccination analysis</h3>

In [None]:
# Load the per-country vaccination dataset.
vaccine_data = pd.read_csv("data/country_vaccinations.csv")
# Parse the "date" column of both frames into datetimes so the two datasets
# can later be merged on it.
covid_data["date"] = pd.to_datetime(covid_data["date"])
vaccine_data["date"] = pd.to_datetime(vaccine_data["date"])

In [None]:
# Kenya's vaccination rows, also written to disk.
kenya_vaccine = vaccine_data.loc[vaccine_data["country"].eq("Kenya")]
kenya_vaccine.to_csv("cleaned_data/Kenya_vaccination_data.csv", header=True, index=False)
# Re-select Kenya's covid rows so this copy carries the parsed datetime "date" column.
kenya_covid = covid_data.loc[covid_data["country"].eq("Kenya")]

In [None]:
# Join Kenya's vaccination and covid rows on "date" (inner join: only dates
# present in both frames survive), then discard incomplete rows.
# NOTE(review): dropna() without a subset removes a row when ANY column is
# missing, not only the columns plotted below - confirm that is intended.
kenya_merged = kenya_vaccine.merge(kenya_covid, on="date").dropna()
kenya_merged.head()

In [None]:
# Scatter Kenya's daily vaccinations (y axis) against its daily new cases (x axis).
kenya_merged_cases = kenya_merged["daily_new_cases"]
kenya_merged_vacc = kenya_merged["daily_vaccinations"]
plt.scatter(
    x=kenya_merged_cases,
    y=kenya_merged_vacc,
    alpha=0.75,
    facecolors="blue",
    marker="o",
)
plt.ylabel("Daily Vaccinations")
plt.xlabel("Daily New Cases")
plt.title("Daily new cases vs daily vaccination in Kenya")

In [None]:
# Fit a least-squares line to Kenya's daily cases vs daily vaccinations and
# draw it over the scatter plot.
(slope, intercept, rvalue, pvalue, stderr) = linregress(kenya_merged_cases, kenya_merged_vacc)
# Predicted vaccinations for each observed case count, used to draw the fitted line.
regress_values = kenya_merged_cases * slope + intercept
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
print(f"The r-squared is: {rvalue**2}")

plt.scatter(kenya_merged_cases, kenya_merged_vacc, marker="o", facecolors="blue", alpha=0.75)
plt.plot(kenya_merged_cases, regress_values, "r-")
# The equation label is placed at a fixed data coordinate.
plt.annotate(line_eq, (400, 2500), fontsize=15, color="red")
plt.xlabel("Daily New Cases")
plt.ylabel("Daily Vaccinations")
plt.title("Daily new cases vs daily vaccination in Kenya")
# Save before showing so the file is written from the fully drawn figure.
plt.savefig("images/kenya_daily_cases_vs_vaccination.png")
# plt.show must be called; the bare name only echoes the function object.
plt.show()

In [None]:
# Scatter Kenya's daily vaccinations against the date to show the roll-out over time.
daily_vacc = kenya_merged["daily_vaccinations"]
kenya_time = kenya_merged["date"]
plt.scatter(
    kenya_time,
    daily_vacc,
    alpha=0.75,
    edgecolors="black",
    facecolors="red",
    marker="o",
)
plt.xticks(rotation=45)
plt.title("Vaccination over time in Kenya")
plt.ylabel("Vaccination")
plt.savefig("images/kenya_vaccination_over_time.png")

<h3> UK vaccination analysis</h3>

In [None]:
# The vaccination dataset splits the UK into its constituent countries, so pull
# each one out (country, date and daily vaccinations only) and line them up
# side by side on "date" to match the single "UK" entry of the covid dataset.
vacc_columns = ["country", "date", "daily_vaccinations"]
eng_vaccine = vaccine_data.loc[vaccine_data["country"] == "England", vacc_columns]
scot_vaccine = vaccine_data.loc[vaccine_data["country"] == "Scotland", vacc_columns]
wal_vaccine = vaccine_data.loc[vaccine_data["country"] == "Wales", vacc_columns]
irl_vaccine = vaccine_data.loc[vaccine_data["country"] == "Northern Ireland", vacc_columns]

# Outer joins keep every date reported by at least one country.
# pandas only adds a suffix to column names that clash between the two sides:
#   merge 1: England/Scotland clash  -> columns end in 1 / 2
#   merge 2: nothing clashes (the left side is already suffixed), so the Wales
#            columns stay unsuffixed and the suffixes argument has no effect
#   merge 3: Wales/Northern Ireland clash -> columns end in 3 / 4
uk_merged1 = eng_vaccine.merge(scot_vaccine, how="outer", on="date", suffixes=("1", "2"))
uk_merged2 = uk_merged1.merge(wal_vaccine, how="outer", on="date", suffixes=("2", "3"))
uk_merged3 = uk_merged2.merge(irl_vaccine, how="outer", on="date", suffixes=("3", "4"))

# Replace every gap left by the outer joins with 0 (this applies to the country
# columns as well as the vaccination counts).
uk_merged3 = uk_merged3.fillna(0)
uk_merged3.head()

In [None]:
# Daily vaccinations across the whole UK: add the four constituent-country
# columns together row by row (one row per date in uk_merged3).
# The addition is vectorised over whole columns, so it is computed exactly
# once. Looping over uk_merged3 would iterate over its column labels and
# rebuild this same Series on every pass.
uk_region_columns = [
    "daily_vaccinations1",
    "daily_vaccinations2",
    "daily_vaccinations3",
    "daily_vaccinations4",
]
daily_sum = uk_merged3[uk_region_columns].sum(axis=1)
# Kept as a one-element list so existing uk_daily_vacc[0] lookups keep working.
uk_daily_vacc = [daily_sum]
uk_daily_vacc[0]

In [None]:
# Build the UK-level vaccination table: one row per date with the combined
# England, Wales, Scotland and Northern Ireland daily vaccinations.
uk_vaccination_df = pd.DataFrame(
    {
        "date": uk_merged3["date"],
        "daily vaccinations": uk_daily_vacc[0],
    }
)
# Label every row as "UK", placed as the first column.
uk_vaccination_df.insert(0, "country", "UK")
uk_vaccination_df.to_csv("cleaned_data/UK_vaccination_data.csv", header=True, index=False)
uk_vaccination_df.head()

In [None]:
# Re-select the UK rows of the covid dataset, keeping only the date and the
# daily new cases needed for the merge with the vaccination table.
uk_covid = covid_data.loc[covid_data["country"] == "UK", ["date", "daily_new_cases"]]

In [None]:
# Join the UK vaccination table with the UK covid cases on "date" (inner join:
# only dates present in both frames survive), then discard incomplete rows.
uk_combined = uk_vaccination_df.merge(uk_covid, on="date").dropna()
uk_combined.head()

In [None]:
# Scatter the UK's daily vaccinations (y axis) against its daily new cases (x axis).
uk_combined_cases = uk_combined["daily_new_cases"]
uk_combined_vacc = uk_combined["daily vaccinations"]
plt.scatter(
    x=uk_combined_cases,
    y=uk_combined_vacc,
    alpha=0.75,
    facecolors="blue",
    marker="o",
)
plt.ylabel("Daily Vaccinations")
plt.xlabel("Daily New Cases")
plt.title("Daily new cases vs daily vaccination in UK")

In [None]:
# Fit a least-squares line to the UK's daily cases vs daily vaccinations and
# draw it over the scatter plot.
(slope, intercept, rvalue, pvalue, stderr) = linregress(uk_combined_cases, uk_combined_vacc)
# Predicted vaccinations for each observed case count, used to draw the fitted line.
regress_values = uk_combined_cases * slope + intercept
line_eq = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))
print(f"The r-squared is: {rvalue**2}")

plt.scatter(uk_combined_cases, uk_combined_vacc, marker="o", facecolors="blue", alpha=0.75)
plt.plot(uk_combined_cases, regress_values, "r-")
# The equation label is placed at a fixed data coordinate.
plt.annotate(line_eq, (1000, 100000), fontsize=15, color="red")
plt.xlabel("Daily New Cases")
plt.ylabel("Daily Vaccinations")
plt.title("Daily new cases vs daily vaccination in UK")
# Save before showing so the file is written from the fully drawn figure.
plt.savefig("images/uk_daily_cases_vs_vaccination.png")
# plt.show must be called; the bare name only echoes the function object.
plt.show()

In [None]:
# Weekly sample: keep the rows of uk_combined whose row label is a multiple of 7.
# NOTE: the test is on the label, so rows already removed by the earlier
# dropna() simply leave gaps in the sample.
uk_combined_timeframe = uk_combined.loc[uk_combined.index % 7 == 0]

In [None]:
# Scatter the weekly-sampled UK daily vaccinations against the date to show
# the roll-out over time.
time_uk = uk_combined_timeframe["date"]
daily_Vacc = uk_combined_timeframe["daily vaccinations"]
plt.scatter(time_uk, daily_Vacc, marker="o", facecolors="red", edgecolors="black", alpha=0.75)
plt.xticks(rotation=45)
# Label the figure like the matching Kenya chart so the saved image stands on its own.
plt.ylabel("Vaccination")
plt.title("Vaccination over time in UK")
plt.savefig("images/uk_vaccination_over_time.png")