In [None]:
#dependencies
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.stats import linregress

In [None]:
# Read the Worldometer coronavirus summary CSV into a DataFrame and display it.
summary_csv = "data/worldometer_coronavirus_summary_data.csv"
summary_data = pd.read_csv(summary_csv)
summary_data

In [None]:
# Order the summary rows by total deaths (highest first) and show the first 20.
ranked_by_deaths = summary_data.sort_values(by="total_deaths", ascending=False)
ranked_by_deaths.head(20)

In [None]:
# Keep only the summary row(s) whose country is Kenya, then display them.
is_kenya = summary_data["country"] == "Kenya"
kenya_data = summary_data.loc[is_kenya]
kenya_data

In [None]:
# Build a five-column view of the summary table with presentation-friendly
# column titles, then narrow it to Kenya.
summary_column_titles = {
    "country": "Country",
    "total_confirmed": "Total Confirmed",
    "total_deaths": "Total Deaths",
    "total_recovered": "Total Recovered",
    "active_cases": "Active Cases",
}
ken_df = pd.DataFrame(
    {title: summary_data[source] for source, title in summary_column_titles.items()}
)
ken_df = ken_df[ken_df["Country"] == "Kenya"]
ken_df

In [None]:
#bar graph displaying effect of COVID in Kenya
# The four totals are read from ken_df (built in the cell above) instead of
# being retyped by hand, so the chart follows the CSV if the data is refreshed.
metric_labels = ["Total Confirmed", "Total Deaths", "Total Recovered", "Active Cases"]
if ken_df.empty:
    # Fail with a clear message rather than an opaque IndexError from .iloc[0].
    raise ValueError("No summary row found for Kenya")
Kenya = ken_df[metric_labels].iloc[0].tolist()

x_axis = np.arange(len(Kenya))
plt.bar(x_axis, Kenya, align="center", color="r")

# One tick per bar, labelled with the metric it represents.
tick_locations = list(x_axis)
plt.xticks(tick_locations, metric_labels)
plt.title("COVID-19 in Kenya")
plt.xlabel("Kenya COVID-19 Cases")
plt.ylabel("Population")
# Save before show(): the figure is written to disk while it is still current.
plt.savefig("images/COVID_in_kenya.png")

plt.show()

In [None]:
# Keep only the summary row(s) whose country is UK, then display them.
is_uk = summary_data["country"] == "UK"
uk_data = summary_data.loc[is_uk]
uk_data

In [None]:
# Build a five-column view of the summary table with presentation-friendly
# column titles, then narrow it to the UK.
summary_column_titles = {
    "country": "Country",
    "total_confirmed": "Total Confirmed",
    "total_deaths": "Total Deaths",
    "total_recovered": "Total Recovered",
    "active_cases": "Active Cases",
}
uk_df = pd.DataFrame(
    {title: summary_data[source] for source, title in summary_column_titles.items()}
)
uk_df = uk_df[uk_df["Country"] == "UK"]
uk_df

In [None]:
#bar graph displaying effect of COVID in the UK
# The four totals are read from uk_df (built in the cell above) instead of
# being retyped by hand, so the chart follows the CSV if the data is refreshed.
metric_labels = ["Total Confirmed", "Total Deaths", "Total Recovered", "Active Cases"]
if uk_df.empty:
    # Fail with a clear message rather than an opaque IndexError from .iloc[0].
    raise ValueError("No summary row found for UK")
UK = uk_df[metric_labels].iloc[0].tolist()

x_axis = np.arange(len(UK))
plt.bar(x_axis, UK, align="center", color="b")

# One tick per bar, labelled with the metric it represents.
tick_locations = list(x_axis)
plt.xticks(tick_locations, metric_labels)
plt.title("COVID-19 in UK")
plt.xlabel("UK COVID-19 Cases")
plt.ylabel("Population")
# Save before show(): the figure is written to disk while it is still current.
plt.savefig("images/COVID_in_uk.png")

plt.show()

In [None]:
#bar graph of population in Kenya and the UK
# NOTE(review): the y-axis is clipped to 50M-70M below, so bar heights are not
# proportional to the two populations - confirm this is intended.
country_names = ["Kenya", "UK"]
population_values = [55447839, 68391673]
x_axis = np.arange(len(population_values))
tick_locations = list(x_axis)

plt.bar(x_axis, population_values, align="center", color=["r", "b"])
plt.xticks(tick_locations, country_names)
plt.title("Population difference in Kenya & UK")
plt.xlabel("Country")
plt.ylabel("Population")
plt.ylim(50000000, 70000000)
plt.savefig("images/uk_vs_kenya_population.png")

In [None]:
#import daily dataset
# Read the Worldometer daily CSV into covid_df and display it.
daily_csv = "data/worldometer_coronavirus_daily_data.csv"
covid_df = pd.read_csv(daily_csv)
covid_df

In [None]:
# Kenya's daily rows, minus any row that has a missing value in any column.
kenya_df = covid_df[covid_df["country"] == "Kenya"].dropna()
kenya_df

In [None]:
#create timeline
# Thin the series by keeping rows whose index label is a multiple of 30.
# kenya_df still carries covid_df's index labels (it is never reset), so the
# test is applied to those labels.
label_is_multiple_of_30 = kenya_df.index % 30 == 0
covid_ken = kenya_df[label_is_multiple_of_30]

# Show the 20 sampled rows with the most new cases.
covid_ken.sort_values(by="daily_new_cases", ascending=False).head(20)

In [None]:
#line graph of daily new cases from March 2020 to November 2021 in Kenya
# Both series are columns of the sampled covid_ken frame, so they share an index.
daily_cases = covid_ken["daily_new_cases"]
date = covid_ken["date"]

plt.plot(date, daily_cases, color="b")
plt.xlabel("Timeline")
plt.ylabel("Number of New COVID Cases")
plt.xticks(rotation=90)  # vertical labels for the date ticks
plt.title("New COVID Cases in Kenya Daily from March 2020 to November 2021")
plt.savefig("images/Kenya_daily_new_cases.png")
plt.show()

In [None]:
# Sampled Kenya rows ordered by daily new deaths, largest first.
covid_ken.sort_values(by="daily_new_deaths", ascending=False)

In [None]:
#line graph of daily deaths from March 2020 to November 2021 in Kenya
# Both series are columns of the sampled covid_ken frame, so they share an index.
date = covid_ken["date"]
deaths = covid_ken["daily_new_deaths"]

plt.plot(date, deaths, color="r")
plt.xlabel("Timeline")
plt.ylabel("Number of COVID Deaths")
plt.xticks(rotation=90)  # vertical labels for the date ticks
plt.title("Daily New Deaths in Kenya from March 2020 to November 2021")
plt.savefig("images/Kenya_daily_deaths.png")
plt.show()

In [None]:
#drop n/a values
# Drop missing values from the two columns *together*. Dropping NaNs from each
# series on its own can remove different rows from each, leaving series of
# unequal length that no longer pair up point-for-point in the scatter plot
# and regression that follow.
cases_and_deaths = covid_ken[["daily_new_cases", "daily_new_deaths"]].dropna()
daily_cases = cases_and_deaths["daily_new_cases"]
deaths = cases_and_deaths["daily_new_deaths"]

In [None]:
#scatter plot of daily cases vs daily deaths in Kenya
# x = daily new cases, y = daily new deaths, one small dot per sampled row.
plt.scatter(x=daily_cases, y=deaths, marker=".")
plt.ylabel("Daily Deaths")
plt.xlabel("Daily Cases")
plt.title("Daily Cases vs Deaths in Kenya")

In [None]:
#adding linear regression to the scatter plot
x_axis, y_axis = daily_cases, deaths

# Fit a least-squares line of deaths on cases and evaluate it at every x.
result = st.linregress(x_axis, y_axis)
fit_slope, fit_intercept = result.slope, result.intercept
regress_values = x_axis * fit_slope + fit_intercept

# pearsonr returns the coefficient first; only that element is reported.
correlation = st.pearsonr(x_axis, y_axis)
print(f"The correlation coefficient between the daily new cases and deaths in Kenya is {correlation[0]}")

# Human-readable form of the fitted line, rounded to two decimals.
slope_text = str(round(fit_slope, 2))
intercept_text = str(round(fit_intercept, 2))
equation = "y = " + slope_text + "x + " + intercept_text
print(equation)

plt.scatter(x_axis, y_axis, marker=".")
plt.title("Daily Cases vs Deaths in Kenya")
plt.xlabel("Daily New Cases")
plt.ylabel("Daily New Deaths")

# Overlay the fitted line and write its equation at data coordinates (550, 1).
plt.plot(x_axis, regress_values, "r-")
plt.annotate(equation, (550, 1), color="r", fontsize=12)
plt.savefig("images/kenya_daily_cases_vs_daily_deaths.png")
plt.show()

In [None]:
#importing vaccination dataset
vaccine_data = pd.read_csv("data/country_vaccinations.csv")

#setting date to the format to enable merging
# Both frames are updated in place so that their "date" columns hold datetimes.
for frame in (vaccine_data, covid_df):
    frame["date"] = pd.to_datetime(frame["date"])

In [None]:
#selecting data only relevant to Kenya
kenya_covid = covid_df[covid_df["country"] == "Kenya"]
kenya_vaccine = vaccine_data[vaccine_data["country"] == "Kenya"]

# Persist the Kenya-only vaccination rows, with a header row and no index column.
kenya_vaccine.to_csv("data/Kenya_vaccination_data.csv", index=False, header=True)

In [None]:
#merging two datasets for Kenya to display full dataframe of vaccination and covid cases
# Join the two Kenya frames on their shared "date" column, then discard every
# merged row that has a missing value in any column.
merged_on_date = pd.merge(left=kenya_vaccine, right=kenya_covid, on="date")
kenya_merged = merged_on_date.dropna()
kenya_merged

In [None]:
#scatter plot of daily cases vs daily vaccination in Kenya
# x = daily vaccinations, y = daily new cases, both taken from the merged frame
# so every point pairs values from the same date.
kenya_merged_vacc = kenya_merged["daily_vaccinations"]
kenya_merged_cases = kenya_merged["daily_new_cases"]
plt.scatter(kenya_merged_vacc, kenya_merged_cases, marker="o", facecolors="blue", alpha=0.75)
# Axis label and title corrected: the previous text was garbled
# ("Daily VaccinationsNew Cases", "Daily Daily Vaccinations New Cases vs ...").
plt.xlabel("Daily Vaccinations")
plt.ylabel("Daily New Cases")
plt.title("Daily Vaccinations vs Daily New Cases in Kenya")

In [None]:
#adding linear regression to the scatter plot
# Fit a least-squares line of daily new cases on daily vaccinations and
# evaluate it at every x value.
(slope, intercept, rvalue, pvalue, stderr) = linregress(kenya_merged_vacc, kenya_merged_cases)
regress_values = kenya_merged_vacc * slope + intercept
# Human-readable form of the fitted line, rounded to two decimals.
line_eq= "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared is: {rvalue**2}")
plt.scatter(kenya_merged_vacc, kenya_merged_cases, marker="o", facecolors="blue", alpha=0.75)
plt.plot(kenya_merged_vacc, regress_values, "r-")
# The equation is written at data coordinates (10000, 200).
plt.annotate(line_eq, (10000,200), fontsize=12, color="red")
plt.xlabel("Daily Vaccinations")
plt.ylabel("Daily New Cases")
plt.title("Daily Vaccinations vs Daily New Cases in Kenya")
plt.savefig("images/kenya_daily_cases_vs_vaccination.png")
# Call show(): the bare name `plt.show` only referenced the function, so it
# never ran and the cell output was the function's repr.
plt.show()

In [None]:
#creating scatter plot of vaccination over time
# x = date, y = daily vaccinations, both taken from the merged Kenya frame.
kenya_time = kenya_merged["date"]
daily_vacc = kenya_merged["daily_vaccinations"]

marker_style = {"marker": "o", "facecolors": "red", "edgecolors": "black", "alpha": 0.75}
plt.scatter(kenya_time, daily_vacc, **marker_style)
plt.ylabel("Vaccination")
plt.xticks(rotation=45)  # slant the date tick labels
plt.title("Vaccination Over Time in Kenya")
plt.savefig("images/kenya_vaccination_over_time.png")