In [2]:
import pandas as pd

#Load data using pandas.read_csv()
df = pd.read_csv('owid-covid-data.csv')

#Check columns: df.columns
df.columns
#Identify missing values: df.isnull().sum()
df.isnull().sum()
#Handle missing numeric values with fillna() or interpolate()
df.fillna(0, inplace=True)
#Drop rows with missing dates/critical values.
df.dropna(inplace=True)
#Convert date column to datetime: pd.to_datetime()
pd.to_datetime(df['date'])
#Preview rows: df.head()
df.head()

  df = pd.read_csv('owid-covid-data.csv')


Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-01-03,0.0,0.0,0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,41128772.0,0.0,0.0,0.0,0.0
1,AFG,Asia,Afghanistan,2020-01-04,0.0,0.0,0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,41128772.0,0.0,0.0,0.0,0.0
2,AFG,Asia,Afghanistan,2020-01-05,0.0,0.0,0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,41128772.0,0.0,0.0,0.0,0.0
3,AFG,Asia,Afghanistan,2020-01-06,0.0,0.0,0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,41128772.0,0.0,0.0,0.0,0.0
4,AFG,Asia,Afghanistan,2020-01-07,0.0,0.0,0,0.0,0.0,0.0,...,0.0,37.746,0.5,64.83,0.511,41128772.0,0.0,0.0,0.0,0.0


In [None]:
import matplotlib.pyplot as plt
#Filter countries of interest (e.g., Kenya, USA, India)
selected_countries = ['Afghanistan', 'American Samoa', 'Andorra', 'Bolivia', 'Burundi']

new_df = df[df['location'].isin(selected_countries)]

#Plot total cases over time for selected countries
plt.figure(figsize=(12, 6))
colors = ["blue", "red", "green", "yellow", "black"]
for i, country in enumerate(selected_countries):
    country_data = new_df[df['location'] == country]
    plt.plot(country_data['date'], country_data['total_cases'], label=country, color=colors[i])


plt.xlabel('Date')
plt.ylabel('Total Cases')
plt.legend()
plt.title('Total cases over time selected for countries')


In [None]:
#Plot total deaths over time

plt.figure(figsize=(12, 6))
for color, country in enumerate(selected_countries):
    country_data = df[df['location'] == country]
    plt.plot(country_data['date'], country_data['total_deaths'], label="Total deaths", color=colors[color])

plt.xlabel('Date')
plt.ylabel('Total deaths')
plt.title('Total deaths over time for each country')
plt.grid(True)
plt.legend()

In [None]:
import numpy as np


if 'total_deaths' in df.columns and 'total_cases' in df.columns:
    # Avoid division by zero or NaN issues
    df['death_rate'] = df['total_deaths'] / df['total_cases']
    
    # Replace infinite or NaN values with 0
    df['death_rate'] = df['death_rate'].replace([np.inf, -np.inf], np.nan).fillna(0)
    
    # Display first few death rates
    print(df['death_rate'].head())
else:
    print("Missing one of the required columns: total_deaths or total_cases")

In [None]:
#Selected only columns where continent is Africa
african_countries = df[df['continent'] == 'Africa']
new_selected_countries = african_countries['location']
#Plotted a bar graph for the countries I just selected
df.groupby(new_selected_countries)['total_cases'].max().plot(kind='bar', figsize=(12, 6))

In [None]:
#Plot cumulative vaccinations over time for selected countries.
plt.figure(figsize=(12, 6))
for color, country in enumerate(selected_countries):
    country_data = df[df['location'] == country]
    plt.plot(country_data['total_vaccinations'], country_data['date'])
plt.title("Vaccinations over time for selected countries")
plt.xlabel("Date")
plt.ylabel("Total vaccinations")
plt.legend()

In [None]:
#Compare % vaccinated population
asian_countries = df[df['continent'] == 'Asia']

vaccinated_people = asian_countries.dropna(subset='people_fully_vaccinated')
vaccinated = vaccinated_people.sort_values('date').groupby('location').tail(1)
vaccinated = vaccinated.sort_values(by='people_fully_vaccinated')

plt.figure(figsize=(10, 12))
plt.barh(vaccinated['location'], vaccinated['people_fully_vaccinated'], color='red')
plt.ylabel("Fully vaccinated")
plt.title('COVID-19 Fully Vaccinated Population in Asia (Latest Data)')
plt.gca().invert_yaxis()

In [None]:
import plotly.express as px


fig = px.choropleth(
    df, 
    locations="iso_code", 
    color="total_cases", 
    hover_name="location", 
    animation_frame="date",
    color_continuous_scale="Reds",
    title="COVID-19 Total Cases Over Time"
)
fig.show()

In [None]:
import geopandas as gpd
shapefile_path = "ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp"
world = gpd.read_file(shapefile_path)

df = df.dropna(subset=["iso_code", "total_cases"])
df_latest = df.sort_values("date").groupby("iso_code").tail(1)
df_latest = df_latest[df_latest["iso_code"].str.len() == 3]
df_covid = df_latest[["iso_code", "location", "total_cases"]]

# Merge shapefile with COVID data
merged = world.merge(df_covid, how="left", left_on="ISO_A3", right_on="iso_code")

fig, ax = plt.subplots(figsize=(15, 10))
merged.plot(
    column="total_cases",
    cmap="Reds",
    legend=True,
    ax=ax,
    missing_kwds={
        "color": "lightgrey",
        "edgecolor": "white",
        "hatch": "///",
        "label": "No data"
    }
)
plt.title("Total COVID-19 Cases by Country", fontsize=16)
plt.axis('off')
plt.show()


In [None]:
'''
Fastest Vaccine Rollout:
Some countries have been able to implement vaccination campaigns at unusually fast rates, which might indicate effective planning, 
good healthcare infrastructure, or strong partnerships with organizations like WHO or UNICEF.

Low Vaccination Rates:
Countries with low vaccination rates might still be struggling with distribution issues, public hesitancy, 
or governmental challenges like political instability or conflict. These countries might also face logistical problems in getting vaccines to rural areas.

High Cases, Low Vaccination:
This is the type of insight you’d want to investigate deeper, as it could reveal gaps in public health strategies. 
High case numbers and low vaccination rates signal that a country needs urgent intervention or better vaccine distribution mechanisms.

Vaccination Impact on Case Numbers:
This is the proof that vaccination works. Countries with higher vaccination rates tend to see a reduction in cases because more people are protected from severe disease.

Smaller Countries with High Vaccination:
Small countries sometimes have an advantage because they are more manageable and can execute policies faster. 
High vaccination rates in these countries might be a shining example for others with larger populations to learn from.
'''