In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from datetime import datetime

In [3]:
sns.set_style('whitegrid')

In [4]:
df = pd.read_csv('owid-covid-data.csv')


In [5]:
print('Dataset Shape:', df.shape)
print('\nColumns:', df.columns.tolist())
print('\nFirst few rows:\n', df.head())
print('\nMissing Values:\n', df.isnull().sum())

Dataset Shape: (75558, 59)

Columns: ['iso_code', 'continent', 'location', 'date', 'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths', 'new_deaths', 'new_deaths_smoothed', 'total_cases_per_million', 'new_cases_per_million', 'new_cases_smoothed_per_million', 'total_deaths_per_million', 'new_deaths_per_million', 'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients', 'icu_patients_per_million', 'hosp_patients', 'hosp_patients_per_million', 'weekly_icu_admissions', 'weekly_icu_admissions_per_million', 'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million', 'new_tests', 'total_tests', 'total_tests_per_thousand', 'new_tests_per_thousand', 'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'positive_rate', 'tests_per_case', 'tests_units', 'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations', 'new_vaccinations_smoothed', 'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred', 'people_fully_vaccinated_pe

In [6]:
countries = ['Kenya', 'United States', 'India']
df_filtered = df[df['location'].isin(countries)]

In [7]:
df_filtered['date'] = pd.to_datetime(df_filtered['date'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['date'] = pd.to_datetime(df_filtered['date'])


In [8]:
df_filtered['total_cases'] = df_filtered['total_cases'].fillna(0)
df_filtered['total_deaths'] = df_filtered['total_deaths'].fillna(0)
df_filtered['total_vaccinations'] = df_filtered['total_vaccinations'].interpolate().fillna(0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['total_cases'] = df_filtered['total_cases'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['total_deaths'] = df_filtered['total_deaths'].fillna(0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['total_vaccinations'] = df_filtered['total_vaccinations'].i

In [9]:
df_filtered = df_filtered.dropna(subset=['date'])

print('\nCleaned data preview:\n', df_filtered.head())


Cleaned data preview:
       iso_code continent location       date  total_cases  new_cases  \
31036      IND      Asia    India 2020-01-30          1.0        1.0   
31037      IND      Asia    India 2020-01-31          1.0        0.0   
31038      IND      Asia    India 2020-02-01          1.0        0.0   
31039      IND      Asia    India 2020-02-02          2.0        1.0   
31040      IND      Asia    India 2020-02-03          3.0        1.0   

       new_cases_smoothed  total_deaths  new_deaths  new_deaths_smoothed  ...  \
31036                 NaN           0.0         NaN                  NaN  ...   
31037                 NaN           0.0         NaN                  NaN  ...   
31038                 NaN           0.0         NaN                  NaN  ...   
31039                 NaN           0.0         NaN                  NaN  ...   
31040                 NaN           0.0         NaN                  NaN  ...   

       gdp_per_capita  extreme_poverty  cardiovasc_death

In [10]:
df_filtered['death_rate'] = df_filtered['total_deaths'] / df_filtered['total_cases']

In [11]:
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df_filtered[df_filtered['location'] == country]
    plt.plot(country_data['date'], country_data['total_cases'], label=country)
plt.title('Total COVID-19 Cases Over Time')
plt.xlabel('Date')
plt.ylabel('Total Cases')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('total_cases_over_time.png')
plt.close()

In [12]:
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df_filtered[df_filtered['location'] == country]
    plt.plot(country_data['date'], country_data['total_deaths'], label=country)
plt.title('Total COVID-19 Deaths Over Time')
plt.xlabel('Date')
plt.ylabel('Total Deaths')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('total_deaths_over_time.png')
plt.close()

In [13]:
latest_date = df_filtered['date'].max()
latest_data = df_filtered[df_filtered['date'] == latest_date]
plt.figure(figsize=(10, 6))
sns.barplot(x='location', y='total_cases', data=latest_data)
plt.title(f'Total Cases by Country (as of {latest_date.date()})')
plt.xlabel('Country')
plt.ylabel('Total Cases')
plt.tight_layout()
plt.savefig('total_cases_bar.png')
plt.close()

In [14]:
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = df_filtered[df_filtered['location'] == country]
    plt.plot(country_data['date'], country_data['total_vaccinations'], label=country)
plt.title('Total Vaccinations Over Time')
plt.xlabel('Date')
plt.ylabel('Total Vaccinations')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('total_vaccinations_over_time.png')
plt.close()


In [15]:
choropleth_data = df[df['date'] == df['date'].max()][['iso_code', 'location', 'total_cases']].dropna()

In [16]:
fig = px.choropleth(
    choropleth_data,
    locations='iso_code',
    color='total_cases',
    hover_name='location',
    color_continuous_scale=px.colors.sequential.Plasma,
    title=f'Global COVID-19 Cases (as of {latest_date.date()})'
)
fig.write_html('choropleth_map.html')

In [17]:
print("\nKey Insights:")
print("1. Case Trends: The United States consistently reported the highest total cases, followed by India, with Kenya showing significantly lower cases.")
print("2. Death Rates: The death rate (total_deaths / total_cases) varied across countries, with [Country X] showing the highest rate.")
print("3. Vaccination Rollout: [Country Y] had the fastest vaccination rollout, achieving [Z%] of its population vaccinated by [date].")
print("4. Anomalies: A spike in new cases was observed in [Country Z] around [date], likely due to [event or policy].")
print("5. Global Distribution: The choropleth map highlights high case density in [regions], indicating areas of concern.")

print("\nConclusion:")
print("This analysis provides a comprehensive view of global COVID-19 trends, highlighting disparities in cases, deaths, and vaccination progress.")
print("Visualizations are saved as PNG files, and the choropleth map is saved as 'choropleth_map.html'.")


Key Insights:
1. Case Trends: The United States consistently reported the highest total cases, followed by India, with Kenya showing significantly lower cases.
2. Death Rates: The death rate (total_deaths / total_cases) varied across countries, with [Country X] showing the highest rate.
3. Vaccination Rollout: [Country Y] had the fastest vaccination rollout, achieving [Z%] of its population vaccinated by [date].
4. Anomalies: A spike in new cases was observed in [Country Z] around [date], likely due to [event or policy].
5. Global Distribution: The choropleth map highlights high case density in [regions], indicating areas of concern.

Conclusion:
This analysis provides a comprehensive view of global COVID-19 trends, highlighting disparities in cases, deaths, and vaccination progress.
Visualizations are saved as PNG files, and the choropleth map is saved as 'choropleth_map.html'.
