# COVID-19 Data Analysis

This notebook analyzes global COVID-19 data using the Our World in Data dataset. It includes data loading, exploration, and various visualizations to understand pandemic trends.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's dark-grid look to every figure drawn below.
sns.set(style="darkgrid")

In [None]:
# Read the CSV and convert the 'date' column to datetimes in one chained
# expression, so `df` is only ever bound to the fully prepared frame.
df = pd.read_csv('owid-covid-data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Last expression of the cell: (rows, columns) of the loaded frame.
df.shape

In [None]:
# Print column names, dtypes and non-null counts for the loaded frame.
df.info()

In [None]:
# Display the first rows of the frame as a quick sanity check of the load.
df.head()

## Daily New COVID-19 Cases in South Africa

In [None]:
# Keep only the South Africa rows; a later cell reuses `sa_df`.
sa_df = df.loc[df['location'].eq('South Africa')]

# Single line chart of daily new cases over time, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(12, 5))
ax.plot(sa_df['date'], sa_df['new_cases'], label='New Cases')
ax.set_title('Daily New COVID-19 Cases in South Africa')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend()
fig.tight_layout()
plt.show()

## Daily New COVID-19 Cases (Top 5 Countries)

In [None]:
# Hand-picked locations to compare; the list order fixes the legend order.
countries = ['South Africa', 'India', 'United States', 'Brazil', 'United Kingdom']
subset = df.loc[df['location'].isin(countries)]

fig, ax = plt.subplots(figsize=(14, 6))
# One line per location, drawn in the order given by `countries`.
for name in countries:
    rows = subset.loc[subset['location'] == name]
    ax.plot(rows['date'], rows['new_cases'], label=name)

ax.set(
    title='Daily New COVID-19 Cases (Top 5 Countries)',
    xlabel='Date',
    ylabel='New Cases',
)
ax.legend()
fig.tight_layout()
plt.show()

## Total COVID-19 Deaths by Continent

In [None]:
# Most recent date present anywhere in the dataset; used in the chart title.
latest_date = df['date'].max()
# Rows dated exactly `latest_date`. Kept under its original name so any later
# cell that reads `latest_df` keeps working.
latest_df = df[df['date'] == latest_date]

# Not every location has a row (or a non-null total_deaths) on the very last
# date, and sum() treats missing values as 0, so a single-day snapshot
# under-counts. Use each location's most recent *reported* cumulative total.
# Rows without a continent are dropped, matching groupby's default of
# ignoring NaN keys.
latest_deaths = (
    df.dropna(subset=['continent', 'total_deaths'])
    .sort_values('date')
    .groupby(['continent', 'location'])['total_deaths']
    .last()
)
continent_deaths = latest_deaths.groupby(level='continent').sum()

# Horizontal bars, smallest total at the bottom.
fig, ax = plt.subplots(figsize=(10, 5))
continent_deaths.sort_values().plot(kind='barh', ax=ax, color='tomato')
ax.set_title(f'Total COVID-19 Deaths by Continent as of {latest_date.date()}')
ax.set_xlabel('Total Deaths')
ax.set_ylabel('Continent')
fig.tight_layout()
plt.show()

## Top 5 Countries by Total Vaccinations

In [None]:
# Rank actual countries only. In the OWID file, aggregate rows (World,
# continents, income groups, ...) carry no continent value, so dropping rows
# with a missing continent removes them from a "Top 5 Countries" ranking.
# total_vaccinations is reported irregularly, so take each country's most
# recent non-null value rather than only rows dated exactly `latest_date`.
latest_vax = (
    df.dropna(subset=['continent', 'total_vaccinations'])
    .sort_values('date')
    .groupby('location', as_index=False)['total_vaccinations']
    .last()
)
# Five largest totals, in descending order.
top_vax = latest_vax.nlargest(5, 'total_vaccinations')

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=top_vax, y='location', x='total_vaccinations', palette='viridis', ax=ax)
# `latest_date` is defined in the continent-deaths cell that runs before this one.
ax.set_title(f'Top 5 Countries by Total Vaccinations as of {latest_date.date()}')
ax.set_xlabel('Total Vaccinations')
ax.set_ylabel('Country')
fig.tight_layout()
plt.show()

## South Africa: Total Cases vs. Total Deaths Over Time

In [None]:
# Cumulative cases and deaths for South Africa on one shared axis.
# `sa_df` comes from the South Africa daily-cases cell that runs earlier.
fig, ax = plt.subplots(figsize=(12, 5))
for column, label, color in (
    ('total_cases', 'Total Cases', 'blue'),
    ('total_deaths', 'Total Deaths', 'red'),
):
    ax.plot(sa_df['date'], sa_df[column], label=label, color=color)

ax.set_title('South Africa: Total Cases vs. Total Deaths Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.legend()
fig.tight_layout()
plt.show()

## Global Daily New Cases vs. New Deaths

In [None]:
# Sum over individual countries only. The OWID file also contains aggregate
# rows (World, continents, income groups, ...) whose continent is empty;
# adding them on top of their member countries counts every case and death
# several times over.
country_rows = df[df['continent'].notna()]
global_df = (
    country_rows.groupby('date')[['new_cases', 'new_deaths']]
    .sum()
    .reset_index()
)

# Both daily series share one axis, so they can be read against the same scale.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(global_df['date'], global_df['new_cases'], label='New Cases', color='blue')
ax.plot(global_df['date'], global_df['new_deaths'], label='New Deaths', color='red')
ax.set_title('Global Daily New COVID-19 Cases and Deaths')
ax.set_xlabel('Date')
ax.set_ylabel('Count')
ax.legend()
fig.tight_layout()
plt.show()

## Summary and Insights
- The dataset offers a detailed time series of COVID-19 metrics globally.
- Daily new cases reveal distinct pandemic waves in each of the countries plotted.
- Deaths by continent reflect regional population and healthcare impacts.
- Vaccination progress varies widely across countries.
- Visualizations make trends and comparisons much easier to understand.