In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset; the relative path means owid-covid-data.csv must sit in
# the current working directory.
covid_df = pd.read_csv("owid-covid-data.csv")

# Preview the first rows. The call is wrapped in print() because a bare
# expression is only echoed when it is the last statement of a notebook
# cell; anywhere else its result is silently discarded.
print(covid_df.head())

# View column names
print(covid_df.columns)

# Count missing values per column and report only the columns that have any.
missing_values = covid_df.isnull().sum()
print(missing_values[missing_values > 0])

# Filter selected countries
countries = ['United States', 'India', 'Kenya']
# .copy() yields an independent frame, so the column assignments below write
# to covid_df itself rather than to a slice of the unfiltered data (which
# triggers pandas' SettingWithCopyWarning).
covid_df = covid_df[covid_df['location'].isin(countries)].copy()

# Drop rows with missing dates or critical values
covid_df = covid_df.dropna(subset=['date', 'total_cases', 'total_deaths'])

# Convert date to datetime format
covid_df['date'] = pd.to_datetime(covid_df['date'])

# Order each country's rows chronologically so interpolation runs along time.
covid_df = covid_df.sort_values(['location', 'date'])

# Fill missing numeric values, one country at a time. Interpolating the whole
# frame in a single pass would carry values across the boundary between two
# countries' rows (one country's final figure leaking into the leading gaps
# of the next country).
numeric_cols = covid_df.select_dtypes(include='number').columns
covid_df[numeric_cols] = (
    covid_df.groupby('location')[list(numeric_cols)]
    .transform(lambda series: series.interpolate())
)

# Total Cases Over Time
# One line per country, drawn on an explicitly created 14x7 inch Axes.
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(data=covid_df, x='date', y='total_cases', hue='location', ax=ax)
ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend(title='Country')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.grid(True)
plt.show()

# Total Deaths Over Time
# Cumulative deaths per country on a fresh 14x7 inch figure.
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(data=covid_df, x='date', y='total_deaths', hue='location', ax=ax)
ax.set(
    title='Total COVID-19 Deaths Over Time',
    xlabel='Date',
    ylabel='Total Deaths',
)
ax.legend(title='Country')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.grid(True)
plt.show()

# Daily New Cases Comparison
# seaborn draws into the figure opened here and hands back its Axes, which
# is then used for all labelling.
fig = plt.figure(figsize=(14, 7))
ax = sns.lineplot(data=covid_df, x='date', y='new_cases', hue='location')
ax.set_title('Daily New COVID-19 Cases')
ax.set_xlabel('Date')
ax.set_ylabel('New Cases')
ax.legend(title='Country')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.grid(True)
plt.show()

# Death Rate Calculation
# death_rate = total_deaths / total_cases for every row. Rows whose
# total_cases is not positive are masked to NaN before dividing, so the
# ratio never becomes inf. assign() returns a new frame, which avoids
# writing the column into what may be a filtered slice of another DataFrame.
covid_df = covid_df.assign(
    death_rate=covid_df['total_deaths']
    / covid_df['total_cases'].where(covid_df['total_cases'] > 0)
)

# Plot the ratio over time, one line per country.
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(data=covid_df, x='date', y='death_rate', hue='location', ax=ax)
ax.set_title('COVID-19 Death Rate Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Death Rate')
ax.legend(title='Country')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.grid(True)
plt.show()

# Countries included in the vaccination charts
selected_countries = ['United States', 'India', 'Kenya']
vaccine_data = covid_df.loc[covid_df['location'].isin(selected_countries)]

# Cumulative vaccinations over time, one line per country
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(
    data=vaccine_data,
    x='date',
    y='total_vaccinations',
    hue='location',
    ax=ax,
)
ax.set_title('Total COVID-19 Vaccinations Over Time', fontsize=16)
ax.set_xlabel('Date')
ax.set_ylabel('Total Vaccinations')
ax.legend(title='Country')
# Tilt the date labels so neighbouring ticks do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
ax.grid(True)
plt.show()

# Percentage of Population Vaccinated (Latest Date)
# Use each country's most recent row that actually carries a vaccination
# figure. Filtering on the single global maximum date drops every country
# that has no row on exactly that day.
vaccinated = covid_df.loc[
    covid_df['location'].isin(selected_countries),
    ['location', 'date', 'people_vaccinated_per_hundred'],
].dropna(subset=['people_vaccinated_per_hundred'])
latest_vaccination = (
    vaccinated.sort_values('date')
    .groupby('location')
    .tail(1)
    .sort_values('location')
)
# Most recent date among the plotted rows; individual countries may have
# reported slightly earlier than this.
latest_date = latest_vaccination['date'].max()

if latest_vaccination.empty:
    # Nothing to draw, and latest_date would be NaT (no usable .date()).
    print('No vaccination data available for the selected countries.')
else:
    plt.figure(figsize=(10, 6))
    # hue mirrors x because seaborn deprecates passing `palette` without
    # `hue`; dodge=False keeps the bars centred on their category.
    ax = sns.barplot(
        data=latest_vaccination,
        x='location',
        y='people_vaccinated_per_hundred',
        hue='location',
        dodge=False,
        palette='Blues_d',
    )
    # Older seaborn releases add a legend that merely repeats the x labels.
    if ax.get_legend() is not None:
        ax.get_legend().remove()
    plt.title(f'Percentage of Population Vaccinated as of {latest_date.date()}', fontsize=14)
    plt.ylabel('% of Population Vaccinated')
    plt.xlabel('Country')
    plt.ylim(0, 100)
    plt.grid(True)
    plt.show()