# COVID-19 Data Analysis
A simple analysis of COVID-19 data using Python and common data science libraries.

In [None]:
# Import required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better visualizations.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*"
# and later releases dropped the old "seaborn" name, so pick whichever name
# this installation actually provides instead of hard-coding one.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
sns.set_palette("husl")

In [None]:
# Fetch the complete Our World in Data COVID-19 CSV over HTTP into a DataFrame
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(filepath_or_buffer=url)
print("Data loaded successfully!")

## Basic Data Exploration
Let's examine the structure and content of our dataset.

In [None]:
# Report the dataset's dimensions and column names, then let pandas print
# its own per-column summary
print(f"Dataset Shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print("\nBasic Information:")
df.info()

## Data Cleaning and Preparation
We'll narrow the data to the columns and countries of interest and convert the dates to proper datetime values.

In [None]:
# Columns to keep and the countries this analysis focuses on
columns_of_interest = [
    'location', 'date',
    'total_cases', 'total_deaths',
    'new_cases', 'new_deaths',
    'total_vaccinations',
]
countries = ['United States', 'India', 'United Kingdom', 'Brazil', 'France']

# Select rows for the chosen countries and only the chosen columns in one step;
# .copy() yields an independent frame so the date conversion does not touch df
in_selected_countries = df['location'].isin(countries)
clean_df = df.loc[in_selected_countries, columns_of_interest].copy()
clean_df['date'] = pd.to_datetime(clean_df['date'])

## Time Series Analysis
Visualizing the progression of cases over time for selected countries.

In [None]:
# Draw one total_cases line per selected country on a single shared axis
fig, ax = plt.subplots(figsize=(12, 6))
for country in countries:
    country_data = clean_df.loc[clean_df['location'] == country]
    ax.plot(country_data['date'], country_data['total_cases'], label=country)

ax.set_title('Total COVID-19 Cases Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Total Cases')
ax.legend()
# Tilt the date labels so they do not overlap
plt.xticks(rotation=45)
fig.tight_layout()
plt.show()

## Vaccination Progress Analysis
Examining cumulative vaccination totals over time across the selected countries.

In [None]:
# Plot total_vaccinations over time, one line per selected country
plt.figure(figsize=(12, 6))
for country in countries:
    country_data = clean_df[clean_df['location'] == country]
    # Rows where total_vaccinations is missing (NaN) would break the line
    # into disconnected fragments and hide isolated points, so plot only
    # the dates that actually carry a value.
    reported = country_data.dropna(subset=['total_vaccinations'])
    plt.plot(reported['date'], reported['total_vaccinations'], label=country)

plt.title('Total Vaccinations Over Time')
plt.xlabel('Date')
plt.ylabel('Total Vaccinations')
plt.legend()
# Tilt the date labels so they do not overlap
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## Additional Insights
Calculate and display key metrics for each country.

In [None]:
# Per-country maximum of the total_* columns and mean of the new_* columns
per_country = clean_df.groupby('location')
summary_stats = per_country.agg(
    total_cases=('total_cases', 'max'),
    total_deaths=('total_deaths', 'max'),
    new_cases=('new_cases', 'mean'),
    new_deaths=('new_deaths', 'mean'),
).round(2)

# Deaths as a percentage of cases, computed from the already-rounded totals
deaths_per_case = summary_stats['total_deaths'].div(summary_stats['total_cases'])
summary_stats['death_rate'] = deaths_per_case.mul(100).round(2)
print("Summary Statistics by Country:")
print(summary_stats)