In [1]:
import pandas as pd

In [2]:
# Load cleaned data
df = pd.read_csv(r'F:\Projects\2_COVID-19 Global Analysis\2_Cleaned Data\owid-covid-cleaned.csv')

In [3]:
# Quick check
print(df.head())
print(df.info())

  iso_code continent     location        date  total_cases  new_cases  \
0      AFG      Asia  Afghanistan  2020-01-05          0.0        0.0   
1      AFG      Asia  Afghanistan  2020-01-06          0.0        0.0   
2      AFG      Asia  Afghanistan  2020-01-07          0.0        0.0   
3      AFG      Asia  Afghanistan  2020-01-08          0.0        0.0   
4      AFG      Asia  Afghanistan  2020-01-09          0.0        0.0   

   total_deaths  new_deaths  total_vaccinations  new_vaccinations  
0           0.0         0.0                 0.0               0.0  
1           0.0         0.0                 0.0               0.0  
2           0.0         0.0                 0.0               0.0  
3           0.0         0.0                 0.0               0.0  
4           0.0         0.0                 0.0               0.0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429435 entries, 0 to 429434
Data columns (total 10 columns):
 #   Column              Non-Null Count   

In [4]:
#2️⃣ Total cases & deaths worldwide
total_cases = df['total_cases'].max()
total_deaths = df['total_deaths'].max()

print(f"🌍 Total cases worldwide: {total_cases:,.0f}")
print(f"🌍 Total deaths worldwide: {total_deaths:,.0f}")


🌍 Total cases worldwide: 775,866,783
🌍 Total deaths worldwide: 7,057,132


In [5]:
#3️⃣ Top 10 countries by total cases & deaths
# Get latest date in dataset
latest_date = df['date'].max()

# Filter for latest date
latest = df[df['date'] == latest_date]

# Remove continents & aggregates if needed (OWID has 'World', 'Africa', etc.)
latest_countries = latest[~latest['iso_code'].str.startswith('OWID')]

# Top 10 by cases
top_cases = latest_countries[['location', 'total_cases']].sort_values(by='total_cases', ascending=False).head(10)
print("\n🌍 Top 10 countries by total cases:")
print(top_cases)

# Top 10 by deaths
top_deaths = latest_countries[['location', 'total_deaths']].sort_values(by='total_deaths', ascending=False).head(10)
print("\n🌍 Top 10 countries by total deaths:")
print(top_deaths)


🌍 Top 10 countries by total cases:
         location  total_cases
217093  Lithuania          0.0
230301   Malaysia          0.0

🌍 Top 10 countries by total deaths:
         location  total_deaths
217093  Lithuania           0.0
230301   Malaysia           0.0


In [6]:
#4️⃣ Daily new cases & deaths
# Calculate global daily sums
daily = df.groupby('date')[['new_cases', 'new_deaths']].sum().reset_index()

print(daily.tail())

            date  new_cases  new_deaths
1683  2024-08-10        0.0         0.0
1684  2024-08-11        0.0         0.0
1685  2024-08-12        0.0         0.0
1686  2024-08-13        0.0         0.0
1687  2024-08-14        0.0         0.0


In [7]:
# 5️⃣ Highest single-day spike (cases)
max_new_cases = daily['new_cases'].max()
max_new_cases_date = daily[daily['new_cases'] == max_new_cases]['date'].values[0]

print(f"📈 Highest single-day spike: {max_new_cases:,.0f} cases on {max_new_cases_date}")

📈 Highest single-day spike: 177,772,819 cases on 2022-12-25


In [8]:
#6️⃣ Average daily new cases
avg_new_cases = daily['new_cases'].mean()
print(f"📈 Average daily new cases worldwide: {avg_new_cases:,.0f}")

📈 Average daily new cases worldwide: 1,948,100
