In [4]:
# Colab-specific step: files.upload() is what produced the
# "Saving covid_data.csv to covid_data.csv" output recorded for this cell.
# The return value is kept in `uploaded`, which no visible later cell reads.
# NOTE(review): google.colab is presumably unavailable outside Colab —
# confirm before running this notebook locally.
from google.colab import files
uploaded = files.upload()

Saving covid_data.csv to covid_data.csv


In [2]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.weightstats import ztest

In [5]:
# Load the uploaded CSV into the working DataFrame used by every later cell.
csv_path = 'covid_data.csv'
df = pd.read_csv(csv_path)

In [6]:
# Print column names, dtypes and non-null counts as a sanity check on the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Country/Region          187 non-null    object 
 1   Confirmed               187 non-null    int64  
 2   Deaths                  187 non-null    int64  
 3   Recovered               187 non-null    int64  
 4   Active                  187 non-null    int64  
 5   New cases               187 non-null    int64  
 6   New deaths              187 non-null    int64  
 7   New recovered           187 non-null    int64  
 8   Deaths / 100 Cases      187 non-null    float64
 9   Recovered / 100 Cases   187 non-null    float64
 10  Deaths / 100 Recovered  187 non-null    float64
 11  Confirmed last week     187 non-null    int64  
 12  1 week change           187 non-null    int64  
 13  1 week % increase       187 non-null    float64
 14  WHO Region              187 non-null    object

In [7]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.5,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.0,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.6,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa


In [8]:
# Remove any row that contains a missing value, modifying df in place.
# The df.info() output above reports 187 non-null entries for every column
# it lists, so on this dataset the call acts as a safeguard rather than
# removing rows.
df.dropna(inplace=True)

In [9]:
# 1. T-TEST
# Two-sample comparison of Confirmed counts: Europe rows vs Africa rows
# =========================
is_europe = df['WHO Region'] == 'Europe'
is_africa = df['WHO Region'] == 'Africa'
europe_confirmed = df.loc[is_europe, 'Confirmed']
africa_confirmed = df.loc[is_africa, 'Confirmed']

# NOTE(review): ttest_ind is called with its defaults, which pool the two
# sample variances (equal_var=True) — confirm that assumption suits this data.
ttest_result = stats.ttest_ind(europe_confirmed, africa_confirmed)
t_stat, t_p_value = ttest_result
print(f"T-test (Confirmed - Europe vs Africa) p-value: {t_p_value}")
print(f"T-test (Confirmed - Europe vs Africa) t-statistic: {t_stat}")

T-test (Confirmed - Europe vs Africa) p-value: 0.03333021762669801
T-test (Confirmed - Europe vs Africa) t-statistic: 2.157303211162749


In [7]:
# 2. Z-TEST
# Two-sample comparison of Recovered counts: Europe rows vs Africa rows
# =========================
region = df['WHO Region']
europe_recovered = df.loc[region == 'Europe', 'Recovered']
africa_recovered = df.loc[region == 'Africa', 'Recovered']

# ztest returns the test statistic followed by the p-value; only the
# p-value is reported here.
ztest_result = ztest(europe_recovered, africa_recovered)
z_stat, z_p_value = ztest_result
print(f"Z-test (Recovered - Europe vs Africa) p-value: {z_p_value}")

Z-test (Recovered - Europe vs Africa) p-value: 0.06270375549840343


In [8]:
# 3. CHI-SQUARE TEST
# Test of independence between WHO Region and a binned death-count level
# =========================
# Bin Deaths into four ordered categories. pd.cut closes intervals on the
# right by default, so these edges give (-1, 100], (100, 1000],
# (1000, 10000] and (10000, inf]; the -1 lower edge keeps a count of 0
# inside 'Low'.
df['Death_Level'] = pd.cut(
    df['Deaths'],
    bins=[-1, 100, 1000, 10000, float('inf')],
    labels=['Low', 'Moderate', 'High', 'Very High']
)

# Create contingency table (rows: WHO Region, columns: Death_Level)
contingency_table = pd.crosstab(df['WHO Region'], df['Death_Level'])

# Perform chi-square test
chi2_stat, chi2_p_value, dof, expected = stats.chi2_contingency(contingency_table)
print(f"Chi-square (WHO Region vs Death_Level) p-value: {chi2_p_value}")

# The p-value comes from a large-sample approximation that is commonly
# considered reliable only when expected cell counts are at least 5.
# Surface a caveat next to the result instead of reporting it unqualified.
sparse_cells = int((expected < 5).sum())
if sparse_cells:
    print(
        f"Warning: {sparse_cells} of {expected.size} expected counts are below 5; "
        "the chi-square approximation may be unreliable."
    )

Chi-square (WHO Region vs Death_Level) p-value: 0.0006273377250399645


In [9]:
# 4. ANOVA TEST
# One-way ANOVA on Recovered counts across several WHO Regions
# =========================
# Regions to compare (f_oneway receives one sample per entry)
regions = ['Europe', 'Africa', 'Eastern Mediterranean']

# Collect the Recovered column for each selected region, in list order
anova_data = []
for region in regions:
    in_region = df['WHO Region'] == region
    anova_data.append(df.loc[in_region, 'Recovered'])

anova_result = stats.f_oneway(*anova_data)
anova_stat, anova_p_value = anova_result
print(f"ANOVA (Recovered across WHO Regions) p-value: {anova_p_value}")

ANOVA (Recovered across WHO Regions) p-value: 0.04067054149586916
