In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.weightstats import ztest
from sklearn.preprocessing import LabelEncoder

df=pd.read_csv('covid_data.csv')
df.head()
df.isnull().sum()
df.info()
# t-stat, p-value
t_stat, p_value = stats.ttest_ind(df['Confirmed'], df['Deaths'])
print(f"T-test p-value: {p_value}")
# Z-test between Confirmed and Deaths columns
z_stat, p_value = ztest(df['Confirmed'], df['Deaths'])
print(f"Z-test p-value: {p_value}")
# Label encoding for categorical data
le = LabelEncoder()
df['Country_encoded'] = le.fit_transform(df['Country/Region'])

# Create contingency table
contingency_table = pd.crosstab(df['Country_encoded'], df['Deaths'])

# Perform chi-square test
chi2_stat, p_value, dof, expected = stats.chi2_contingency(contingency_table)
print(f"Chi-square p-value: {p_value}")

# Perform ANOVA test
anova_results = stats.f_oneway(df[df['WHO Region'] == 'Europe']['Confirmed'],
                               df[df['WHO Region'] == 'Africa']['Confirmed'])
print(f"ANOVA p-value: {anova_results.pvalue}")




Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,New cases,New deaths,New recovered,Deaths / 100 Cases,Recovered / 100 Cases,Deaths / 100 Recovered,Confirmed last week,1 week change,1 week % increase,WHO Region
0,Afghanistan,36263,1269,25198,9796,106,10,18,3.5,69.49,5.04,35526,737,2.07,Eastern Mediterranean
1,Albania,4880,144,2745,1991,117,6,63,2.95,56.25,5.25,4171,709,17.0,Europe
2,Algeria,27973,1163,18837,7973,616,8,749,4.16,67.34,6.17,23691,4282,18.07,Africa
3,Andorra,907,52,803,52,10,0,0,5.73,88.53,6.48,884,23,2.6,Europe
4,Angola,950,41,242,667,18,1,0,4.32,25.47,16.94,749,201,26.84,Africa


Unnamed: 0,0
Country/Region,0
Confirmed,0
Deaths,0
Recovered,0
Active,0
New cases,0
New deaths,0
New recovered,0
Deaths / 100 Cases,0
Recovered / 100 Cases,0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Country/Region          187 non-null    object 
 1   Confirmed               187 non-null    int64  
 2   Deaths                  187 non-null    int64  
 3   Recovered               187 non-null    int64  
 4   Active                  187 non-null    int64  
 5   New cases               187 non-null    int64  
 6   New deaths              187 non-null    int64  
 7   New recovered           187 non-null    int64  
 8   Deaths / 100 Cases      187 non-null    float64
 9   Recovered / 100 Cases   187 non-null    float64
 10  Deaths / 100 Recovered  187 non-null    float64
 11  Confirmed last week     187 non-null    int64  
 12  1 week change           187 non-null    int64  
 13  1 week % increase       187 non-null    float64
 14  WHO Region              187 non-null    ob

T-test p-value: 0.0027264618955435958


Z-test p-value: 0.0025509219727063333


Chi-square p-value: 0.2628511244152159


ANOVA p-value: 0.03333021762669799
