# 04GenderCOVID19-V5

# Libraries

In [45]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings
from IPython.display import display

from scipy import stats #the only import you actually need for this

# Loading Data

In [46]:
# Load the per-gender case data.
# The path is a raw string so the backslashes of the Windows path are taken
# literally; in a normal string, sequences such as "\p" or "\T" are invalid
# escape sequences that newer Python versions warn about.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs elsewhere.
gender = pd.read_csv(
    r"C:\py\Projects\TuringCollege\COVID19\DataSets\gender.csv",
    index_col=False,
    skipinitialspace=True,
)

# Size

In [47]:
# (rows, columns) of the frame as loaded.
gender.shape

(242, 5)

# Missing Data

In [48]:
# Number of missing values in each column.
gender.isna().sum()

date         0
time         0
sex          0
confirmed    0
deceased     0
dtype: int64

# Delete Unneeded Columns

In [49]:
# Drop the "time" column, which is not used in this analysis.
# errors="ignore" keeps the cell re-runnable: once "time" has been removed,
# running the cell again is a no-op instead of raising KeyError.
gender = gender.drop(columns=["time"], errors="ignore")

## Rename Columns

In [50]:
# Give the remaining columns capitalised names (modifies `gender` in place).
gender.rename(
    columns=dict(
        date="Date",
        sex="Gender",
        confirmed="Confirmed",
        deceased="Deceased",
    ),
    inplace=True,
)

## Rename Column Values

In [51]:
# Capitalise the gender labels; a single value -> replacement mapping covers
# both labels (modifies `gender` in place).
gender.replace({"male": "Male", "female": "Female"}, inplace=True)

# Dataset

In [52]:
# Preview the first rows after the clean-up steps above.
gender.head()

Unnamed: 0,Date,Gender,Confirmed,Deceased
0,3/2/2020,Male,1591,13
1,3/2/2020,Female,2621,9
2,3/3/2020,Male,1810,16
3,3/3/2020,Female,3002,12
4,3/4/2020,Male,1996,20


In [53]:
# Show every row and column when a frame is displayed.
# The fully qualified "display.*" keys are used because the short forms
# ("max_rows", "max_columns") also match the "styler.render.*" options on
# recent pandas versions and then raise OptionError (pattern matched
# multiple keys).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Males Confirmed

In [79]:
# Keep only the rows whose "Gender" value is "Male".
males_confirmed = gender[gender["Gender"].eq("Male")]

In [80]:
# Work on an independent copy so the "Count" column added in the next cell is
# not written into a filtered selection of `gender`.
males_confirmed_copy = males_confirmed.copy()

In [81]:
# Row-to-row change in "Confirmed" for males.
males_confirmed_copy["Count"] = (
    males_confirmed_copy["Confirmed"]
    .diff()  # the first row has no predecessor, so its difference is NaN
    .fillna(0)  # ...which is stored as 0
)

In [82]:
# Average of the row-to-row change in confirmed male cases.
# NOTE(review): the first row's "Count" is the 0 inserted by fillna(0), so it
# is part of this average — confirm that is intended.
males_confirmed_copy["Count"].mean()

32.264462809917354

# Males Deceased

In [83]:
# Rows for males again (same filter as `males_confirmed`), kept under its own
# name for the deceased analysis.
males_deceased = gender[gender["Gender"].eq("Male")]

In [84]:
# Independent copy so the "Count" column added in the next cell is not written
# into a filtered selection of `gender`.
males_deceased_copy = males_deceased.copy()

In [85]:
# Row-to-row change in "Deceased" for males.
males_deceased_copy["Count"] = (
    males_deceased_copy["Deceased"]
    .diff()  # the first row has no predecessor, so its difference is NaN
    .fillna(0)  # ...which is stored as 0
)

In [86]:
# Average of the row-to-row change in male deaths.
# NOTE(review): the first row's "Count" is the 0 inserted by fillna(0), so it
# is part of this average — confirm that is intended.
males_deceased_copy["Count"].mean()

1.140495867768595

# Females Confirmed

In [87]:
# Keep only the rows whose "Gender" value is "Female".
females_confirmed = gender[gender["Gender"].eq("Female")]

In [88]:
# Independent copy so the "Count" column added in the next cell is not written
# into a filtered selection of `gender`.
females_confirmed_copy = females_confirmed.copy()

In [89]:
# Row-to-row change in "Confirmed" for females.
females_confirmed_copy["Count"] = (
    females_confirmed_copy["Confirmed"]
    .diff()  # the first row has no predecessor, so its difference is NaN
    .fillna(0)  # ...which is stored as 0
)

In [101]:
# Average of the row-to-row change in confirmed female cases.
# NOTE(review): the first row's "Count" is the 0 inserted by fillna(0), so it
# is part of this average — confirm that is intended.
females_confirmed_copy["Count"].mean()

38.710743801652896

# Females Deceased

In [102]:
# Rows for females again (same filter as `females_confirmed`), kept under its
# own name for the deceased analysis.
females_deceased = gender[gender["Gender"].eq("Female")]

In [103]:
# Independent copy so the "Count" column added in the next cell is not written
# into a filtered selection of `gender`.
females_deceased_copy = females_deceased.copy()

In [104]:
# Row-to-row change in "Deceased" for females.
females_deceased_copy["Count"] = (
    females_deceased_copy["Deceased"]
    .diff()  # the first row has no predecessor, so its difference is NaN
    .fillna(0)  # ...which is stored as 0
)

In [105]:
# Average of the row-to-row change in female deaths.
# NOTE(review): the first row's "Count" is the 0 inserted by fillna(0), so it
# is part of this average — confirm that is intended.
females_deceased_copy["Count"].mean()

1.0082644628099173

# Confirmed Statistical Significance Test

In [91]:
# Male sample for the significance tests, as a plain Python list.
# NOTE(review): this takes the "Confirmed" column itself, not the per-row
# "Count" derived earlier; the values look like running totals — confirm this
# is the series that should be tested.
male_confirmed_list = males_confirmed_copy["Confirmed"].to_list()

In [92]:
# Female sample for the significance tests, as a plain Python list.
female_confirmed_list = females_confirmed_copy["Confirmed"].to_list()

In [93]:
# Mann-Whitney U test comparing the male and female confirmed samples.
stats.mannwhitneyu(male_confirmed_list, female_confirmed_list)

MannwhitneyuResult(statistic=909.5, pvalue=5.365238470322022e-32)

In [96]:
# Shapiro-Wilk normality test on the male confirmed sample.
stats.shapiro(male_confirmed_list)

ShapiroResult(statistic=0.925539493560791, pvalue=4.721199729829095e-06)

In [97]:
# Shapiro-Wilk normality test on the female confirmed sample.
stats.shapiro(female_confirmed_list)

ShapiroResult(statistic=0.8379620313644409, pvalue=3.363124501110093e-10)

In [98]:
# Independent two-sample t-test on the same two confirmed samples.
stats.ttest_ind(male_confirmed_list,female_confirmed_list)

Ttest_indResult(statistic=-17.481890764228474, pvalue=1.0831493925030637e-44)

# Deceased Statistical Significance Test

In [109]:
# Male sample for the significance tests, as a plain Python list.
# NOTE(review): this takes the "Deceased" column itself, not the per-row
# "Count" derived earlier; the values look like running totals — confirm this
# is the series that should be tested.
male_deceased_list = males_deceased_copy["Deceased"].to_list()

In [110]:
# Female sample for the significance tests, as a plain Python list.
female_deceased_list = females_deceased_copy["Deceased"].to_list()

In [111]:
# Mann-Whitney U test comparing the male and female deceased samples.
stats.mannwhitneyu(male_deceased_list, female_deceased_list)

MannwhitneyuResult(statistic=9792.0, pvalue=5.632173501992387e-06)

In [112]:
# Shapiro-Wilk normality test on the male deceased sample.
stats.shapiro(male_deceased_list)

ShapiroResult(statistic=0.8193866014480591, pvalue=7.011080604968356e-11)

In [113]:
# Shapiro-Wilk normality test on the female deceased sample.
stats.shapiro(female_deceased_list)

ShapiroResult(statistic=0.7780519723892212, pvalue=3.0440921051227976e-12)

In [114]:
# Independent two-sample t-test on the same two deceased samples.
stats.ttest_ind(male_deceased_list,female_deceased_list)

Ttest_indResult(statistic=2.3746446029926154, pvalue=0.018353025610579034)

# Function: Automatic Choice of Significance Test

In [117]:
def test_sig(x, y):
    if stats.shapiro(x).pvalue >= 0.05 and stats.shapiro(y).pvalue >= 0.05: #if they are normally distriuted
        print("t-test")
        return stats.ttest_ind(x,y)
    else:
        print("mann-whitney")
        return stats.mannwhitneyu(x, y)

In [118]:
# Let test_sig choose and run the test for the deceased samples.
test_sig(male_deceased_list, female_deceased_list)

mann-whitney


MannwhitneyuResult(statistic=9792.0, pvalue=5.632173501992387e-06)

In [119]:
# Let test_sig choose and run the test for the confirmed samples.
test_sig(male_confirmed_list, female_confirmed_list)

mann-whitney


MannwhitneyuResult(statistic=909.5, pvalue=5.365238470322022e-32)