In [147]:
import numpy as np
import pandas as pd
from scipy.stats import shapiro, levene, ttest_ind, f_oneway

In [148]:
# Load the passenger table and keep only the rows that have a recorded Age,
# since every later step works on the Age column.
df = pd.read_csv('titanic-dataset.csv').dropna(subset=['Age'])

In [149]:
# Boolean masks for the two attributes used to partition the passengers.
is_male = df['Sex'] == 'male'
is_female = df['Sex'] == 'female'
survived = df['Survived'] == 1
died = df['Survived'] == 0

# Age series for each sex x survival-outcome combination.
groups = {
    'Survived_Male': df.loc[is_male & survived, 'Age'],
    'Died_Male': df.loc[is_male & died, 'Age'],
    'Survived_Female': df.loc[is_female & survived, 'Age'],
    'Died_Female': df.loc[is_female & died, 'Age'],
}

# Print the number of passengers in each group.
for name, ages in groups.items():
    print(f"{name:17s}: {len(ages)}")

Survived_Male    : 93
Died_Male        : 360
Survived_Female  : 197
Died_Female      : 64


In [150]:
# Empirical sampling distribution of the mean age for each group:
# draw `n` ages (pandas samples without replacement by default), average them,
# and repeat `n_iter` times.
# The generator is seeded inside the cell so that re-running it reproduces
# exactly the same draws; an unseeded run gives different numbers every time.
means = {}
n_iter = 300
n = 30
rng = np.random.default_rng(42)

for name, ages in groups.items():
    # One list of `n_iter` sample means per group, keyed by the group name.
    means[name] = [
        ages.sample(n, random_state=rng).mean()
        for _ in range(n_iter)
    ]

In [151]:
# Shapiro-Wilk normality test on each group's list of simulated sample means.
# normal_flags maps group name -> True when normality is NOT rejected at the
# significance level `alpha` (i.e. p-value > alpha).
# NOTE(review): the dict was previously created but never filled; 0.05 is the
# conventional threshold assumed here -- confirm it matches the intended analysis.
alpha = 0.05
normal_flags = {}
for name, m in means.items():
    result = shapiro(m)
    normal_flags[name] = bool(result.pvalue > alpha)
    print(result)

ShapiroResult(statistic=np.float64(0.996231343312217), pvalue=np.float64(0.6968232609834718))
ShapiroResult(statistic=np.float64(0.9933616859694592), pvalue=np.float64(0.20792625722161034))
ShapiroResult(statistic=np.float64(0.9958335970609807), pvalue=np.float64(0.6101715741087004))
ShapiroResult(statistic=np.float64(0.9944533561081862), pvalue=np.float64(0.34756515873917637))


In [165]:
sm, sf = groups['Survived_Male'].sample(35), groups['Survived_Female'].sample(70)
print(f"Survived:\n{levene(sm, sf)}\n")
dm, df = groups['Died_Male'].sample(100), groups['Died_Female'].sample(20)
print(f"Died:\n{levene(dm, df)}\n")

Survived:
LeveneResult(statistic=np.float64(0.32114294020629014), pvalue=np.float64(0.5721544257031312))

Died:
LeveneResult(statistic=np.float64(0.0025712589181464086), pvalue=np.float64(0.9596442702456023))



In [166]:
print(f_oneway(sm, sf))
print(f_oneway(dm, df))

F_onewayResult(statistic=np.float64(0.0028935818150954196), pvalue=np.float64(0.957205060817593))
F_onewayResult(statistic=np.float64(2.44798679370048), pvalue=np.float64(0.1203536407320144))
