In [7]:
# Import libraries
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import ttest_1samp
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import chi2_contingency 

# Load datasets
lifespans = pd.read_csv('lifespans.csv')
iron = pd.read_csv('iron.csv')

# Check the lifespans data: first rows, column summary, descriptive statistics.
display(lifespans.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" in the output.
lifespans.info()
display(lifespans.describe(include='all'))

# Vein Pack subscribers: keep their rows, pull out the lifespan column,
# and report the sample mean.
vein_pack_lifespans = lifespans.loc[lifespans['pack'] == 'vein']
vein_pack_lifespan = vein_pack_lifespans['lifespan']
avg_vein_pack = vein_pack_lifespan.mean()
print(avg_vein_pack)

# One-sample t-test of the Vein Pack lifespans against 73 years.
# (One *sample*, not one-sided: the alternative below is two-sided.)
# Null: The average lifespan of a Vein Pack subscriber is 73 years.
# Alternative: The average lifespan of a Vein Pack subscriber is NOT 73 years.
tstat, pval = ttest_1samp(vein_pack_lifespan, 73)
print(pval)
# Compare the p-value with the 0.05 significance threshold.
if pval < 0.05:
    print('Significant')
else:
    print('Not Significant')

# Artery Pack subscribers: keep their rows, pull out the lifespan column,
# and report the sample mean.
artery_pack_lifespans = lifespans.loc[lifespans['pack'] == 'artery']
artery_pack_lifespan = artery_pack_lifespans['lifespan']
avg_artery_pack = artery_pack_lifespan.mean()
print(avg_artery_pack)

# One-sample t-test of the Artery Pack lifespans against 73 years.
# (One *sample*, not one-sided: the alternative below is two-sided.)
# Null: The average lifespan of an Artery Pack subscriber is 73 years.
# Alternative: The average lifespan of an Artery Pack subscriber is NOT 73 years.
tstat, pval = ttest_1samp(artery_pack_lifespan, 73)
print(pval)
# Compare the p-value with the 0.05 significance threshold.
if pval < 0.05:
    print('Significant')
else:
    print('Not Significant')

# Two-sample t-test comparing the Vein Pack and Artery Pack lifespans.
# ttest_ind is called with its defaults (per the SciPy docs: two-sided,
# equal variances assumed).
# Null: The average lifespan of a Vein Pack subscriber is equal to the average lifespan of an Artery Pack subscriber.
# Alternative: The average lifespan of a Vein Pack subscriber is NOT equal to the average lifespan of an Artery Pack subscriber.
tstat, pval = ttest_ind(vein_pack_lifespan, artery_pack_lifespan)
print(pval)
# Compare the p-value with the 0.05 significance threshold.
if pval < 0.05:
    print('Significant')
else:
    print('Not Significant')

# Review the iron data (already loaded at the top of the cell):
# first rows, column summary, descriptive statistics.
display(iron.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in display() would render a stray "None" in the output.
iron.info()
display(iron.describe(include='all'))

# Cross-tabulate subscription pack against iron level, then run a chi-square
# test on the resulting contingency table to check for an association.
Xtab = pd.crosstab(iron['pack'], iron['iron'])
print(Xtab)
chi2, pval, dof, expected = chi2_contingency(Xtab)
print(pval)
# Compare the p-value with the 0.05 significance threshold.
if pval < 0.05:
    print('Significant')
else:
    print('Not Significant')

Unnamed: 0,pack,lifespan
0,vein,76.25509
1,artery,76.404504
2,artery,75.952442
3,artery,76.923082
4,artery,73.771212


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   pack      40 non-null     object 
 1   lifespan  40 non-null     float64
dtypes: float64(1), object(1)
memory usage: 772.0+ bytes


None

Unnamed: 0,pack,lifespan
count,40,40.0
unique,2,
top,vein,
freq,20,
mean,,75.521338
std,,2.152502
min,,68.314898
25%,,74.063618
50%,,75.993195
75%,,77.029501


76.16901335636044
5.972157921433211e-07
Significant
74.8736622351704
0.0012405464510317015
Significant
0.05588883079070819
Not Significant


Unnamed: 0,pack,iron
0,vein,low
1,artery,normal
2,artery,normal
3,artery,normal
4,artery,high


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 345 entries, 0 to 344
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   pack    345 non-null    object
 1   iron    345 non-null    object
dtypes: object(2)
memory usage: 5.5+ KB


None

Unnamed: 0,pack,iron
count,345,345
unique,2,3
top,vein,low
freq,200,169


iron    high  low  normal
pack                     
artery    87   29      29
vein      20  140      40
9.359749337433008e-25
Significant
