In [18]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pingouin as pg
import scipy.stats as stats
from scipy.stats import chi2_contingency

# Input folder holding the datasheets read by this notebook (relative to the working directory)
data_dir = 'data/datasheets'
# Output folder for the descriptive-statistics tables written by this notebook
analysis_dir = 'analysis/descriptive_stats'
# Create the output folder up front; exist_ok=True makes re-running this cell safe
os.makedirs(analysis_dir, exist_ok=True)

In [19]:
# Load the preprocessed thalamic nuclei volumes; the first CSV column becomes the index
all_data = pd.read_csv(
    os.path.join(data_dir, 'CombinedThalamicNucleiCorrected_combinedHemis.csv'),
    index_col=0,
)

# Split subjects by the 'blsgroup' code: 1 -> preterm, 2 -> full-term
preterm_data = all_data.loc[all_data['blsgroup'].eq(1)]
fullterm_data = all_data.loc[all_data['blsgroup'].eq(2)]

# Descriptive stats

Descriptive statistics for:
- <b> Age_at_scan:</b> age of participant at MRI scan
- <b>sex:</b> sex of participant (1=male, 2=female)
- <b>TIV_corrected:</b> estimated total intracranial volume calculated by FreeSurfer, corrected for scanner-effect using NeuroCombat
- <b>inti:</b> intensity of neonatal treatment
- <b>GA:</b> gestational age
- <b>BW:</b> birth weight
- <b>wie_gesiq:</b> full-scale IQ

In [20]:
# Columns summarised in the descriptive statistics tables
measures = [
    'Age_at_scan',    # age of participant at MRI scan
    'sex',            # 1 = male, 2 = female
    'TIV_corrected',  # total intracranial volume (FreeSurfer), scanner-corrected with NeuroCombat
    'inti',           # intensity of neonatal treatment
    'GA',             # gestational age
    'BW',             # birth weight
    'wie_gesiq',      # full-scale IQ
]

## Preterm stats

In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) of the selected
# measures for the preterm group; the table is written to CSV and displayed.
descriptive_stats_preterm = preterm_data.loc[:, measures].describe()
descriptive_stats_preterm.to_csv(
    os.path.join(analysis_dir, 'DescriptiveStatsPreterm.csv'),
    index=True,
)
descriptive_stats_preterm

Unnamed: 0,Age_at_scan,sex,TIV_corrected,inti,GA,BW,wie_gesiq
count,83.0,83.0,83.0,82.0,83.0,83.0,80.0
mean,26.728926,1.481928,1561197.0,11.846707,30.373494,1321.204819,93.5625
std,0.640219,0.502711,148755.8,3.60927,2.173627,318.989057,11.872576
min,25.706849,1.0,1230073.0,3.39,25.0,730.0,71.0
25%,26.215068,1.0,1467277.0,9.5225,29.0,1060.0,85.0
50%,26.668493,1.0,1562092.0,12.345,30.0,1360.0,93.0
75%,27.271233,2.0,1662911.0,14.495,31.0,1497.5,100.0
max,28.342466,2.0,1857163.0,17.58,36.0,2070.0,131.0


In [22]:
# Number of male participants (sex code 1) in the preterm group
pt_male = preterm_data.loc[preterm_data['sex'].eq(1)]
n_males = len(pt_male)
print(n_males)

43


In [23]:
# Number of female participants (sex code 2) in the preterm group
pt_fem = preterm_data.loc[preterm_data['sex'].eq(2)]
n_fem = len(pt_fem)
print(n_fem)

40


## Fullterm stats

In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) of the selected
# measures for the full-term group; the table is written to CSV and displayed.
descriptive_stats_fullterm = fullterm_data.loc[:, measures].describe()
descriptive_stats_fullterm.to_csv(
    os.path.join(analysis_dir, 'DescriptiveStatsFullterm.csv'),
    index=True,
)
descriptive_stats_fullterm

Unnamed: 0,Age_at_scan,sex,TIV_corrected,inti,GA,BW,wie_gesiq
count,92.0,92.0,92.0,1.0,91.0,91.0,89.0
mean,26.813758,1.423913,1647537.0,0.0,39.67033,3399.340659,102.876404
std,0.766407,0.496885,159309.0,,1.011654,437.210354,11.727803
min,25.487671,1.0,1289947.0,0.0,37.0,2120.0,77.0
25%,26.131507,1.0,1546094.0,0.0,39.0,3135.0,96.0
50%,26.794521,1.0,1642290.0,0.0,40.0,3410.0,102.0
75%,27.376712,2.0,1724494.0,0.0,40.0,3680.0,110.0
max,28.90137,2.0,2078900.0,0.0,41.0,4670.0,130.0


In [25]:
# Number of male participants (sex code 1) in the full-term group.
# Note: this reassigns n_males, which held the preterm count after the earlier cell.
ft_male = fullterm_data.loc[fullterm_data['sex'].eq(1)]
n_males = len(ft_male)
print(n_males)

53


In [26]:
# Number of female participants (sex code 2) in the full-term group.
# Note: this reassigns n_fem, which held the preterm count after the earlier cell.
ft_fem = fullterm_data.loc[fullterm_data['sex'].eq(2)]
n_fem = len(ft_fem)
print(n_fem)

39


# Group comparisons

## Sex

In [27]:
# 2x2 table of counts: rows = sex code (1, 2), columns = group (preterm, full-term)
contingency_table = [
    [group['sex'].eq(sex_code).sum() for group in (preterm_data, fullterm_data)]
    for sex_code in (1, 2)
]
print(contingency_table)

# Chi-square test of independence between sex and group.
# correction=True is SciPy's default; it applies Yates' continuity correction
# when the table has one degree of freedom, as this 2x2 table does.
print(chi2_contingency(contingency_table, correction=True))

[[43, 53], [40, 39]]
Chi2ContingencyResult(statistic=0.38190295147983194, pvalue=0.5365865644261065, dof=1, expected_freq=array([[45.53142857, 50.46857143],
       [37.46857143, 41.53142857]]))


## Age

In [28]:
# Independent two-sample t-test on age at scan, preterm vs. full-term
# (SciPy default: equal variances assumed).
# nan_policy='omit' matches the GA / BW / IQ comparisons in this notebook; with the
# default ('propagate'), a single missing value would make the statistic and
# p-value NaN. On complete data the result is unchanged.
var = 'Age_at_scan'
print(stats.ttest_ind(preterm_data[var], fullterm_data[var], nan_policy='omit'))

Ttest_indResult(statistic=-0.7899255830516784, pvalue=0.43065261664351395)


## Gestational age

In [29]:
# Independent two-sample t-test on gestational age, preterm vs. full-term;
# missing values are ignored via nan_policy='omit'.
var = 'GA'
print(
    stats.ttest_ind(
        a=preterm_data[var],
        b=fullterm_data[var],
        nan_policy='omit',
    )
)

Ttest_indResult(statistic=-36.68393303981656, pvalue=3.037607077361392e-83)


## Birth weight

In [30]:
# Independent two-sample t-test on birth weight, preterm vs. full-term;
# missing values are ignored via nan_policy='omit'.
var = 'BW'
print(
    stats.ttest_ind(
        a=preterm_data[var],
        b=fullterm_data[var],
        nan_policy='omit',
    )
)

Ttest_indResult(statistic=-35.52616311635247, pvalue=3.984258013352533e-81)


## eTIV

In [31]:
# Independent two-sample t-test on the scanner-corrected total intracranial volume,
# preterm vs. full-term (SciPy default: equal variances assumed).
# nan_policy='omit' matches the GA / BW / IQ comparisons in this notebook; with the
# default ('propagate'), a single missing value would make the statistic and
# p-value NaN. On complete data the result is unchanged.
var = 'TIV_corrected'
print(stats.ttest_ind(preterm_data[var], fullterm_data[var], nan_policy='omit'))

Ttest_indResult(statistic=-3.693923931411879, pvalue=0.0002959245542876834)


## full-scale IQ

In [32]:
# Independent two-sample t-test on full-scale IQ, preterm vs. full-term;
# missing values are ignored via nan_policy='omit'.
var = 'wie_gesiq'
print(
    stats.ttest_ind(
        a=preterm_data[var],
        b=fullterm_data[var],
        nan_policy='omit',
    )
)

Ttest_indResult(statistic=-5.124775889127329, pvalue=8.158915551954021e-07)
