In [67]:
import pandas as pd
import numpy as np
import os

In [123]:
# Directories holding the extracted features and the per-video meta data.
feature_path = '../data/interim'
meta_data_path = '../data/video_meta_data'

# Feature tables: one for the YouTube videos, one for the clinical recordings.
yt = pd.read_pickle(os.path.join(feature_path, 'features_youtube.pkl'))
clin = pd.read_pickle(os.path.join(feature_path, 'features_clinical.pkl'))

# YouTube meta data: rated age in total weeks and total months per video.
meta_data_yt = pd.read_pickle(os.path.join(meta_data_path, 'meta_data_yt.pkl'))
# Remove the last four characters of the file name to obtain the video key
# (presumably the file extension, e.g. ".mp4" -- confirm against the data).
meta_data_yt = meta_data_yt.assign(video=meta_data_yt['Video'].str[:-4])
unused_yt_columns = ['total_months', 'Video', 'rater', 'Months', 'Weeks', 'Unnamed: 0']
meta_data_yt = meta_data_yt.drop(columns=unused_yt_columns)
# NOTE: positional rename -- relies on exactly two columns remaining, in this order.
meta_data_yt.columns = ['age_in_weeks', 'video']

# Infant ids: unique id per infant, keyed by video name.
id_yt = pd.read_pickle(os.path.join(meta_data_path, 'infant_id_yt.pkl'))
id_yt = id_yt.assign(video=id_yt['video'].str[:-4])

# Combine age and infant id per video (outer join keeps videos present in only
# one of the two tables), then attach the result to the YouTube features
# (inner join keeps only videos that have both features and meta data).
meta_data_yt = meta_data_yt.merge(id_yt, on='video', how='outer')
yt = yt.merge(meta_data_yt, on='video', how='inner')

# Clinical meta data
# id: unique id per infant
# age (corrected and chronological), BINS, risk, pre-term
meta_data_clin = pd.read_pickle(os.path.join(meta_data_path, 'meta_data_clin.pkl'))

# The clinical video name carries six fields separated by '_' or '-':
# a leading token that is not used, infant, session, trial, GP and edited.
# Splitting through the .str accessor keeps clin's own index, so the
# assignment below lines up row by row even when clin does not carry a default
# 0..n-1 index (a freshly built Series would be aligned by label and could
# silently produce NaN or misassigned values). It also avoids the slow
# apply(pd.Series) expansion.
info_series = clin['video'].str.replace('-', '_', regex=False).str.split('_')
info_df = clin['video'].str.replace('-', '_', regex=False).str.split('_', expand=True)
# Raises if the names do not split into exactly six fields.
info_df.columns = ['dum', 'infant', 'session', 'trial', 'GP', 'edited']
clin[['infant', 'session', 'trial']] = info_df[['infant', 'session', 'trial']]

# Add the meta-data columns shared by both cohorts, then merge the clinical
# meta data onto the clinical features.
#   category: 0 = YouTube, 1 = clinical
#   risk:     0 = YouTube, >0 = clinical (the low/mod/high codes 0/1/2 shifted by +1)
# Risk for infants 28, 29, 32 and 33 is entered manually: per Dr. Prosser they
# are all high risk (corrected risk).

clin['category'] = 1
yt['category'] = 0
yt['risk'] = 0
yt['infant'] = 'yt_' + yt['infant_id'].astype(int).astype(str)

# Risk: corrected risk for preterm infants, falling back to chronological risk
# for term infants (where the corrected value is missing). The +1 shift keeps
# 0 free for the YouTube cohort.
meta_data_clin['risk'] = (
    meta_data_clin['Risk_low0_mod1_high2_corr']
    .fillna(meta_data_clin['Risk_low0_mod1_high2_chron'])
    + 1
)

# Age in weeks, counting a month as 4 weeks: corrected age for preterm infants,
# chronological age for term infants (where the corrected value is missing).
meta_data_clin['chron_age'] = meta_data_clin['Months_chron'] * 4 + meta_data_clin['Days_chron'] / 7
corrected_age = meta_data_clin['Months_corr'] * 4 + meta_data_clin['Days_corr'] / 7
meta_data_clin['age_in_weeks'] = corrected_age.fillna(meta_data_clin['chron_age'])

# Explicit copy: the override below must write to an independent frame rather
# than to a slice of the loaded table (avoids SettingWithCopyWarning).
meta_data_clin = meta_data_clin[['infant', 'session', 'risk', 'age_in_weeks']].copy()
# Dr. Prosser: infants 28, 29, 32, 33 are all high risk (high = 2, shifted to 3).
meta_data_clin.loc[meta_data_clin['infant'].isin([28, 29, 32, 33]), 'risk'] = 3

# The keys parsed from the video names are strings; cast them to int before
# joining on them.
clin['infant'] = clin['infant'].astype(int)
clin['session'] = clin['session'].astype(int)
clin = pd.merge(clin, meta_data_clin, on=['infant', 'session'], how='inner')
# The infant label includes the age in whole weeks, so recordings of the same
# infant at different ages get different labels.
clin['infant'] = 'clin_' + clin['infant'].astype(str) + '_' + clin['age_in_weeks'].astype(int).astype(str)

# Drop the helper columns that are no longer needed.
clin = clin.drop(['session', 'trial'], axis=1)
yt = yt.drop('infant_id', axis=1)

# Stack the clinical and YouTube feature tables into one frame.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
features = pd.concat([clin, yt])
# Move the identifying columns to the front.
features = features.set_index(['video', 'category', 'infant', 'age_in_weeks', 'risk']).reset_index()
# Average across rows for the same infant. numeric_only=True leaves the
# non-numeric 'video' column out explicitly; pandas >= 2.0 raises a TypeError
# instead of silently dropping it.
features = features.groupby('infant').mean(numeric_only=True).reset_index()

id_vars = ['infant', 'category', 'age_in_weeks', 'risk']
# Reshape from wide to long: one row per (infant, feature) with the value in "Value".
features = pd.melt(features, id_vars=id_vars, var_name="feature", value_name="Value")

# Remaining steps (not implemented in this cell):
# - average across left and right sides: split column into feature, part, side
# - add age bracket
# - save
# - compute surprise
# - get stats for each group


features

Unnamed: 0,infant,category,age_in_weeks,risk,feature,Value
0,clin_11_16,1,16.000000,1.0,IQR_acc_angle_LElbow,52.574287
1,clin_15_10,1,10.000000,2.0,IQR_acc_angle_LElbow,42.940209
2,clin_16_22,1,22.000000,3.0,IQR_acc_angle_LElbow,22.701369
3,clin_17_15,1,15.000000,1.0,IQR_acc_angle_LElbow,23.426590
4,clin_18_16,1,16.571429,1.0,IQR_acc_angle_LElbow,35.314280
5,clin_19_26,1,26.000000,3.0,IQR_acc_angle_LElbow,80.343646
6,clin_20_18,1,18.000000,3.0,IQR_acc_angle_LElbow,58.577384
7,clin_23_46,1,46.000000,2.0,IQR_acc_angle_LElbow,34.527306
8,clin_24_17,1,17.142857,2.0,IQR_acc_angle_LElbow,42.237291
9,clin_25_31,1,31.000000,1.0,IQR_acc_angle_LElbow,38.058047
