In [1]:
import pandas as pd
import numpy as np

# Perceived Stress Scale

## Load raw data

In [2]:
INSTRU_QUESTIONNAIRE="PSS"
DIGEST="BASIC_DIGEST" # or IMAGEN_DIGEST, DIGEST depending on exact file name

#df_BL = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/BL/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}_{DIGEST}.csv")
#df_FU1 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU1/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}_{DIGEST}.csv")
#df_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}_{DIGEST}.csv")
df_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}_FU3.csv")

### Cleaning

In [3]:
# delete unnecessary columns
df_FU3 = df_FU3.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

In [4]:
# insert Session column
df_FU3["Session"] = "FU3"

In [5]:
# set index to user code and rename to ID
df_FU3 = df_FU3.rename(columns={"User code": "ID"})
df_FU3 = df_FU3.set_index('ID')

In [6]:
df_FU3

Unnamed: 0_level_0,PSS_01,PSS_02,PSS_03,PSS_04,PSS_05,PSS_06,PSS_07,PSS_08,PSS_09,PSS_10,Session
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
112288,2,1,0,4,4,0,4,4,0,0,FU3
215284,2,0,2,2,2,1,2,2,2,1,FU3
240546,0,0,1,3,3,1,3,3,1,0,FU3
297685,1,1,2,3,3,1,2,2,1,1,FU3
308867,0,0,2,3,2,1,3,3,0,0,FU3
...,...,...,...,...,...,...,...,...,...,...,...
99677574,0,0,3,3,3,1,3,3,1,0,FU3
99873252,2,0,1,2,2,2,3,2,0,0,FU3
99875982,0,0,0,4,3,0,3,4,0,0,FU3
99930021,2,1,2,3,4,1,3,3,0,0,FU3


#### Compute variables of interest & splitting criterium

In [7]:
# invert items 4,5,7,8 and delete non-inverted items
pre = [0,1,2,3,4]
post = [4,3,2,1,0]

df_FU3['PSS_04_in'] = df_FU3['PSS_04'].replace(pre, post)
df_FU3['PSS_05_in'] = df_FU3['PSS_05'].replace(pre, post)
df_FU3['PSS_07_in'] = df_FU3['PSS_07'].replace(pre, post)
df_FU3['PSS_08_in'] = df_FU3['PSS_08'].replace(pre, post)

df_FU3 = df_FU3.drop(["PSS_04","PSS_05","PSS_07","PSS_08"], axis=1)

In [8]:
# compute sum score
df_FU3['PSS_total'] = df_FU3['PSS_01'] + df_FU3['PSS_02'] + df_FU3['PSS_03'] + df_FU3['PSS_04_in'] + df_FU3['PSS_05_in'] + df_FU3['PSS_06'] + df_FU3['PSS_07_in'] + df_FU3['PSS_08_in'] + df_FU3['PSS_09'] + df_FU3['PSS_10']

In [9]:
df_FU3

Unnamed: 0_level_0,PSS_01,PSS_02,PSS_03,PSS_06,PSS_09,PSS_10,Session,PSS_04_in,PSS_05_in,PSS_07_in,PSS_08_in,PSS_total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
112288,2,1,0,0,0,0,FU3,0,0,0,0,3
215284,2,0,2,1,2,1,FU3,2,2,2,2,16
240546,0,0,1,1,1,0,FU3,1,1,1,1,7
297685,1,1,2,1,1,1,FU3,1,1,2,2,13
308867,0,0,2,1,0,0,FU3,1,2,1,1,8
...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,0,0,3,1,1,0,FU3,1,1,1,1,9
99873252,2,0,1,2,0,0,FU3,2,2,1,2,12
99875982,0,0,0,0,0,0,FU3,0,1,1,0,2
99930021,2,1,2,1,0,0,FU3,1,0,1,1,9


In [10]:
# reorder columns
column_names = ["Session", "PSS_01", "PSS_02", "PSS_03", "PSS_04_in", "PSS_05_in", "PSS_06", "PSS_07_in", "PSS_08_in", "PSS_09", "PSS_10", "PSS_total"]

df_FU3 = df_FU3.reindex(columns=column_names)

In [11]:
df_FU3

Unnamed: 0_level_0,Session,PSS_01,PSS_02,PSS_03,PSS_04_in,PSS_05_in,PSS_06,PSS_07_in,PSS_08_in,PSS_09,PSS_10,PSS_total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
112288,FU3,2,1,0,0,0,0,0,0,0,0,3
215284,FU3,2,0,2,2,2,1,2,2,2,1,16
240546,FU3,0,0,1,1,1,1,1,1,1,0,7
297685,FU3,1,1,2,1,1,1,2,2,1,1,13
308867,FU3,0,0,2,1,2,1,1,1,0,0,8
...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,FU3,0,0,3,1,1,1,1,1,1,0,9
99873252,FU3,2,0,1,2,2,2,1,2,0,0,12
99875982,FU3,0,0,0,0,1,0,1,0,0,0,2
99930021,FU3,2,1,2,1,0,1,1,1,0,0,9


In [12]:
# get median for median split
df_FU3['PSS_total'].median()

12.0

#### Safe preprocessed file

In [13]:
# save 
df_FU3.to_csv('/ritter/share/data/IMAGEN/posthoc/all_PSS.csv')

## Baratt Impulsiveness Scale

### FU2

#### Load raw data

In [68]:
INSTRU_QUESTIONNAIRE="BIS_CHILD_FU2-IMAGEN_DIGEST"

BIS_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}.csv")

#### Cleaning

In [70]:
# delete unnecessary columns
BIS_FU2 = BIS_FU2.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "Valid", "ts_2", "ts_3", "ts_4"], axis=1)

In [71]:
# insert Session column
BIS_FU2["Session"] = "FU2"

In [72]:
# remove -C from ID
BIS_FU2['User code'] = BIS_FU2['User code'].str.replace(r'-C$', '')

  BIS_FU2['User code'] = BIS_FU2['User code'].str.replace(r'-C$', '')


In [73]:
# set index to user code and rename to ID
BIS_FU2 = BIS_FU2.rename(columns={"User code": "ID"})
BIS_FU2 = BIS_FU2.set_index('ID')

In [74]:
BIS_FU2

Unnamed: 0_level_0,BIS_01,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07,BIS_08,BIS_09,BIS_10,...,BIS_22,BIS_23,BIS_24,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29,BIS_30,Session
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000000112288,2,1,3,1,1,1,2,4,4,4,...,1,1,1,1,1,2,1,1,3,FU2
000000215284,2,1,1,1,1,1,3,2,2,2,...,2,1,1,1,1,2,2,4,3,FU2
000000240546,3,2,2,2,1,1,2,4,3,3,...,1,2,1,1,2,2,1,1,3,FU2
000000297685,3,2,3,3,1,1,3,3,3,2,...,2,1,1,2,3,1,1,2,3,FU2
000000308867,3,2,1,2,2,2,3,3,2,3,...,2,2,2,1,1,2,3,2,3,FU2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
000099677574,2,2,4,2,2,2,1,4,2,4,...,1,3,1,1,2,2,2,1,4,FU2
000099873252,3,2,4,2,2,1,2,2,3,1,...,2,1,1,2,2,2,1,1,3,FU2
000099875982,3,3,3,4,3,3,3,3,2,3,...,2,2,3,3,2,4,4,3,3,FU2
000099930021,4,1,2,1,2,2,3,4,4,2,...,2,2,1,1,2,1,1,2,4,FU2


#### Compute variables of interest & splitting criterium

In [75]:
# invert items 1,7,8,9,10,12,13,15,20,29 and delete non-inverted items
pre = [1,2,3,4]
post = [4,3,2,1]

BIS_FU2['BIS_01_in'] = BIS_FU2['BIS_01'].replace(pre, post)
BIS_FU2['BIS_07_in'] = BIS_FU2['BIS_07'].replace(pre, post)
BIS_FU2['BIS_08_in'] = BIS_FU2['BIS_08'].replace(pre, post)
BIS_FU2['BIS_09_in'] = BIS_FU2['BIS_09'].replace(pre, post)
BIS_FU2['BIS_10_in'] = BIS_FU2['BIS_10'].replace(pre, post)
BIS_FU2['BIS_12_in'] = BIS_FU2['BIS_12'].replace(pre, post)
BIS_FU2['BIS_13_in'] = BIS_FU2['BIS_13'].replace(pre, post)
BIS_FU2['BIS_15_in'] = BIS_FU2['BIS_15'].replace(pre, post)
BIS_FU2['BIS_20_in'] = BIS_FU2['BIS_20'].replace(pre, post)
BIS_FU2['BIS_29_in'] = BIS_FU2['BIS_29'].replace(pre, post)
BIS_FU2['BIS_30_in'] = BIS_FU2['BIS_30'].replace(pre, post)

BIS_FU2 = BIS_FU2.drop(["BIS_01","BIS_07","BIS_08","BIS_09","BIS_10","BIS_12","BIS_13","BIS_15","BIS_20","BIS_29", "BIS_30"], axis=1)

In [76]:
# reorder columns
column_names = ["Session", "BIS_01_in", "BIS_02", "BIS_03", "BIS_04", "BIS_05", "BIS_06", "BIS_07_in", "BIS_08_in", "BIS_09_in", "BIS_10_in", "BIS_11", "BIS_12_in", "BIS_13_in", "BIS_14", "BIS_15_in", "BIS_16", "BIS_17", "BIS_18", "BIS_19", "BIS_20_in", "BIS_21", "BIS_22", "BIS_23", "BIS_24", "BIS_25", "BIS_26", "BIS_27", "BIS_28", "BIS_29_in", "BIS_30_in"]

BIS_FU2 = BIS_FU2.reindex(columns=column_names)

In [77]:
BIS_FU2

Unnamed: 0_level_0,Session,BIS_01_in,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07_in,BIS_08_in,BIS_09_in,...,BIS_21,BIS_22,BIS_23,BIS_24,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29_in,BIS_30_in
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000000112288,FU2,3,1,3,1,1,1,3,1,1,...,1,1,1,1,1,1,2,1,4,2
000000215284,FU2,3,1,1,1,1,1,2,3,3,...,1,2,1,1,1,1,2,2,1,2
000000240546,FU2,2,2,2,2,1,1,3,1,2,...,1,1,2,1,1,2,2,1,4,2
000000297685,FU2,2,2,3,3,1,1,2,2,2,...,1,2,1,1,2,3,1,1,3,2
000000308867,FU2,2,2,1,2,2,2,2,2,3,...,1,2,2,2,1,1,2,3,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
000099677574,FU2,3,2,4,2,2,2,4,1,3,...,2,1,3,1,1,2,2,2,4,1
000099873252,FU2,2,2,4,2,2,1,3,3,2,...,1,2,1,1,2,2,2,1,4,2
000099875982,FU2,2,3,3,4,3,3,2,2,3,...,3,2,2,3,3,2,4,4,2,2
000099930021,FU2,1,1,2,1,2,2,2,1,1,...,3,2,2,1,1,2,1,1,3,1


In [78]:
# compute subscale sum scores

# Attention
BIS_FU2['BIS Attention'] = BIS_FU2['BIS_05'] + BIS_FU2['BIS_06'] + BIS_FU2['BIS_09_in'] + BIS_FU2['BIS_11'] + BIS_FU2['BIS_20_in'] + BIS_FU2['BIS_24'] + BIS_FU2['BIS_26'] + BIS_FU2['BIS_28']

# Motor
BIS_FU2['BIS Motor'] = BIS_FU2['BIS_02'] + BIS_FU2['BIS_03'] + BIS_FU2['BIS_04'] + BIS_FU2['BIS_16'] + BIS_FU2['BIS_17'] + BIS_FU2['BIS_19'] + BIS_FU2['BIS_21'] + BIS_FU2['BIS_22'] + BIS_FU2['BIS_23'] + BIS_FU2['BIS_25'] + BIS_FU2['BIS_30_in']

# Non-Planning
BIS_FU2['BIS Non-Planning'] = BIS_FU2['BIS_01_in'] + BIS_FU2['BIS_07_in'] + BIS_FU2['BIS_08_in'] + BIS_FU2['BIS_10_in'] + BIS_FU2['BIS_12_in'] + BIS_FU2['BIS_13_in'] + BIS_FU2['BIS_14'] + BIS_FU2['BIS_15_in'] + BIS_FU2['BIS_18'] + BIS_FU2['BIS_27'] + BIS_FU2['BIS_29_in']

# compute total sum score
BIS_FU2['BIS Total'] = BIS_FU2['BIS Attention'] + BIS_FU2['BIS Motor'] + BIS_FU2['BIS Non-Planning']

In [79]:
BIS_FU2

Unnamed: 0_level_0,Session,BIS_01_in,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07_in,BIS_08_in,BIS_09_in,...,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29_in,BIS_30_in,BIS Attention,BIS Motor,BIS Non-Planning,BIS Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000000112288,FU2,3,1,3,1,1,1,3,1,1,...,1,1,2,1,4,2,9,15,21,45
000000215284,FU2,3,1,1,1,1,1,2,3,3,...,1,1,2,2,1,2,14,13,22,49
000000240546,FU2,2,2,2,2,1,1,3,1,2,...,1,2,2,1,4,2,11,18,24,53
000000297685,FU2,2,2,3,3,1,1,2,2,2,...,2,3,1,1,3,2,13,23,24,60
000000308867,FU2,2,2,1,2,2,2,2,2,3,...,1,1,2,3,3,2,18,18,23,59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
000099677574,FU2,3,2,4,2,2,2,4,1,3,...,1,2,2,2,4,1,16,26,26,68
000099873252,FU2,2,2,4,2,2,1,3,3,2,...,2,2,2,1,4,2,12,25,29,66
000099875982,FU2,2,3,3,4,3,3,2,2,3,...,3,2,4,4,2,2,23,29,26,78
000099930021,FU2,1,1,2,1,2,2,2,1,1,...,1,2,1,1,3,1,11,17,17,45


In [80]:
# get median for median split
BIS_FU2['BIS Total'].median()

62.0

### FU3

#### Load raw data

In [81]:
INSTRU_QUESTIONNAIRE="BIS_FU3"

BIS_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_{INSTRU_QUESTIONNAIRE}.csv")

#### Cleaning

In [82]:
# delete unnecessary columns
BIS_FU3 = BIS_FU3.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

In [83]:
# insert Session column
BIS_FU3["Session"] = "FU3"

In [84]:
# set index to user code and rename to ID
BIS_FU3 = BIS_FU3.rename(columns={"User code": "ID"})
BIS_FU3 = BIS_FU3.set_index('ID')

In [85]:
BIS_FU3

Unnamed: 0_level_0,BIS_01,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07,BIS_08,BIS_09,BIS_10,...,BIS_22,BIS_23,BIS_24,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29,BIS_30,Session
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,4,3,4,1,1,2,4,4,4,2,...,1,1,1,1,1,4,1,2,4,FU3
215284,2,1,1,1,1,1,4,3,3,2,...,2,1,1,1,1,2,1,4,2,FU3
240546,2,1,1,2,2,1,2,2,3,2,...,1,2,1,1,2,2,1,3,3,FU3
297685,3,1,3,3,1,4,4,3,3,4,...,2,1,1,1,4,2,2,2,3,FU3
308867,2,2,1,3,2,2,3,3,1,3,...,1,1,2,1,1,2,3,2,3,FU3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,3,3,4,2,2,2,2,2,2,4,...,1,3,1,1,2,2,2,1,3,FU3
99873252,3,2,3,2,2,1,1,2,2,1,...,3,2,1,2,3,3,1,1,3,FU3
99875982,3,2,1,2,3,2,3,2,3,4,...,2,1,1,1,2,2,1,2,2,FU3
99930021,4,1,2,1,1,2,3,3,3,2,...,2,1,2,2,2,2,1,2,4,FU3


#### Compute variables of interest & splitting criterium

In [86]:
# invert items 1,7,8,9,10,12,13,15,20,29 and delete non-inverted items
pre = [1,2,3,4]
post = [4,3,2,1]

BIS_FU3['BIS_01_in'] = BIS_FU3['BIS_01'].replace(pre, post)
BIS_FU3['BIS_07_in'] = BIS_FU3['BIS_07'].replace(pre, post)
BIS_FU3['BIS_08_in'] = BIS_FU3['BIS_08'].replace(pre, post)
BIS_FU3['BIS_09_in'] = BIS_FU3['BIS_09'].replace(pre, post)
BIS_FU3['BIS_10_in'] = BIS_FU3['BIS_10'].replace(pre, post)
BIS_FU3['BIS_12_in'] = BIS_FU3['BIS_12'].replace(pre, post)
BIS_FU3['BIS_13_in'] = BIS_FU3['BIS_13'].replace(pre, post)
BIS_FU3['BIS_15_in'] = BIS_FU3['BIS_15'].replace(pre, post)
BIS_FU3['BIS_20_in'] = BIS_FU3['BIS_20'].replace(pre, post)
BIS_FU3['BIS_29_in'] = BIS_FU3['BIS_29'].replace(pre, post)
BIS_FU3['BIS_30_in'] = BIS_FU3['BIS_30'].replace(pre, post)

BIS_FU3 = BIS_FU3.drop(["BIS_01","BIS_07","BIS_08","BIS_09","BIS_10","BIS_12","BIS_13","BIS_15","BIS_20","BIS_29", "BIS_30"], axis=1)

In [87]:
# reorder columns
column_names = ["Session", "BIS_01_in", "BIS_02", "BIS_03", "BIS_04", "BIS_05", "BIS_06", "BIS_07_in", "BIS_08_in", "BIS_09_in", "BIS_10_in", "BIS_11", "BIS_12_in", "BIS_13_in", "BIS_14", "BIS_15_in", "BIS_16", "BIS_17", "BIS_18", "BIS_19", "BIS_20_in", "BIS_21", "BIS_22", "BIS_23", "BIS_24", "BIS_25", "BIS_26", "BIS_27", "BIS_28", "BIS_29_in", "BIS_30_in"]

BIS_FU3 = BIS_FU3.reindex(columns=column_names)

In [88]:
BIS_FU3

Unnamed: 0_level_0,Session,BIS_01_in,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07_in,BIS_08_in,BIS_09_in,...,BIS_21,BIS_22,BIS_23,BIS_24,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29_in,BIS_30_in
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,FU3,1,3,4,1,1,2,1,1,1,...,2,1,1,1,1,1,4,1,3,1
215284,FU3,3,1,1,1,1,1,1,2,2,...,1,2,1,1,1,1,2,1,1,3
240546,FU3,3,1,1,2,2,1,3,3,2,...,1,1,2,1,1,2,2,1,2,2
297685,FU3,2,1,3,3,1,4,1,2,2,...,1,2,1,1,1,4,2,2,3,2
308867,FU3,3,2,1,3,2,2,2,2,4,...,1,1,1,2,1,1,2,3,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,FU3,2,3,4,2,2,2,3,3,3,...,2,1,3,1,1,2,2,2,4,2
99873252,FU3,2,2,3,2,2,1,4,3,3,...,1,3,2,1,2,3,3,1,4,2
99875982,FU3,2,2,1,2,3,2,2,3,2,...,1,2,1,1,1,2,2,1,3,3
99930021,FU3,1,1,2,1,1,2,2,2,2,...,2,2,1,2,2,2,2,1,3,1


In [89]:
# compute subscale sum scores

# Attention
BIS_FU3['BIS Attention'] = BIS_FU3['BIS_05'] + BIS_FU3['BIS_06'] + BIS_FU3['BIS_09_in'] + BIS_FU3['BIS_11'] + BIS_FU3['BIS_20_in'] + BIS_FU3['BIS_24'] + BIS_FU3['BIS_26'] + BIS_FU3['BIS_28']

# Motor
BIS_FU3['BIS Motor'] = BIS_FU3['BIS_02'] + BIS_FU3['BIS_03'] + BIS_FU3['BIS_04'] + BIS_FU3['BIS_16'] + BIS_FU3['BIS_17'] + BIS_FU3['BIS_19'] + BIS_FU3['BIS_21'] + BIS_FU3['BIS_22'] + BIS_FU3['BIS_23'] + BIS_FU3['BIS_25'] + BIS_FU3['BIS_30_in']

# Non-Planning
BIS_FU3['BIS Non-Planning'] = BIS_FU3['BIS_01_in'] + BIS_FU3['BIS_07_in'] + BIS_FU3['BIS_08_in'] + BIS_FU3['BIS_10_in'] + BIS_FU3['BIS_12_in'] + BIS_FU3['BIS_13_in'] + BIS_FU3['BIS_14'] + BIS_FU3['BIS_15_in'] + BIS_FU3['BIS_18'] + BIS_FU3['BIS_27'] + BIS_FU3['BIS_29_in']

# compute total sum score
BIS_FU3['BIS Total'] = BIS_FU3['BIS Attention'] + BIS_FU3['BIS Motor'] + BIS_FU3['BIS Non-Planning']

In [90]:
BIS_FU3

Unnamed: 0_level_0,Session,BIS_01_in,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07_in,BIS_08_in,BIS_09_in,...,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29_in,BIS_30_in,BIS Attention,BIS Motor,BIS Non-Planning,BIS Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,FU3,1,3,4,1,1,2,1,1,1,...,1,1,4,1,3,1,11,19,22,52
215284,FU3,3,1,1,1,1,1,1,2,2,...,1,1,2,1,1,3,11,14,21,46
240546,FU3,3,1,1,2,2,1,3,3,2,...,1,2,2,1,2,2,12,16,24,52
297685,FU3,2,1,3,3,1,4,1,2,2,...,1,4,2,2,3,2,19,20,21,60
308867,FU3,3,2,1,3,2,2,2,2,4,...,1,1,2,3,3,2,20,17,27,64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,FU3,2,3,4,2,2,2,3,3,3,...,1,2,2,2,4,2,19,28,29,76
99873252,FU3,2,2,3,2,2,1,4,3,3,...,2,3,3,1,4,2,15,24,34,73
99875982,FU3,2,2,1,2,3,2,2,3,2,...,1,2,2,1,3,3,14,18,22,54
99930021,FU3,1,1,2,1,1,2,2,2,2,...,2,2,2,1,3,1,12,16,20,48


In [91]:
# get median for median split
BIS_FU3['BIS Total'].median()

61.5

### Combine FU2 and FU3 into 1 data frame

In [93]:
BIS_all = pd.concat([BIS_FU2, BIS_FU3])

In [94]:
BIS_all

Unnamed: 0_level_0,Session,BIS_01_in,BIS_02,BIS_03,BIS_04,BIS_05,BIS_06,BIS_07_in,BIS_08_in,BIS_09_in,...,BIS_25,BIS_26,BIS_27,BIS_28,BIS_29_in,BIS_30_in,BIS Attention,BIS Motor,BIS Non-Planning,BIS Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000000112288,FU2,3,1,3,1,1,1,3,1,1,...,1,1,2,1,4,2,9,15,21,45
000000215284,FU2,3,1,1,1,1,1,2,3,3,...,1,1,2,2,1,2,14,13,22,49
000000240546,FU2,2,2,2,2,1,1,3,1,2,...,1,2,2,1,4,2,11,18,24,53
000000297685,FU2,2,2,3,3,1,1,2,2,2,...,2,3,1,1,3,2,13,23,24,60
000000308867,FU2,2,2,1,2,2,2,2,2,3,...,1,1,2,3,3,2,18,18,23,59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,FU3,2,3,4,2,2,2,3,3,3,...,1,2,2,2,4,2,19,28,29,76
99873252,FU3,2,2,3,2,2,1,4,3,3,...,2,3,3,1,4,2,15,24,34,73
99875982,FU3,2,2,1,2,3,2,2,3,2,...,1,2,2,1,3,3,14,18,22,54
99930021,FU3,1,1,2,1,1,2,2,2,2,...,2,2,2,1,3,1,12,16,20,48


#### Safe preprocessed file

In [95]:
# save 
BIS_all.to_csv('/ritter/share/data/IMAGEN/posthoc/all_BIS.csv')

## CANTAB Cambridge Guessing Task

### BL

In [14]:
CGT_BL = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/BL/cantab/IMAGEN_cantab_BL.csv")
CGT_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/cantab/IMAGEN_cantab_FU2.csv")
CGT_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/cantab/IMAGEN_cantab_FU3.csv")

#### Cleaning

In [15]:
# select CGT columns
CGT_BL = CGT_BL[["PSC2", "CGT Delay aversion", "CGT Deliberation time", "CGT Overall proportion bet", "CGT Quality of decision making", "CGT Risk adjustment", "CGT Risk taking"]]
CGT_FU2 = CGT_FU2[["PSC2", "CGT Delay aversion", "CGT Deliberation time", "CGT Overall proportion bet", "CGT Quality of decision making", "CGT Risk adjustment", "CGT Risk taking"]]
CGT_FU3 = CGT_FU3[["PSC2", "CGT Delay aversion", "CGT Deliberation time", "CGT Overall proportion bet", "CGT Quality of decision making", "CGT Risk adjustment", "CGT Risk taking"]]

In [16]:
# insert Session column
CGT_BL["Session"] = "BL"
CGT_FU2["Session"] = "FU2"
CGT_FU3["Session"] = "FU3"

In [17]:
# set index to user code and rename to ID
CGT_BL = CGT_BL.rename(columns={"PSC2": "ID"})
CGT_BL = CGT_BL.set_index('ID')
CGT_FU2 = CGT_FU2.rename(columns={"PSC2": "ID"})
CGT_FU2 = CGT_FU2.set_index('ID')
CGT_FU3 = CGT_FU3.rename(columns={"PSC2": "ID"})
CGT_FU3 = CGT_FU3.set_index('ID')

#### Compute variables of interest & splitting criterium

In [18]:
# reorder columns
column_names = ["Session", "CGT Delay aversion", "CGT Deliberation time", "CGT Overall proportion bet", "CGT Quality of decision making", "CGT Risk adjustment", "CGT Risk taking"]

CGT_BL = CGT_BL.reindex(columns=column_names)
CGT_FU2 = CGT_FU2.reindex(columns=column_names)
CGT_FU3 = CGT_FU3.reindex(columns=column_names)

In [19]:
CGT_BL

Unnamed: 0_level_0,Session,CGT Delay aversion,CGT Deliberation time,CGT Overall proportion bet,CGT Quality of decision making,CGT Risk adjustment,CGT Risk taking
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1274,BL,0.0823156682,3090.375,0.3611111111,0.921875,1.412905452,0.4093220339
22453,BL,0.3160526316,2043.0327868852,0.5967213115,0.7090909091,1.3616529385,0.658974359
75717,BL,0.2450100806,1315.125,0.3972222222,0.984375,3.0482632541,0.4341269841
106601,BL,0.0484375,2270.6944444444,0.5402777778,1.0,1.9601593625,0.58828125
106871,BL,0.353125,2382.2777777778,0.3159722222,1.0,1.3896713615,0.3328125
...,...,...,...,...,...,...,...
99873252,BL,0.4650744417,1894.0833333333,0.4715277778,0.890625,0.6982113821,0.5394736842
99875982,BL,0.2544858871,2159.1944444444,0.5076388889,0.984375,0.2263333333,0.5357142857
99888850,BL,-0.0235526316,3207.0,0.2819444444,0.609375,0.5903450808,0.291025641
99930021,BL,0.1977678571,2168.4861111111,0.3486111111,0.9375,1.3499681122,0.3733333333


In [20]:
CGT_FU2

Unnamed: 0_level_0,Session,CGT Delay aversion,CGT Deliberation time,CGT Overall proportion bet,CGT Quality of decision making,CGT Risk adjustment,CGT Risk taking
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
112288,FU2,0.606000,1257.639344,0.480328,0.818182,1.591112,0.533333
215284,FU2,0.172000,3137.013889,0.371528,0.781250,0.580543,0.430000
240546,FU2,0.101109,1797.208333,0.526389,0.984375,1.842286,0.565873
297685,FU2,0.155746,1415.888889,0.509028,0.984375,1.400382,0.545238
308867,FU2,0.175000,1291.027778,0.486111,1.000000,3.103245,0.529687
...,...,...,...,...,...,...,...
99747799,FU2,0.060104,2151.569444,0.688889,0.968750,0.721132,0.719355
99873252,FU2,0.474597,3536.802817,0.483099,0.936508,-0.165621,0.511864
99875982,FU2,0.292742,1512.055556,0.611111,0.984375,0.580142,0.630952
99930021,FU2,0.364063,1923.958333,0.560417,1.000000,1.156627,0.583594


In [21]:
CGT_FU3

Unnamed: 0_level_0,Session,CGT Delay aversion,CGT Deliberation time,CGT Overall proportion bet,CGT Quality of decision making,CGT Risk adjustment,CGT Risk taking
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
112288,FU3,0.230871,1481.750000,0.537500,0.807018,2.749771,0.650000
215284,FU3,0.508000,2032.676923,0.541538,0.862069,-0.464460,0.598000
240546,FU3,0.023438,1281.000000,0.460417,1.000000,2.121306,0.502344
297685,FU3,0.239063,967.541667,0.611111,1.000000,1.288623,0.652344
308867,FU3,0.161681,1700.573529,0.602206,0.883333,1.639430,0.683019
...,...,...,...,...,...,...,...
99677574,FU3,0.275806,2170.720588,0.563971,0.967213,1.026160,0.605085
99873252,FU3,0.481130,1405.367647,0.658824,0.966667,0.398887,0.667241
99875982,FU3,0.194195,1674.971831,0.666901,0.936508,-0.046151,0.677119
99930021,FU3,0.286458,1435.972222,0.396528,0.968750,1.787951,0.439516


In [22]:
# bug: ;" instead of . in some cells --> replace in all data frames
CGT_BL = CGT_BL.replace(';"','.', regex=True)
CGT_FU2 = CGT_FU2.replace(';"','.', regex=True)
CGT_FU3 = CGT_FU3.replace(';"','.', regex=True)

In [28]:
# get medians for median splits
# BL
a=CGT_BL['CGT Delay aversion'].median()
b=CGT_BL['CGT Deliberation time'].median()
c=CGT_BL['CGT Overall proportion bet'].median()
d=CGT_BL['CGT Quality of decision making'].median()
e=CGT_BL['CGT Risk adjustment'].median()
f=CGT_BL['CGT Risk taking'].median()
# FU2
g=CGT_FU2['CGT Delay aversion'].median()
h=CGT_FU2['CGT Deliberation time'].median()
i=CGT_FU2['CGT Overall proportion bet'].median()
j=CGT_FU2['CGT Quality of decision making'].median()
k=CGT_FU2['CGT Risk adjustment'].median()
l=CGT_FU2['CGT Risk taking'].median()
# FU3
m=CGT_FU3['CGT Delay aversion'].median()
n=CGT_FU3['CGT Deliberation time'].median()
o=CGT_FU3['CGT Overall proportion bet'].median()
p=CGT_FU3['CGT Quality of decision making'].median()
q=CGT_FU3['CGT Risk adjustment'].median()
r=CGT_FU3['CGT Risk taking'].median()

In [29]:
m

0.1524865591

In [30]:
o

0.5208333333

In [31]:
q

2.0038934426

In [None]:
r

### Combine FU2 and FU3 into 1 data frame

In [26]:
CGT_all = pd.concat([CGT_BL, CGT_FU2, CGT_FU3])

In [27]:
CGT_all

Unnamed: 0_level_0,Session,CGT Delay aversion,CGT Deliberation time,CGT Overall proportion bet,CGT Quality of decision making,CGT Risk adjustment,CGT Risk taking
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1274,BL,0.0823156682,3090.375,0.3611111111,0.921875,1.412905452,0.4093220339
22453,BL,0.3160526316,2043.0327868852,0.5967213115,0.7090909091,1.3616529385,0.658974359
75717,BL,0.2450100806,1315.125,0.3972222222,0.984375,3.0482632541,0.4341269841
106601,BL,0.0484375,2270.6944444444,0.5402777778,1.0,1.9601593625,0.58828125
106871,BL,0.353125,2382.2777777778,0.3159722222,1.0,1.3896713615,0.3328125
...,...,...,...,...,...,...,...
99677574,FU3,0.275806,2170.720588,0.563971,0.967213,1.02616,0.605085
99873252,FU3,0.48113,1405.367647,0.658824,0.966667,0.398887,0.667241
99875982,FU3,0.194195,1674.971831,0.666901,0.936508,-0.046151,0.677119
99930021,FU3,0.286458,1435.972222,0.396528,0.96875,1.787951,0.439516


#### Safe preprocessed file

In [28]:
# save 
CGT_all.to_csv('/ritter/share/data/IMAGEN/posthoc/all_CGT.csv')

In [None]:
r

0.5766129032

## ESPAD Alcohol (EAQF)

Quantity Frequency
- 8a 'On how many occasions IN YOUR WHOLE LIFETIME have you had any alcoholic beverage to drink?'.
- 8b 'On how many occasions OVER THE LAST 12 MONTHS have you had any alcoholic beverage to drink?'.
- 8c 'On how many occasions OVER THE LAST 30 DAYS have you had any alcoholic beverage to drink?'.
- 9a 'On how many occasions OVER THE LAST 30 DAYS have youhad beer to drink? (do not include low alcohol beer)'.
- 29beer 'When did you FIRST drink beer (at least one glass)?'.
- 9b 'On how many occasions OVER THE LAST 30 DAYS have you had wine to drink?'.
- 29wine 'When did you FIRST drink wine (at least one glass)?'.
- 9c 'On how many occasions OVER THE LAST 30 DAYS have youhad an alcopop to drink? (e.g. Bacardi Breezer)'.
- 29alcopop 'When did you FIRST drink alcopops (at least one bottle)?'.
- 9d 'On how many occasions OVER THE LAST 30 DAYS have you had spirits to drink? (whisky, cognac, shot drinks etc., also include spirits mixed with soft drinks, except alcopops)'.
- 29spirits 'When did you FIRST drink spirits (at least one shot)?'.

Binging
- 17a 'How many times IN YOUR WHOLE LIFETIME have you had five or more drinks in a row?'.
- 17b 'How many times OVER THE LAST 12 MONTHS have you had five or more drinks in a row?'.
- 17c 'How many times OVER THE LAST 30 DAYS have you had five or more drinks in a row?'.
- prev31 'How many drinks containing alcohol do you have on a TYPICAL DAY when you are drinking?'.
- 19a 'On how many occasions IN YOUR WHOLE LIFETIME have you been drunk from drinking alcoholic beverages?'.
- 19b 'On how many occasions OVER THE LAST 12 MONTHS have you been drunk from drinking alcoholic beverages?'.
- 19c 'On how many occasions OVER THE LAST 30 DAYS have you been drunk from drinking alcoholic beverages?'.
- 20 'Please indicate on this scale from 1 to 10 how drunk you would say you were the last time you were drunk.'.
- 21 'How many drinks do you usually need to get drunk?'. 

Alcohol expectancies
How likely is it that each of the following things would happen to you personally, if you drink alcohol?
- 18a 'Feel relaxed'.
- 18b 'Get into trouble with police'.
- 18c 'Harm my health'.
- 18d 'Feel happy'.
- 18e 'Forget my problems'.
- 18f 'Not be able to stop drinking'.
- 18g 'Get a hangover'.
- 18h 'Feel more friendly and outgoing'.
- 18i 'Do something I would regret'.
- 18j 'Have a lot of fun'.
- 18k 'Feel sick'. 

Onset
- 29d 'When did you FIRST get drunk from drinking alcoholic beverages?'.




In [3]:
# Load data
EA_BL = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/BL/psytools/IMAGEN-IMGN_ESPAD_CHILD_RC5-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_BL["Session"] = "BL"
EA_FU1 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU1/psytools/IMAGEN-IMGN_ESPAD_CHILD_FU_RC5-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_FU1["Session"] = "FU1"
EA_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_ESPAD_CHILD_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_FU2["Session"] = "FU2"
EA_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_ESPAD_FU3.csv", dtype={"User code":str})
EA_FU3["Session"] = "FU3"

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


### Cleaning

In [4]:
# combine into 1 data frame
EA = pd.concat([EA_BL, EA_FU1, EA_FU2, EA_FU3])

# clean ID column
EA["User code"] = EA["User code"].apply(lambda x: int(x.replace("-C", "")))
EA = EA.rename(columns={"User code" : "ID"})

In [5]:
#filter only the questions regarding alcohol since there are >1k columns
EA = EA[["ID", "Session", 
               "8a", "8b", "8c", "9a", "9b", "9c", "9d", "17a", "17b", "17c",
               "29beer", "29wine", "29alcopop", "29spirits",
               "prev31", "18a", "18b", "18c", "18d", "18e", "18f", "18g", "18h",
               "18i", "18j", "18k", "19a", "19b", "19c", "29d", "20", "21"]]
EA

Unnamed: 0,ID,Session,8a,8b,8c,9a,9b,9c,9d,17a,...,18h,18i,18j,18k,19a,19b,19c,29d,20,21
0,1274,BL,0,,,,,,,,...,,,,,,,,,,
1,22453,BL,1,1.0,0.0,0.0,1.0,1.0,0.0,2.0,...,2.0,5.0,2.0,5.0,1.0,1.0,0.0,13.0,7.0,5.0
2,75717,BL,5,2.0,1.0,1.0,0.0,0.0,1.0,5.0,...,2.0,4.0,2.0,4.0,3.0,2.0,1.0,13.0,4.0,3.0
3,106601,BL,0,,,,,,,,...,,,,,,,,,,
4,106871,BL,1,0.0,,0.0,0.0,0.0,0.0,0.0,...,5.0,5.0,5.0,4.0,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1346,99677574,FU3,3,1.0,0.0,,,,,0.0,...,4.0,3.0,3.0,3.0,0.0,,,,,
1347,99873252,FU3,6,5.0,2.0,1.0,2.0,1.0,1.0,5.0,...,1.0,3.0,2.0,3.0,6.0,5.0,2.0,15.0,7.0,2.0
1348,99875982,FU3,4,2.0,1.0,1.0,0.0,0.0,0.0,3.0,...,2.0,4.0,1.0,2.0,1.0,0.0,,18.0,8.0,3.0
1349,99930021,FU3,1,1.0,1.0,1.0,1.0,0.0,4.0,3.0,...,3.0,5.0,3.0,5.0,2.0,0.0,,16.0,6.0,3.0


In [6]:
EA = EA.set_index('ID')

## Compute variables of interest

In [7]:
# invert positively formulated expectancy items so that 1 = very unlikely, 5 = very likely
pre = [1,2,3,4,5] 
post = [5,4,3,2,1]

EA["18a"] = EA["18a"].replace(pre, post)
EA["18d"] = EA["18d"].replace(pre, post)
EA["18e"] = EA["18e"].replace(pre, post)
EA["18h"] = EA["18h"].replace(pre, post)
EA["18j"] = EA["18j"].replace(pre, post)

In [8]:
# compute sum score
EA["Expectancy Total"] = EA["18a"] + EA["18b"] + EA["18c"] + EA["18d"] + EA["18e"] + EA["18f"] + EA["18g"] + EA["18h"] + EA["18i"] + EA["18j"] + EA["18k"]

### Save preprocessed file

In [18]:
# save 
EA.to_csv('/ritter/share/data/IMAGEN/posthoc/all_ESPAD_Alc.csv')

### Compute final first drunk score

In [9]:
# compute variable including the earliest age mentioned in 29d (first drunkenness)
first_drunk = EA[["Session", "29d"]]

first_drunk_fu3 = first_drunk.loc[first_drunk['Session'] == 'FU3']
first_drunk_fu3 = first_drunk_fu3[["29d"]]
first_drunk_fu3 = first_drunk_fu3.rename(columns={"29d": "FU3"})

first_drunk_fu2 = first_drunk.loc[first_drunk['Session'] == 'FU2']
first_drunk_fu2 = first_drunk_fu2[["29d"]]
first_drunk_fu2 = first_drunk_fu2.rename(columns={"29d": "FU2"})

first_drunk_fu1 = first_drunk.loc[first_drunk['Session'] == 'FU1']
first_drunk_fu1 = first_drunk_fu1[["29d"]]
first_drunk_fu1 = first_drunk_fu1.rename(columns={"29d": "FU1"})

first_drunk_bl = first_drunk.loc[first_drunk['Session'] == 'BL']
first_drunk_bl = first_drunk_bl[["29d"]]
first_drunk_bl = first_drunk_bl.rename(columns={"29d": "BL"})

In [10]:
# concatenate FTND & EA
first_drunk = pd.merge(left=first_drunk_fu3, right=first_drunk_fu2, how='left', left_on=['ID'], right_on=['ID'])
first_drunk = pd.merge(left=first_drunk, right=first_drunk_fu1, how='left', left_on=['ID'], right_on=['ID'])
first_drunk = pd.merge(left=first_drunk, right=first_drunk_bl, how='left', left_on=['ID'], right_on=['ID'])
first_drunk

Unnamed: 0_level_0,FU3,FU2,FU1,BL
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
112288,14.0,15.0,14.0,
215284,18.0,15.0,14.0,
240546,15.0,17.0,17.0,
297685,15.0,14.0,14.0,14.0
308867,18.0,18.0,,
...,...,...,...,...
99677574,,,,
99873252,15.0,15.0,15.0,
99875982,18.0,17.0,,
99930021,16.0,15.0,16.0,


In [11]:
#first_drunk['fd'] = first_drunk["FU3"]
#first_drunk['fd'] = np.where(first_drunk['FU2'] < first_drunk['fd'], first_drunk['FU2'], first_drunk['fd'])
#first_drunk['fd'] = np.where(first_drunk['FU1'] < first_drunk['fd'], first_drunk['FU1'], first_drunk['fd'])
#first_drunk['fd'] = np.where(first_drunk['BL'] < first_drunk['fd'], first_drunk['BL'], first_drunk['fd'])
#first_drunk

In [12]:
first_drunk['fd'] = first_drunk["BL"]
first_drunk["fd"] = first_drunk["fd"].fillna(first_drunk["FU1"])
first_drunk["fd"] = first_drunk["fd"].fillna(first_drunk["FU2"])
first_drunk["fd"] = first_drunk["fd"].fillna(first_drunk["FU3"])
first_drunk

Unnamed: 0_level_0,FU3,FU2,FU1,BL,fd
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
112288,14.0,15.0,14.0,,14.0
215284,18.0,15.0,14.0,,14.0
240546,15.0,17.0,17.0,,17.0
297685,15.0,14.0,14.0,14.0,14.0
308867,18.0,18.0,,,18.0
...,...,...,...,...,...
99677574,,,,,
99873252,15.0,15.0,15.0,,15.0
99875982,18.0,17.0,,,17.0
99930021,16.0,15.0,16.0,,16.0


In [13]:
# remove rows with missing value in fd
first_drunk = first_drunk[first_drunk['fd'].notna()]
first_drunk

Unnamed: 0_level_0,FU3,FU2,FU1,BL,fd
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
112288,14.0,15.0,14.0,,14.0
215284,18.0,15.0,14.0,,14.0
240546,15.0,17.0,17.0,,17.0
297685,15.0,14.0,14.0,14.0,14.0
308867,18.0,18.0,,,18.0
...,...,...,...,...,...
99616225,13.0,13.0,13.0,11.0,11.0
99873252,15.0,15.0,15.0,,15.0
99875982,18.0,17.0,,,17.0
99930021,16.0,15.0,16.0,,16.0


In [14]:
# add session row
first_drunk["Session"] = "FU3"
first_drunk

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_drunk["Session"] = "FU3"


Unnamed: 0_level_0,FU3,FU2,FU1,BL,fd,Session
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
112288,14.0,15.0,14.0,,14.0,FU3
215284,18.0,15.0,14.0,,14.0,FU3
240546,15.0,17.0,17.0,,17.0,FU3
297685,15.0,14.0,14.0,14.0,14.0,FU3
308867,18.0,18.0,,,18.0,FU3
...,...,...,...,...,...,...
99616225,13.0,13.0,13.0,11.0,11.0,FU3
99873252,15.0,15.0,15.0,,15.0,FU3
99875982,18.0,17.0,,,17.0,FU3
99930021,16.0,15.0,16.0,,16.0,FU3


In [15]:
# save 
first_drunk.to_csv('/ritter/share/data/IMAGEN/posthoc/all_ESPAD_first_drunk.csv')

## ESPAD Problematic Substance Use

'Please indicate if you have used xy on a weekly basis OVER A 12 MONTH PERIOD or in an
excessive or problematic way IN YOUR LIFETIME?'

In [3]:
# Load data
EA_BL = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/BL/psytools/IMAGEN-IMGN_ESPAD_CHILD_RC5-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_BL["Session"] = "BL"
EA_FU1 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU1/psytools/IMAGEN-IMGN_ESPAD_CHILD_FU_RC5-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_FU1["Session"] = "FU1"
EA_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_ESPAD_CHILD_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
EA_FU2["Session"] = "FU2"
EA_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_ESPAD_FU3.csv", dtype={"User code":str})
EA_FU3["Session"] = "FU3"
FTND= pd.read_csv(f"/ritter/share/data/IMAGEN/posthoc/all_FTND.csv", dtype={"User code":str})

  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)
  exec(code_obj, self.user_global_ns, self.user_ns)


### Cleaning

In [4]:
# combine into 1 data frame
EA = pd.concat([EA_BL, EA_FU1, EA_FU2, EA_FU3])

# clean ID column
EA["User code"] = EA["User code"].apply(lambda x: int(x.replace("-C", "")))
EA = EA.rename(columns={"User code" : "ID"})
EA

Unnamed: 0,ID,Iteration,Language,Completed,Completed Timestamp,Processed Timestamp,Valid,17a,17b,17c,...,dast9crack,dast9relevin,first_narcotic 1,life_anabolic 1,life_relevin 1,life_relevin 2,month_narcotic 1,ts_1_specify,year_crack 1,year_heroin 1
0,1274,1,en,t,5454,5454,t,,,,...,,,,,,,,,,
1,22453,1,de,t,5365,5365,t,2.0,1.0,0.0,...,,,,,,,,,,
2,75717,1,en,t,5350,5350,t,5.0,3.0,2.0,...,,,,,,,,,,
3,106601,1,fr,t,5060,5060,t,,,,...,,,,,,,,,,
4,106871,1,de,t,5574,5574,t,0.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1346,99677574,1,de,t,8197,8197,,0.0,,,...,,,,,,,,,,
1347,99873252,1,de,t,8370,8370,,5.0,5.0,2.0,...,,,,,,,,,,
1348,99875982,1,en,t,8123,8123,,3.0,0.0,,...,,,,,,,,,,
1349,99930021,1,de,t,8129,8129,,3.0,0.0,,...,,,,,,,,,,


In [5]:
#filter only the questions regarding alcohol since there are >1k columns
EA = EA[["ID", "Session", "31amphet", "31anabolic", "31coke", "31crack", "31ghb", "31glue", "31hash", "31heroin", "31ketamine", "31lsd", "31mdma",
        "31mushrooms", "31narcotic", "31relevin", "31tranq"]]
EA

Unnamed: 0,ID,Session,31amphet,31anabolic,31coke,31crack,31ghb,31glue,31hash,31heroin,31ketamine,31lsd,31mdma,31mushrooms,31narcotic,31relevin,31tranq
0,1274,BL,,,,,,,,,,,,,,,
1,22453,BL,,,,,,,,,,,,,,,
2,75717,BL,,,,,,,,,,,,,,,
3,106601,BL,,,,,,,,,,,,,,,
4,106871,BL,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1346,99677574,FU3,,,,,,,,,,,,,,,
1347,99873252,FU3,,,,,,,,,,,,,,,
1348,99875982,FU3,,,,,,,,,,,,,,,
1349,99930021,FU3,,,,,,,,,,,,,,,


In [6]:
EA = EA.set_index('ID')

In [7]:
EA = EA.fillna(0)

In [9]:
# concatenate FTND & EA
EAF = pd.merge(left=EA, right=FTND, how='left', left_on=['ID', 'Session'], right_on=['ID', 'Session'])
EAF

Unnamed: 0,ID,Session,31amphet,31anabolic,31coke,31crack,31ghb,31glue,31hash,31heroin,31ketamine,31lsd,31mdma,31mushrooms,31narcotic,31relevin,31tranq,Likelihood of nicotine dependence child,FTND Sum
0,1274,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
1,22453,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
2,75717,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,1
3,106601,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
4,106871,BL,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6766,99677574,FU3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
6767,99873252,FU3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
6768,99875982,FU3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0
6769,99930021,FU3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,less dependent,0


## Compute variables of interest

In [10]:
# compute score which is one if FTND OR HASH OR any other substance >0
EAF["Abuse Any"] = (EAF["31amphet"] + EAF["31anabolic"] + EAF["31coke"] + EAF["31crack"] + EAF["31ghb"] + EAF["31glue"]
                          + EAF["31heroin"] + EAF["31ketamine"] + EAF["31lsd"] + EAF["31mdma"] + EAF["31mushrooms"] + EAF["31narcotic"] 
                          + EAF["31relevin"] + EAF["31tranq"])


In [28]:
EAF["Total"] = 0

In [29]:
EAF.loc[EAF['Abuse Any']> 0, 'Total'] = 1
EAF.loc[EAF['31hash']> 0, 'Total'] = 1
EAF.loc[EAF['FTND Sum']> 0, 'Total'] = 1
print(EAF)

            ID Session  31amphet  31anabolic  31coke  31crack  31ghb  31glue  \
0         1274      BL       0.0         0.0     0.0      0.0    0.0     0.0   
1        22453      BL       0.0         0.0     0.0      0.0    0.0     0.0   
2        75717      BL       0.0         0.0     0.0      0.0    0.0     0.0   
3       106601      BL       0.0         0.0     0.0      0.0    0.0     0.0   
4       106871      BL       0.0         0.0     0.0      0.0    0.0     0.0   
...        ...     ...       ...         ...     ...      ...    ...     ...   
6766  99677574     FU3       0.0         0.0     0.0      0.0    0.0     0.0   
6767  99873252     FU3       0.0         0.0     0.0      0.0    0.0     0.0   
6768  99875982     FU3       0.0         0.0     0.0      0.0    0.0     0.0   
6769  99930021     FU3       0.0         0.0     0.0      0.0    0.0     0.0   
6770  99954902     FU3       0.0         0.0     0.0      0.0    0.0     0.0   

      31hash  31heroin  ...  31lsd  31m

### Save preprocessed file

In [30]:
# save 
EAF.to_csv('/ritter/share/data/IMAGEN/posthoc/all_ESPAD_SubAbuse.csv')

# Kessler Psychological Distress Scale (K6+)

In [9]:
# Load data
K6_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_K6PLUS_PARENT_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
K6_FU2["Session"] = "FU2"
K6_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_K6PLUS_FU3.csv", dtype={"User code":str})
K6_FU3["Session"] = "FU3"

### Cleaning

In [10]:
# combine into 1 data frame
K6 = pd.concat([K6_FU2, K6_FU3])

# clean ID column
K6["User code"] = K6["User code"].apply(lambda x: int(x.replace("-P", "")))
K6 = K6.rename(columns={"User code" : "ID"})

In [11]:
K6 = K6.set_index('ID')

In [12]:
# delete unnecessary columns
K6 = K6.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "Valid", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

## compute sum score

In [13]:
# compute sum score out of first 6 items

K6['Total'] = K6['K6PLUS_1a'] + K6['K6PLUS_1b'] + K6['K6PLUS_1c'] + K6['K6PLUS_1d'] + K6['K6PLUS_1e'] + K6['K6PLUS_1f']

In [14]:
K6

Unnamed: 0_level_0,K6PLUS_1a,K6PLUS_1b,K6PLUS_1c,K6PLUS_1d,K6PLUS_1e,K6PLUS_1f,K6PLUS_2,K6PLUS_3,K6PLUS_4,K6PLUS_5,K6PLUS_6,K6PLUS_INTRO2,Session,Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
112288,5,4,5,4,3,3,2.0,4.0,15.0,0.0,5.0,,FU2,24
215284,5,5,5,4,3,5,4.0,0.0,0.0,1.0,5.0,,FU2,27
240546,3,4,3,4,3,3,4.0,0.0,0.0,0.0,5.0,,FU2,20
297685,4,4,4,4,4,4,4.0,0.0,0.0,0.0,5.0,,FU2,24
308867,4,5,5,5,5,5,4.0,0.0,4.0,0.0,1.0,,FU2,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,3,3,2,3,2,5,1.0,0.0,0.0,0.0,3.0,,FU3,18
99873252,4,5,5,5,4,5,4.0,0.0,0.0,0.0,5.0,,FU3,28
99875982,5,5,5,5,5,5,4.0,,,,,,FU3,30
99930021,5,5,5,5,4,5,4.0,0.0,2.0,3.0,4.0,,FU3,29


### Save preprocessed file

In [23]:
# save 
K6.to_csv('/ritter/share/data/IMAGEN/posthoc/all_K6+.csv')

NameError: name 'K6' is not defined

# Brief Symptom Inventory (BSI)

In [24]:
# Load data
BSI = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_BSI_FU3.csv", dtype={"User code":str})
BSI["Session"] = "FU3"

### Cleaning

In [25]:
print(BSI.dtypes)

User code              object
Iteration               int64
Language               object
Completed              object
Completed Timestamp     int64
                        ...  
ts_1_specify           object
ts_2                    int64
ts_3                    int64
ts_4                    int64
Session                object
Length: 67, dtype: object


In [26]:
# set index to ID
BSI = BSI.rename(columns={"User code" : "ID"})

In [27]:
BSI = BSI.set_index('ID')

In [28]:
# delete unnecessary columns
BSI = BSI.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

In [29]:
# bug: replace R with NaN
BSI = BSI.replace('R', np.NaN)

In [33]:
# transform to numeric values
# using dictionary to convert specific columns
convert_dict = {'BSI_01' :             int64,
                'BSI_02' :             int64,
                'BSI_03' :             int64,
                'BSI_04' :             int64,
                'BSI_05' :             int64,
                'BSI_06' :              int64,
                'BSI_07' :              int64,
                'BSI_08' :              int64,
                'BSI_09' :              int64,
                'BSI_10' :              int64,
                'BSI_11' :              int64,
                'BSI_12' :              int64,
                'BSI_13' :              int64,
                'BSI_14' :              int64,
                'BSI_15' :              int64,
                'BSI_16' :              int64,
                'BSI_17' :              int64,
                'BSI_18' :              int64,
                'BSI_19' :              int64,
                'BSI_20' :              int64,
                'BSI_21' :              int64,
                'BSI_22' :              int64,
                'BSI_23' :              int64,
                'BSI_24' :              int64,
                'BSI_25' :              int64,
                'BSI_26' :              int64,
                'BSI_27' :              int64,
                'BSI_28' :              int64,
                'BSI_29' :              int64,
                'BSI_30' :              int64,
                'BSI_31' :              int64,
                'BSI_32' :              int64,
                'BSI_33' :              int64,
                'BSI_34' :              int64,
                'BSI_35' :              int64,
                'BSI_36' :              int64,
                'BSI_37' :              int64,
                'BSI_38' :              int64,
                'BSI_39' :              int64,
                'BSI_40' :              int64,
                'BSI_41' :              int64,
                'BSI_42' :              int64,
                'BSI_43' :              int64,
                'BSI_44' :              int64,
                'BSI_45' :              int64,
                'BSI_46' :              int64,
                'BSI_47' :              int64,
                'BSI_48' :              int64,
                'BSI_49' :              int64,
                'BSI_50' :              int64,
                'BSI_51' :              int64,
                'BSI_52' :              int64,
                'BSI_53' :              int64}
BSI = BSI.astype(convert_dict)
print(BSI.dtypes)

NameError: name 'int64' is not defined

In [31]:
print(BSI.dtypes)

BSICheck change      object
BSICheck truth       object
BSI_01              float64
BSI_02               object
BSI_03               object
BSI_04               object
BSI_05                int64
BSI_06               object
BSI_07               object
BSI_08               object
BSI_09               object
BSI_10               object
BSI_11               object
BSI_12               object
BSI_13               object
BSI_14               object
BSI_15               object
BSI_16               object
BSI_17               object
BSI_18               object
BSI_19               object
BSI_20               object
BSI_21               object
BSI_22               object
BSI_23               object
BSI_24               object
BSI_25               object
BSI_26               object
BSI_27               object
BSI_28               object
BSI_29               object
BSI_30               object
BSI_31               object
BSI_32               object
BSI_33               object
BSI_34              

## compute sum score

In [47]:
# compute mean scores per dimension

# Somatization: Items 2, 7, 23, 29, 30, 33, and 37
BSI['Somatization'] = BSI['BSI_02'] + BSI['BSI_07'] + BSI['BSI_23'] + BSI['BSI_29'] + BSI['BSI_30'] + BSI['BSI_33'] + BSI['BSI_37']
# BSI['Somatization'] = BSI['Somatization']/7

#• Obsession-Compulsion: Items 5, 15, 26, 27, 32, and 360
#• Interpersonal Sensitivity: Items 20, 21, 22, and 42
#• Depression: Items 9, 16, 17, 18, 35, and 50
#• Anxiety: Items 1, 12, 19, 38, 45, and 49
#• Hostility: Items 6, 13, 40, 41, and 46
#• Phobic Anxiety: Items 8, 28, 31, 43, and 47
#• Paranoid Ideation: Items 4, 10, 24, 48, and 51
#• Psychoticism: Items 3, 14, 34, 44, and 53. 


In [49]:
BSI.dtypes

BSICheck change     object
BSICheck truth      object
BSI_01              object
BSI_02              object
BSI_03              object
BSI_04              object
BSI_05               int64
BSI_06              object
BSI_07              object
BSI_08              object
BSI_09              object
BSI_10              object
BSI_11              object
BSI_12              object
BSI_13              object
BSI_14              object
BSI_15              object
BSI_16              object
BSI_17              object
BSI_18              object
BSI_19              object
BSI_20              object
BSI_21              object
BSI_22              object
BSI_23              object
BSI_24              object
BSI_25              object
BSI_26              object
BSI_27              object
BSI_28              object
BSI_29              object
BSI_30              object
BSI_31              object
BSI_32              object
BSI_33              object
BSI_34              object
BSI_35              object
B

### Save preprocessed file

In [15]:
# save 
K6.to_csv('/ritter/share/data/IMAGEN/posthoc/all_K6+.csv')

# CES-D

In [34]:
# Load data
CESD = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_CESD_FU3.csv", dtype={"User code":str})
CESD["Session"] = "FU3"

### Cleaning

In [35]:
# clean ID column
CESD["User code"] = CESD["User code"].apply(lambda x: int(x.replace("-P", "")))
CESD = CESD.rename(columns={"User code" : "ID"})

In [36]:
CESD = CESD.set_index('ID')

In [39]:
# delete unnecessary columns
CESD = CESD.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

## compute sum score

In [40]:
# compute sum score out of first 6 items

CESD['Total'] = CESD['CSED_01'] + CESD['CSED_02'] + CESD['CSED_03'] + CESD['CSED_04'] + CESD['CSED_05'] + CESD['CSED_06'] + CESD['CSED_07'] + CESD['CSED_08'] + CESD['CSED_09'] + CESD['CSED_10'] + CESD['CSED_11'] + CESD['CSED_12'] + CESD['CSED_13'] + CESD['CSED_14'] + CESD['CSED_15'] + CESD['CSED_16'] + CESD['CSED_17'] + CESD['CSED_18'] + CESD['CSED_19'] + CESD['CSED_20']


In [41]:
CESD

Unnamed: 0_level_0,CSED_01,CSED_02,CSED_03,CSED_04,CSED_05,CSED_06,CSED_07,CSED_08,CSED_09,CSED_10,...,CSED_13,CSED_14,CSED_15,CSED_16,CSED_17,CSED_18,CSED_19,CSED_20,Session,Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,1,1,1,1,1,1,1,4,1,1,...,1,2,1,4,2,1,1,1,FU3,31
215284,2,2,2,1,2,2,2,1,1,1,...,3,2,1,2,1,2,2,2,FU3,38
240546,1,1,1,4,2,1,1,3,1,1,...,1,2,1,3,1,1,2,2,FU3,34
297685,1,1,1,3,1,1,1,3,1,1,...,1,1,1,4,1,1,1,1,FU3,30
308867,2,1,1,4,2,1,2,3,1,2,...,2,1,2,4,1,2,1,2,FU3,39
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,1,1,1,3,3,1,1,3,1,1,...,1,1,1,4,1,1,1,2,FU3,33
99873252,2,1,1,4,2,1,2,3,1,1,...,1,1,1,3,3,1,1,1,FU3,35
99875982,1,2,1,3,1,1,1,4,1,1,...,1,1,1,4,1,1,1,1,FU3,32
99930021,1,1,1,4,2,1,2,4,1,1,...,1,1,1,4,1,1,1,1,FU3,35


### Save preprocessed file

In [42]:
# save 
CESD.to_csv('/ritter/share/data/IMAGEN/posthoc/all_CES-D.csv')

# PHQ-9

In [50]:
# Load data
PHQ = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_PHQ_FU3.csv", dtype={"User code":str})
PHQ["Session"] = "FU3"

### Cleaning

In [51]:
# clean ID column
PHQ["User code"] = PHQ["User code"].apply(lambda x: int(x.replace("-P", "")))
PHQ = PHQ.rename(columns={"User code" : "ID"})

In [52]:
PHQ = PHQ.set_index('ID')

In [53]:
# delete unnecessary columns
PHQ = PHQ.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

### compute sum score

In [54]:
# recode into original scale
# invert items 1,7,8,9,10,12,13,15,20,29 and delete non-inverted items
pre = [1,2,3,4]
post = [0,1,2,3]

PHQ['PHQ_1'] = PHQ['PHQ_1'].replace(pre, post)
PHQ['PHQ_2'] = PHQ['PHQ_2'].replace(pre, post)
PHQ['PHQ_3'] = PHQ['PHQ_3'].replace(pre, post)
PHQ['PHQ_4'] = PHQ['PHQ_4'].replace(pre, post)
PHQ['PHQ_5'] = PHQ['PHQ_5'].replace(pre, post)
PHQ['PHQ_6'] = PHQ['PHQ_6'].replace(pre, post)
PHQ['PHQ_7'] = PHQ['PHQ_7'].replace(pre, post)
PHQ['PHQ_8'] = PHQ['PHQ_8'].replace(pre, post)
PHQ['PHQ_9'] = PHQ['PHQ_9'].replace(pre, post)


# compute sum score out of first 6 items
PHQ['Total'] = PHQ['PHQ_1'] + PHQ['PHQ_2'] + PHQ['PHQ_3'] + PHQ['PHQ_4'] + PHQ['PHQ_5'] + PHQ['PHQ_6'] + PHQ['PHQ_7'] + PHQ['PHQ_8'] + PHQ['PHQ_9']


In [55]:
PHQ

Unnamed: 0_level_0,PHQ_1,PHQ_2,PHQ_3,PHQ_4,PHQ_5,PHQ_6,PHQ_7,PHQ_8,PHQ_9,Session,Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
112288,0,0,0,1,0,0,0,0,0,FU3,1
215284,0,1,3,3,2,0,0,0,0,FU3,9
240546,1,0,1,1,0,0,0,0,0,FU3,3
297685,0,0,0,0,0,1,0,0,0,FU3,1
308867,0,0,0,1,0,0,1,0,0,FU3,2
...,...,...,...,...,...,...,...,...,...,...,...
99580654,1,0,1,0,0,0,0,0,2,FU3,4
99616225,1,1,3,3,3,1,1,2,1,FU3,16
99677574,0,0,0,1,0,0,0,0,0,FU3,1
99930021,1,0,1,1,1,0,0,0,1,FU3,5


### Save preprocessed file

In [56]:
# save 
PHQ.to_csv('/ritter/share/data/IMAGEN/posthoc/all_PHQ.csv')

# ADRS

In [57]:
# Load data
ADRS_FU1 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU1/psytools/IMAGEN-IMGN_ADRS_CHILD_FU-IMAGEN_DIGEST.csv", dtype={"User code":str})
ADRS_FU1["Session"] = "FU1"
ADRS_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_ADRS_CHILD_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
ADRS_FU2["Session"] = "FU2"

### Cleaning

In [59]:
# combine into 1 data frame
ADRS = pd.concat([ADRS_FU1, ADRS_FU2])

In [60]:
# clean ID column
ADRS["User code"] = ADRS["User code"].apply(lambda x: int(x.replace("-C", "")))
ADRS = ADRS.rename(columns={"User code" : "ID"})

In [61]:
ADRS = ADRS.set_index('ID')

In [63]:
# delete unnecessary columns
ADRS = ADRS.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_2", "ts_3", "ts_4"], axis=1)

In [64]:
ADRS

Unnamed: 0_level_0,Valid,adrs1,adrs10,adrs2,adrs3,adrs4,adrs5,adrs6,adrs7,adrs8,adrs9,adrs_sum,Session
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
112288,t,0,0,0,0,0,0,0,0,0,0,0,FU1
215284,t,0,0,0,0,0,0,0,0,0,1,1,FU1
240546,t,0,0,0,0,0,0,0,0,0,0,0,FU1
308867,t,0,0,0,0,0,0,0,0,0,0,0,FU1
397377,f,0,0,0,0,0,0,0,0,0,0,0,FU1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,t,0,0,0,0,0,0,0,0,0,0,0,FU2
99873252,t,0,0,0,0,0,0,0,0,0,0,0,FU2
99875982,t,0,0,0,0,0,0,0,0,0,0,0,FU2
99930021,t,0,0,0,0,0,0,0,0,0,0,0,FU2


### Save preprocessed file

In [65]:
# save 
ADRS.to_csv('/ritter/share/data/IMAGEN/posthoc/all_ADRS.csv')

# ANXDX - Anxiety Screening

In [69]:
# Load data
ANXDX_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_ANXDX_CHILD_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
ANXDX_FU2["Session"] = "FU2"
ANXDX_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_ANXDX_FU3.csv", dtype={"User code":str})
ANXDX_FU3["Session"] = "FU3"

### Cleaning

In [70]:
# combine into 1 data frame
ANXDX = pd.concat([ANXDX_FU2, ANXDX_FU3])

# clean ID column
ANXDX["User code"] = ANXDX["User code"].apply(lambda x: int(x.replace("-C", "")))
ANXDX = ANXDX.rename(columns={"User code" : "ID"})

In [71]:
ANXDX = ANXDX.set_index('ID')

In [72]:
# delete unnecessary columns
ANXDX = ANXDX.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

### compute sum score

In [73]:
# compute sum score out of first 6 items
ANXDX['Total'] = ANXDX['ANXDX_01'] + ANXDX['ANXDX_02'] + ANXDX['ANXDX_03'] + ANXDX['ANXDX_04'] + ANXDX['ANXDX_05'] + ANXDX['ANXDX_06'] + ANXDX['ANXDX_07'] + ANXDX['ANXDX_08'] + ANXDX['ANXDX_09'] + ANXDX['ANXDX_10'] + ANXDX['ANXDX_11'] + ANXDX['ANXDX_12']


In [74]:
ANXDX

Unnamed: 0_level_0,Valid,ANXDX_01,ANXDX_02,ANXDX_03,ANXDX_04,ANXDX_05,ANXDX_06,ANXDX_07,ANXDX_08,ANXDX_09,...,ANXDX_16_EVER,ANXDX_16_FIRST,ANXDX_16_LAST,ANXDX_16_NOW,ANXDX_17_EVER,ANXDX_17_FIRST,ANXDX_17_LAST,ANXDX_17_NOW,Session,Total
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,t,0,0,0,0,0,0,0,0,0,...,0,,,,0,,,,FU2,0
215284,t,0,2,0,1,1,0,1,0,1,...,0,,,,0,,,,FU2,8
240546,t,0,1,0,0,0,1,0,0,1,...,0,,,,0,,,,FU2,4
297685,t,0,0,1,0,0,0,0,0,0,...,0,,,,0,,,,FU2,2
308867,t,0,0,0,0,0,0,0,0,1,...,0,,,,0,,,,FU2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,,0,0,0,0,0,0,0,0,0,...,0,,,,0,,,,FU3,0
99873252,,0,0,1,0,0,0,0,0,0,...,0,,,,0,,,,FU3,2
99875982,,0,0,0,0,0,0,0,0,0,...,0,,,,0,,,,FU3,0
99930021,,0,0,0,0,0,0,0,0,0,...,0,,,,0,,,,FU3,0


### Save preprocessed file

In [75]:
# save 
ANXDX.to_csv('/ritter/share/data/IMAGEN/posthoc/all_ANXDX.csv')

# CAPE-42

In [18]:
# Load data
CAPE_FU2 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU2/psytools/IMAGEN-IMGN_CAPE_CHILD_FU2-IMAGEN_DIGEST.csv", dtype={"User code":str})
CAPE_FU2["Session"] = "FU2"
CAPE_FU3 = pd.read_csv(f"/ritter/share/data/IMAGEN/IMAGEN_RAW/2.7/FU3/psytools/IMAGEN-IMGN_CAPE_FU3.csv", dtype={"User code":str})
CAPE_FU3["Session"] = "FU3"

### Cleaning

In [19]:
# combine into 1 data frame
CAPE = pd.concat([CAPE_FU2, CAPE_FU3])

# clean ID column
CAPE["User code"] = CAPE["User code"].apply(lambda x: int(x.replace("-C", "")))
CAPE = CAPE.rename(columns={"User code" : "ID"})

In [20]:
CAPE = CAPE.set_index('ID')

In [21]:
# delete unnecessary columns
CAPE = CAPE.drop(["Iteration", "Language", "Completed", "Completed Timestamp", "Processed Timestamp", "ts_1", "ts_1_specify", "ts_2", "ts_3", "ts_4"], axis=1)

### compute sum score

In [22]:
# recode into original scale
pre = [0,1,2,3] 
post = [1,2,3,4]

CAPE['CAPE42_1a'] = CAPE['CAPE42_1a'].replace(pre, post)
CAPE['CAPE42_2a'] = CAPE['CAPE42_2a'].replace(pre, post)
CAPE['CAPE42_3a'] = CAPE['CAPE42_3a'].replace(pre, post)
CAPE['CAPE42_4a'] = CAPE['CAPE42_4a'].replace(pre, post)
CAPE['CAPE42_5a'] = CAPE['CAPE42_5a'].replace(pre, post)
CAPE['CAPE42_6a'] = CAPE['CAPE42_6a'].replace(pre, post)
CAPE['CAPE42_7a'] = CAPE['CAPE42_7a'].replace(pre, post)
CAPE['CAPE42_8a'] = CAPE['CAPE42_8a'].replace(pre, post)
CAPE['CAPE42_9a'] = CAPE['CAPE42_9a'].replace(pre, post)
CAPE['CAPE42_10a'] = CAPE['CAPE42_10a'].replace(pre, post)
CAPE['CAPE42_11a'] = CAPE['CAPE42_11a'].replace(pre, post)
CAPE['CAPE42_12a'] = CAPE['CAPE42_12a'].replace(pre, post)
CAPE['CAPE42_13a'] = CAPE['CAPE42_13a'].replace(pre, post)
CAPE['CAPE42_14a'] = CAPE['CAPE42_14a'].replace(pre, post)
CAPE['CAPE42_15a'] = CAPE['CAPE42_15a'].replace(pre, post)
CAPE['CAPE42_16a'] = CAPE['CAPE42_16a'].replace(pre, post)
CAPE['CAPE42_17a'] = CAPE['CAPE42_17a'].replace(pre, post)
CAPE['CAPE42_18a'] = CAPE['CAPE42_18a'].replace(pre, post)
CAPE['CAPE42_19a'] = CAPE['CAPE42_19a'].replace(pre, post)
CAPE['CAPE42_20a'] = CAPE['CAPE42_20a'].replace(pre, post)
CAPE['CAPE42_21a'] = CAPE['CAPE42_21a'].replace(pre, post)
CAPE['CAPE42_22a'] = CAPE['CAPE42_22a'].replace(pre, post)
CAPE['CAPE42_23a'] = CAPE['CAPE42_23a'].replace(pre, post)
CAPE['CAPE42_24a'] = CAPE['CAPE42_24a'].replace(pre, post)
CAPE['CAPE42_25a'] = CAPE['CAPE42_25a'].replace(pre, post)
CAPE['CAPE42_26a'] = CAPE['CAPE42_26a'].replace(pre, post)
CAPE['CAPE42_27a'] = CAPE['CAPE42_27a'].replace(pre, post)
CAPE['CAPE42_28a'] = CAPE['CAPE42_28a'].replace(pre, post)
CAPE['CAPE42_29a'] = CAPE['CAPE42_29a'].replace(pre, post)
CAPE['CAPE42_30a'] = CAPE['CAPE42_30a'].replace(pre, post)
CAPE['CAPE42_31a'] = CAPE['CAPE42_31a'].replace(pre, post)
CAPE['CAPE42_32a'] = CAPE['CAPE42_32a'].replace(pre, post)
CAPE['CAPE42_33a'] = CAPE['CAPE42_33a'].replace(pre, post)
CAPE['CAPE42_34a'] = CAPE['CAPE42_34a'].replace(pre, post)
CAPE['CAPE42_35a'] = CAPE['CAPE42_35a'].replace(pre, post)
CAPE['CAPE42_36a'] = CAPE['CAPE42_36a'].replace(pre, post)
CAPE['CAPE42_37a'] = CAPE['CAPE42_37a'].replace(pre, post)
CAPE['CAPE42_38a'] = CAPE['CAPE42_38a'].replace(pre, post)
CAPE['CAPE42_39a'] = CAPE['CAPE42_39a'].replace(pre, post)
CAPE['CAPE42_40a'] = CAPE['CAPE42_40a'].replace(pre, post)
CAPE['CAPE42_41a'] = CAPE['CAPE42_41a'].replace(pre, post)
CAPE['CAPE42_42a'] = CAPE['CAPE42_42a'].replace(pre, post)



In [23]:
# compute sum score 

# Positive Dimension Frequency Score
CAPE["Positive Frequency"] = CAPE["CAPE42_2a"] + CAPE["CAPE42_5a"] + CAPE["CAPE42_6a"] + CAPE["CAPE42_7a"] + CAPE["CAPE42_10a"] + CAPE["CAPE42_11a"] + CAPE["CAPE42_13a"] +  CAPE["CAPE42_15a"] + CAPE["CAPE42_17a"] + CAPE["CAPE42_20a"] + CAPE["CAPE42_22a"] + CAPE["CAPE42_24a"] + CAPE["CAPE42_26a"] + CAPE["CAPE42_28a"] + CAPE["CAPE42_30a"] + CAPE["CAPE42_31a"] + CAPE["CAPE42_33a"] + CAPE["CAPE42_34a"] + CAPE["CAPE42_41a"] + CAPE["CAPE42_42a"]

# Positive Dimension Distress Score
CAPE["Positive Distress"] = CAPE["CAPE42_2b"] + CAPE["CAPE42_5b"] + CAPE["CAPE42_6b"] + CAPE["CAPE42_7b"] + CAPE["CAPE42_10b"] + CAPE["CAPE42_11b"] + CAPE["CAPE42_13b"] +  CAPE["CAPE42_15b"] + CAPE["CAPE42_17b"] + CAPE["CAPE42_20b"] + CAPE["CAPE42_22b"] + CAPE["CAPE42_24b"] + CAPE["CAPE42_26b"] + CAPE["CAPE42_28b"] + CAPE["CAPE42_30b"] + CAPE["CAPE42_31b"] + CAPE["CAPE42_33b"] + CAPE["CAPE42_34b"] + CAPE["CAPE42_41b"] + CAPE["CAPE42_42b"]

# Depressive Dimension Frequency Score
CAPE["Depression Frequency"] = CAPE["CAPE42_1a"] + CAPE["CAPE42_9a"] + CAPE["CAPE42_12a"] + CAPE["CAPE42_14a"] + CAPE["CAPE42_19a"] + CAPE["CAPE42_38a"] + CAPE["CAPE42_39a"] +  CAPE["CAPE42_40a"] 

# Depressive Dimension Distress Score
CAPE["Depression Distress"] = CAPE["CAPE42_1b"] + CAPE["CAPE42_9b"] + CAPE["CAPE42_12b"] + CAPE["CAPE42_14b"] + CAPE["CAPE42_19b"] + CAPE["CAPE42_38b"] + CAPE["CAPE42_39b"] +  CAPE["CAPE42_40b"] 

# Negative Dimension Frequency Score
CAPE["Negative Frequency"] = CAPE["CAPE42_3a"] + CAPE["CAPE42_4a"] + CAPE["CAPE42_8a"] + CAPE["CAPE42_16a"] + CAPE["CAPE42_18a"] + CAPE["CAPE42_21a"] + CAPE["CAPE42_23a"] +  CAPE["CAPE42_25a"] + CAPE["CAPE42_27a"] + CAPE["CAPE42_29a"] + CAPE["CAPE42_32a"] + CAPE["CAPE42_35a"] + CAPE["CAPE42_36a"] + CAPE["CAPE42_37a"] 

# Negative Dimension Distress Score
CAPE["Negative Distress"] = CAPE["CAPE42_3b"] + CAPE["CAPE42_4b"] + CAPE["CAPE42_8b"] + CAPE["CAPE42_16b"] + CAPE["CAPE42_18b"] + CAPE["CAPE42_21b"] + CAPE["CAPE42_23b"] +  CAPE["CAPE42_25b"] + CAPE["CAPE42_27b"] + CAPE["CAPE42_29b"] + CAPE["CAPE42_32b"] + CAPE["CAPE42_35b"] + CAPE["CAPE42_36b"] + CAPE["CAPE42_37b"] 

# Total Frequency Score
CAPE["Total Frequency"] = CAPE["Positive Frequency"] + CAPE["Depression Frequency"] + CAPE["Negative Frequency"]

In [24]:
CAPE

Unnamed: 0_level_0,Valid,CAPE42_10a,CAPE42_10b,CAPE42_11a,CAPE42_11b,CAPE42_12a,CAPE42_12b,CAPE42_13a,CAPE42_13b,CAPE42_14a,...,Session,CAPECheck change,CAPECheck truth,Positive Frequency,Positive Distress,Depression Frequency,Depression Distress,Negative Frequency,Negative Distress,Total Frequency
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
112288,t,1,,2.0,1.0,1,,2.0,1.0,1.0,...,FU2,,,23.0,,11.0,,14.0,,48.0
215284,t,2,2.0,1.0,,1,,1.0,,1.0,...,FU2,,,27.0,,14.0,,25.0,,66.0
240546,t,1,,2.0,1.0,2,3.0,2.0,1.0,1.0,...,FU2,,,25.0,,13.0,,18.0,,56.0
297685,t,1,,1.0,,1,,1.0,,1.0,...,FU2,,,20.0,,11.0,,15.0,,46.0
308867,t,1,,1.0,,1,,1.0,,1.0,...,FU2,,,20.0,,10.0,,14.0,,44.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99677574,,1,,1.0,,1,,1.0,,1.0,...,FU3,N,Y,21.0,,10.0,,17.0,,48.0
99873252,,1,,2.0,1.0,1,,1.0,,1.0,...,FU3,N,Y,24.0,,13.0,,24.0,,61.0
99875982,,1,,1.0,,1,,1.0,,1.0,...,FU3,N,Y,21.0,,10.0,,17.0,,48.0
99930021,,1,,1.0,,1,,1.0,,1.0,...,FU3,N,Y,22.0,,12.0,,19.0,,53.0


### Save preprocessed file

In [25]:
# save 
CAPE.to_csv('/ritter/share/data/IMAGEN/posthoc/all_CAPE.csv')