## Create INSTRUMENT files using dataset, and generate a Run file

POSTHOC PREPROCESSING PIPELINE

In [1]:
from glob import glob
from itertools import chain, repeat

import numpy as np
import pandas as pd

from imagen_instrumentloader import *

## 1. Load the dataset and create the INSTRUMENT files

### 1.1. PSYCHOLOGICAL PROFILE

#### 1.1.1. NEO-PI-R with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

In [2]:
# Loader object for the NEO-PI-R section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
NEO = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [3]:
# Extract the NEO questionnaire for each session (BL, FU1, FU2, FU3).
# get_instrument is called positionally as
# (h5 file, session, instrument csv, instrument name); only the session label
# and the survey CSV change from one call to the next.
df_binge_BL_NEO, df_binge_FU1_NEO, df_binge_FU2_NEO, df_binge_FU3_NEO = [
    NEO.get_instrument(
        "newlbls-fu3-espad-fu3-19a-binge-n650.h5", ses_label, survey_csv, "NEO"
    )
    for ses_label, survey_csv in [
        ("BL", "IMAGEN-IMGN_NEO_FFI_CHILD_RC5-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU1", "IMAGEN-IMGN_NEO_FFI_CHILD_FU_RC5-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU2", "IMAGEN-IMGN_NEO_FFI_FU2-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU3", "IMAGEN-IMGN_NEO_FFI_FU3.csv"),
    ]
]

<b> Create the INSTRUMENT files </b>

In [4]:
# Session labels, in the same order as the per-session frames listed below.
keys = ['BL','FU1','FU2','FU3']
session = [
    df_binge_BL_NEO,
    df_binge_FU1_NEO,
    df_binge_FU2_NEO,
    df_binge_FU3_NEO,
]

# Combine the per-session frames into one NEO instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_NEO = NEO.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm), then per-(Session, Class) means as cell output.
col_binge_NEO = df_binge_NEO.columns[:-6]
(
    df_binge_NEO
    .groupby(['Session','Class'])[col_binge_NEO]
    .mean()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> NEO = IMAGEN_instrument()
>>> df_binge_NEO = NEO.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_NEO.csv'
... )
>>> col_binge_NEO = df_binge_NEO.columns[:-6]
>>> df_binge_NEO.groupby(['Session','Class'])[col_binge_NEO].mean()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> NEO = IMAGEN_instrument()\n>>> df_binge_NEO = NEO.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_NEO.csv'\n... )\n>>> col_binge_NEO = df_binge_NEO.columns[:-6]\n>>> df_binge_NEO.groupby(['Session','Class'])[col_binge_NEO].mean()\n"

#### 1.1.2. SURPS with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

In [5]:
# Loader object for the SURPS section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
SURPS = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [6]:
# Extract the SURPS questionnaire for each session (BL, FU1, FU2, FU3).
# get_instrument is called positionally as
# (h5 file, session, instrument csv, instrument name); only the session label
# and the survey CSV change from one call to the next.
df_binge_BL_SURPS, df_binge_FU1_SURPS, df_binge_FU2_SURPS, df_binge_FU3_SURPS = [
    SURPS.get_instrument(
        "newlbls-fu3-espad-fu3-19a-binge-n650.h5", ses_label, survey_csv, "SURPS"
    )
    for ses_label, survey_csv in [
        ("BL", "IMAGEN-IMGN_SURPS_RC5-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU1", "IMAGEN-IMGN_SURPS_FU_RC5-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU2", "IMAGEN-IMGN_SURPS_FU2-IMAGEN_SURVEY_DIGEST.csv"),
        ("FU3", "IMAGEN-IMGN_SURPS_FU3.csv"),
    ]
]

<b> Create the INSTRUMENT files </b>

In [7]:
# Session labels, in the same order as the per-session frames listed below.
keys = ['BL','FU1','FU2','FU3']
session = [
    df_binge_BL_SURPS,
    df_binge_FU1_SURPS,
    df_binge_FU2_SURPS,
    df_binge_FU3_SURPS,
]

# Combine the per-session frames into one SURPS instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_SURPS = SURPS.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm), then per-(Session, Class) means as cell output.
col_binge_SURPS = df_binge_SURPS.columns[:-6]
(
    df_binge_SURPS
    .groupby(['Session','Class'])[col_binge_SURPS]
    .mean()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> SURPS = IMAGEN_instrument()
>>> df_binge_SURPS = SURPS.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_SURPS.csv'
... )
>>> col_binge_SURPS = df_binge_SURPS.columns[:-6]
>>> df_binge_SURPS.groupby(['Session','Class'])[col_binge_SURPS].mean()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> SURPS = IMAGEN_instrument()\n>>> df_binge_SURPS = SURPS.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_SURPS.csv'\n... )\n>>> col_binge_SURPS = df_binge_SURPS.columns[:-6]\n>>> df_binge_SURPS.groupby(['Session','Class'])[col_binge_SURPS].mean()\n"

### 1.2. SOCIO-ECONOMIC PROFILE

#### 1.2.1. CTQ-SF with newlbls-fu3-espad-fu3-19a-binge-n650 in One session (FU2)

In [8]:
# Loader object for the CTQ-SF section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
CTQ = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [9]:
# CTQ is only loaded for FU2 here. Positional arguments are
# (h5 file, session, instrument csv, instrument name).
df_binge_CTQ_FU2 = CTQ.get_instrument(
    "newlbls-fu3-espad-fu3-19a-binge-n650.h5", "FU2",
    "IMAGEN-IMGN_CTQ_CHILD_FU2-IMAGEN_DIGEST.csv", "CTQ",
)

<b> Create the INSTRUMENT files </b>

In [10]:
# Single-session instrument: one label and one frame.
keys = ['FU2']
session = [df_binge_CTQ_FU2]

# Build the CTQ instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_CTQ = CTQ.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm), then per-(Session, Class) means as cell output.
col_binge_CTQ = df_binge_CTQ.columns[:-6]
(
    df_binge_CTQ
    .groupby(['Session','Class'])[col_binge_CTQ]
    .mean()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> CTQ = IMAGEN_instrument()
>>> df_binge_CTQ = CTQ.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_CTQ.csv'
... )
>>> col_binge_CTQ = df_binge_CTQ.columns[:-6]
>>> df_binge_CTQ.groupby(['Session','Class'])[col_binge_CTQ].mean()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> CTQ = IMAGEN_instrument()\n>>> df_binge_CTQ = CTQ.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_CTQ.csv'\n... )\n>>> col_binge_CTQ = df_binge_CTQ.columns[:-6]\n>>> df_binge_CTQ.groupby(['Session','Class'])[col_binge_CTQ].mean()\n"

#### 1.2.2. LEQ with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

In [11]:
# Loader object for the LEQ section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
LEQ = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [12]:
# Extract the LEQ questionnaire for each session (BL, FU1, FU2, FU3).
# get_instrument is called positionally as
# (h5 file, session, instrument csv, instrument name); only the session label
# and the survey CSV change from one call to the next.
df_binge_BL_LEQ, df_binge_FU1_LEQ, df_binge_FU2_LEQ, df_binge_FU3_LEQ = [
    LEQ.get_instrument(
        "newlbls-fu3-espad-fu3-19a-binge-n650.h5", ses_label, survey_csv, "LEQ"
    )
    for ses_label, survey_csv in [
        ("BL", "IMAGEN-IMGN_LEQ_RC5-BASIC_DIGEST.csv"),
        ("FU1", "IMAGEN-IMGN_LEQ_FU_RC5-IMAGEN_DIGEST.csv"),
        ("FU2", "IMAGEN-IMGN_LEQ_FU2-IMAGEN_DIGEST.csv"),
        ("FU3", "IMAGEN-IMGN_LEQ_FU3.csv"),
    ]
]

<b> Create the INSTRUMENT files </b>

In [13]:
# Session labels, in the same order as the per-session frames listed below.
keys = ['BL','FU1','FU2','FU3']
session = [
    df_binge_BL_LEQ,
    df_binge_FU1_LEQ,
    df_binge_FU2_LEQ,
    df_binge_FU3_LEQ,
]

# Combine the per-session frames into one LEQ instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_LEQ = LEQ.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm), then per-(Session, Class) means as cell output.
col_binge_LEQ = df_binge_LEQ.columns[:-6]
(
    df_binge_LEQ
    .groupby(['Session','Class'])[col_binge_LEQ]
    .mean()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> LEQ = IMAGEN_instrument()
>>> df_binge_LEQ = LEQ.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_LEQ.csv'
... )
>>> col_binge_LEQ = df_binge_LEQ.columns[:-6]
>>> df_binge_LEQ.groupby(['Session','Class'])[col_binge_LEQ].mean()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> LEQ = IMAGEN_instrument()\n>>> df_binge_LEQ = LEQ.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_LEQ.csv'\n... )\n>>> col_binge_LEQ = df_binge_LEQ.columns[:-6]\n>>> df_binge_LEQ.groupby(['Session','Class'])[col_binge_LEQ].mean()\n"

#### 1.2.3. PBQ with newlbls-fu3-espad-fu3-19a-binge-n650 in two sessions (BL and FU1)

In [14]:
# Loader object for the PBQ section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
PBQ = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [15]:
# Extract the PBQ questionnaire for the two sessions used here (BL, FU1).
# get_instrument is called positionally as
# (h5 file, session, instrument csv, instrument name); only the session label
# and the survey CSV change between the two calls.
df_binge_BL_PBQ, df_binge_FU1_PBQ = [
    PBQ.get_instrument(
        "newlbls-fu3-espad-fu3-19a-binge-n650.h5", ses_label, survey_csv, "PBQ"
    )
    for ses_label, survey_csv in [
        ("BL", "IMAGEN-IMGN_PBQ_RC1-BASIC_DIGEST.csv"),
        ("FU1", "IMAGEN-IMGN_PBQ_FU_RC1-BASIC_DIGEST.csv"),
    ]
]

<b> Create the INSTRUMENT files </b>

In [16]:
# Session labels, in the same order as the per-session frames listed below.
keys = ['BL','FU1']
session = [
    df_binge_BL_PBQ,
    df_binge_FU1_PBQ,
]

# Combine the per-session frames into one PBQ instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_PBQ = PBQ.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm). This cell reports non-null counts per
# (Session, Class), not means as in the NEO/SURPS/CTQ/LEQ/CTS summaries.
col_binge_PBQ = df_binge_PBQ.columns[:-6]
(
    df_binge_PBQ
    .groupby(['Session','Class'])[col_binge_PBQ]
    .count()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> PBQ = IMAGEN_instrument()
>>> df_binge_PBQ = PBQ.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_PBQ.csv'
... )
>>> col_binge_PBQ = df_binge_PBQ.columns[:-6]
>>> df_binge_PBQ.groupby(['Session','Class'])[col_binge_PBQ].count()
>>> df_binge_PBQ.info()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> PBQ = IMAGEN_instrument()\n>>> df_binge_PBQ = PBQ.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_PBQ.csv'\n... )\n>>> col_binge_PBQ = df_binge_PBQ.columns[:-6]\n>>> df_binge_PBQ.groupby(['Session','Class'])[col_binge_PBQ].count()\n>>> df_binge_PBQ.info()\n"

#### 1.2.4. CTS with newlbls-fu3-espad-fu3-19a-binge-n650 in One session (BL)

In [17]:
# Loader object for the CTS section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
CTS = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [18]:
# CTS is only loaded for the baseline session here. Positional arguments are
# (h5 file, session, instrument csv, instrument name).
df_binge_BL_CTS = CTS.get_instrument(
    "newlbls-fu3-espad-fu3-19a-binge-n650.h5", "BL",
    "IMAGEN-IMGN_CTS_PARENT_RC5-BASIC_DIGEST.csv", "CTS",
)

<b> Create the INSTRUMENT files </b>

In [19]:
# Single-session instrument: one label and one frame.
keys = ['BL']
session = [df_binge_BL_CTS]

# Build the CTS instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_CTS = CTS.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm), then per-(Session, Class) means as cell output.
col_binge_CTS = df_binge_CTS.columns[:-6]
(
    df_binge_CTS
    .groupby(['Session','Class'])[col_binge_CTS]
    .mean()
)

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> CTS = IMAGEN_instrument()
>>> df_binge_CTS = CTS.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_CTS.csv'
... )
>>> col_binge_CTS = df_binge_CTS.columns[:-6]
>>> df_binge_CTS.groupby(['Session','Class'])[col_binge_CTS].mean()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> CTS = IMAGEN_instrument()\n>>> df_binge_CTS = CTS.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_CTS.csv'\n... )\n>>> col_binge_CTS = df_binge_CTS.columns[:-6]\n>>> df_binge_CTS.groupby(['Session','Class'])[col_binge_CTS].mean()\n"

#### 1.2.5. GEN with newlbls-fu3-espad-fu3-19a-binge-n650 in One session (BL)

In [20]:
# Loader object for the GEN section; the get_instrument / to_instrument
# calls for this instrument below all go through it.
GEN = IMAGEN_instrument()

<b> Load the dataset and get the INSTRUMENT files </b>

In [21]:
# GEN is only loaded for the baseline session here. Positional arguments are
# (h5 file, session, instrument csv, instrument name).
df_binge_BL_GEN = GEN.get_instrument(
    "newlbls-fu3-espad-fu3-19a-binge-n650.h5", "BL",
    "IMAGEN-IMGN_GEN_RC5-BASIC_DIGEST.csv", "GEN",
)

# All columns of the baseline frame except the trailing six.
# NOTE(review): the next code cell reassigns col_binge_GEN from the combined
# frame, so this assignment looks redundant -- confirm before removing.
col_binge_GEN = df_binge_BL_GEN.columns[:-6]

<b> Create the INSTRUMENT files </b>

In [22]:
# Single-session instrument: one label and one frame.
keys = ['BL']
session = [df_binge_BL_GEN]

# Build the GEN instrument table.
# save=True is forwarded to the loader (presumably writes the CSV that
# read_instrument loads later -- confirm against imagen_instrumentloader).
df_binge_GEN = GEN.to_instrument(session, keys, save = True)

# All columns except the trailing six (assumed to be non-item metadata such as
# Session/Class -- TODO confirm). Unlike the other instruments, this cell does
# not display a groupby summary.
col_binge_GEN = df_binge_GEN.columns[:-6]

"""
Examples
--------
>>> from imagen_instrumentloader import *
>>> GEN = IMAGEN_instrument()
>>> df_binge_GEN = GEN.read_instrument(
...     'newlbls-fu3-espad-fu3-19a-binge-n650_GEN.csv'
... )
>>> col_binge_GEN = df_binge_GEN.columns[:-6]
>>> df_binge_GEN.groupby(['Session','Class'])[col_binge_GEN].count()
"""

"\nExamples\n--------\n>>> from imagen_instrumentloader import *\n>>> GEN = IMAGEN_instrument()\n>>> df_binge_GEN = GEN.read_instrument(\n...     'newlbls-fu3-espad-fu3-19a-binge-n650_GEN.csv'\n... )\n>>> col_binge_GEN = df_binge_GEN.columns[:-6]\n>>> df_binge_GEN.groupby(['Session','Class'])[col_binge_GEN].count()\n"

#### 1.2.6. BMI with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do) Note: no BMI data is available yet.

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

### 1.3. OTHER CO-MORBIDITIES

#### 1.3.1. FTND with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

#### 1.3.2. DAST with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

#### 1.3.3. SCID-SUD with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

#### 1.3.4. DMQ with newlbls-fu3-espad-fu3-19a-binge-n650 in One session (FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

#### 1.3.5. BSI-53 with newlbls-fu3-espad-fu3-19a-binge-n650 in One session (FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

#### 1.3.6. AUDIT with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

#### 1.3.7. MAST with newlbls-fu3-espad-fu3-19a-binge-n650 in all sessions (BL, FU1, FU2, and FU3)

(to do)

<b> Load the dataset and get the INSTRUMENT files </b>

(to do)

<b> Create the INSTRUMENT files </b>

(to do)

## 2. Generate the RUN file for posthoc-analysis

### 2.1. Generate the dataset RUN file

#### 2.1.1 Validation dataset result to RUN file

<b> Load the dataset </b>

In [23]:
# Locate every run.csv produced for this label set and load the one that sorts
# last by path (presumably the most recent run if the folders are timestamped
# -- confirm against the results directory layout).
run_csv_paths = sorted(glob("results/newlbls-fu3-espad-fu3-19a-binge-*/*/run.csv"))
if not run_csv_paths:
    # Without this guard an empty match list surfaces as a bare IndexError.
    raise FileNotFoundError(
        "No run.csv found under results/newlbls-fu3-espad-fu3-19a-binge-*/*/"
    )
df = pd.read_csv(run_csv_paths[-1])

# TODO: read the Site/Sex/Class metadata directly from the h5 files instead of
# going through the previously generated CTQ instrument CSV.
CTQ = IMAGEN_instrument()
binge_CTQ = CTQ.read_instrument(
    'newlbls-fu3-espad-fu3-19a-binge-n650_CTQ.csv'
)

<b> Generate the RUN dataframe </b>

In [24]:
# Flatten run.csv (one row per training run) into one row per test prediction.

# test_ids / test_lbls are stored as stringified flat lists ("[a, b, c]"):
# strip the brackets and split on ", " to recover the raw items. Every row of
# run.csv is used, so the cell no longer depends on the file having exactly
# 28 rows.
ids_per_run = [ids.strip('][').split(', ') for ids in df['test_ids']]
lbls_per_run = [lbls.strip('][').split(', ') for lbls in df['test_lbls']]
test_id = list(chain.from_iterable(ids_per_run))
test_lbls = list(chain.from_iterable(lbls_per_run))

# Each test_probs entry evaluates to a list of per-sample probability lists;
# column 1 is kept (presumably the positive-class probability -- confirm).
# NOTE(review): eval() executes whatever text is stored in run.csv. Only use
# this on result files you generated yourself; consider ast.literal_eval if the
# stored values are plain Python literals.
test_probs = np.array([prob_lists for probs in df["test_probs"] for prob_lists in eval(probs)])[:,1]

# Unique (i, o, technique, model) combinations found in run.csv.
model = [g for (g, dfi) in df.groupby(["i", "o", "technique", "model"])]

# Tag every prediction with the configuration of the run.csv row it came from.
# This replaces "repeat each sorted group key 650 times", which was only
# correct when rows were ordered by group and each group had exactly 650 test
# IDs.
run_configs = zip(df["i"], df["o"], df["technique"], df["model"])
test_model = list(chain.from_iterable(
    repeat(config, len(ids)) for config, ids in zip(run_configs, ids_per_run)
))
col_binge_CTQ = binge_CTQ.columns[:-6]

df2 = pd.DataFrame({
    "ID" : test_id,
    "Model" : test_model,
    "Probability" : test_probs,
    "Labels" : test_lbls,})
# IDs and labels were parsed from text, so cast them to numeric dtypes.
DF = df2.astype({"ID":'int', "Probability":'float', "Labels":'float'})

# Attach Site / Sex / Class from the CTQ instrument table.
# NOTE(review): how='outer' also keeps CTQ participants that have no prediction
# (their Model/Probability/Labels become NaN) -- confirm this is intended
# rather than how='left'.
DF = pd.merge(DF, binge_CTQ[['ID','Site','Sex','Class']], on='ID', how = 'outer')

<b> Save the RUN file </b>

In [25]:
# Write the validation RUN table (DF) to CSV; index=False keeps the pandas row
# index out of the file.
DF.to_csv('newlbls-fu3-espad-fu3-19a-binge-n650_val_run.csv', index=False)

#### 2.1.2. Test dataset result to RUN file

<b> Load the dataset </b>

(to do)

<b> Generate the RUN dataframe </b>

(to do)

<b> Save the RUN file </b>

(to do)