In [None]:
import csv
import datetime
import pandas as pd
import numpy as np
import scipy.stats as ss
import time
from datetime import date, datetime
import ukbiobank.utils.utils
from ukbiobank.utils import fieldNamesToIds
from ukbiobank.utils import loadCsv
from ukbiobank.utils import addFields
from ukbiobank.utils.utils import getFieldnames
from ukbiobank.utils.utils import fieldIdsToNames

# T1w Modalities (IDPs)

- 1101	Regional grey matter volumes (FAST)	139
- 1102	Subcortical volumes (FIRST)	14
- 190	Freesurfer ASEG	99 - 2
- 195	Freesurfer BA exvivo	84
- 197	Freesurfer a2009s	444
- 196	Freesurfer DKT	186
- 194	Freesurfer desikan gw	70
- 193	Freesurfer desikan pial	66
- 192	Freesurfer desikan white	202
- 191	Freesurfer subsegmentation	121

T1 measures:
- Volume of GM
- Volume of subcortical structures
- Volume of CSF
- Volume of ventricles
- Volume of CC
- Volume of subcortical - whole brain
- Volume of Cerebellum!
- Mean intensity
- Area
- Mean thickness
- Grey-white contrast

In [None]:
# Load the UK Biobank CSV export.
# NOTE(review): absolute, machine-specific path — consider moving it to a config cell.
csv_path = '/ukbbdata/ukbb_oct23/ukb.csv'
ukb = ukbiobank.ukbio(ukb_csv=csv_path)

### 1. Regional grey matter volumes (FAST): 139

Get the fields

In [None]:
# FAST regional grey-matter volumes: 'eid' plus 139 field IDs, instance 2 only.
df_struct_fast = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        25888, 25889, 25822, 25823, 25892, 25880, 25881, 25864,
        25865, 25838, 25839, 25840, 25841, 25900, 25902, 25901,
        25903, 25905, 25904, 25844, 25845, 25830, 25831, 25862,
        25863, 25846, 25847, 25782, 25783, 25870, 25871, 25886,
        25887, 25893, 25894, 25915, 25917, 25916, 25792, 25793,
        25790, 25791, 25808, 25809, 25810, 25811, 25812, 25813,
        25784, 25785, 25828, 25829, 25832, 25833, 25826, 25827,
        25824, 25825, 25852, 25853, 25788, 25789, 25802, 25803,
        25804, 25805, 25806, 25807, 25860, 25861, 25876, 25877,
        25884, 25885, 25836, 25837, 25848, 25849, 25850, 25851,
        25866, 25867, 25868, 25869, 25872, 25873, 25814, 25815,
        25794, 25795, 25842, 25843, 25882, 25883, 25834, 25835,
        25786, 25787, 25816, 25817, 25798, 25799, 25800, 25801,
        25874, 25875, 25818, 25819, 25820, 25821, 25854, 25855,
        25856, 25857, 25858, 25859, 25796, 25797, 25878, 25879,
        25895, 25896, 25897, 25899, 25898, 25909, 25911, 25910,
        25912, 25914, 25913, 25906, 25908, 25907, 25890, 25891,
        25918, 25919, 25920,
    ],
)

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_fast_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_fast,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_fast_names_nona = struct_fast_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_fast_names_nona.columns = struct_fast_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_fast_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_fast_names_nona.csv', index=False)
struct_fast_names_nona

### 2. Subcortical volumes (FIRST): 14

Get the fields

In [None]:
# FIRST subcortical volumes: 'eid' plus 14 field IDs, instance 2 only.
df_struct_sub_first = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        25023, 25024, 25021, 25022, 25013, 25014, 25019, 25020,
        25017, 25018, 25015, 25016, 25011, 25012,
    ],
)

In [None]:
# NOTE(review): struct_sub_first (the addFields result) is not used by any visible cell;
# the named table below is built from df_struct_sub_first, which was loaded with this
# same field list. Confirm whether this call is still needed.
struct_sub_first = addFields(
    ukbio=ukb,
    df=df_struct_sub_first,
    instances=2,
    fields=[
        'eid',
        25023, 25024, 25021, 25022, 25013, 25014, 25019, 25020,
        25017, 25018, 25015, 25016, 25011, 25012,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_sub_first_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_sub_first,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_sub_first_names_nona = struct_sub_first_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_sub_first_names_nona.columns = struct_sub_first_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_sub_first_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_sub_first_names_nona.csv', index=False)
struct_sub_first_names_nona

### 3. Freesurfer ASEG

### 3.1. Freesurfer ASEG, Mean intensity (whole brain + left/right): 41

#### 3.1.1 Freesurfer ASEG, Mean intensity whole brain

- 26501	Mean intensity of 3rd-Ventricle (whole brain)		
- 26502	Mean intensity of 4th-Ventricle (whole brain)		
- 26503	Mean intensity of 5th-Ventricle (whole brain)	
- 26504	Mean intensity of Brain-Stem (whole brain)		
- 26513	Mean intensity of CC-Anterior (whole brain)		
- 26511	Mean intensity of CC-Central (whole brain)		
- 26512	Mean intensity of CC-Mid-Anterior (whole brain)		
- 26510	Mean intensity of CC-Mid-Posterior (whole brain)		
- 26509	Mean intensity of CC-Posterior (whole brain)		
- 26505	Mean intensity of CSF (whole brain)		
- 26508	Mean intensity of Optic-Chiasm (whole brain)
- 26506	Mean intensity of WM-hypointensities (whole brain)			
- 26507	Mean intensity of non-WM-hypointensities (whole brain)						

#### 3.1.2 Freesurfer ASEG, Mean intensity left/right

- 26548	Mean intensity of Accumbens-area (left hemisphere)			
- 26579	Mean intensity of Accumbens-area (right hemisphere)			
- 26547	Mean intensity of Amygdala (left hemisphere)			
- 26578	Mean intensity of Amygdala (right hemisphere)			
- 26543	Mean intensity of Caudate (left hemisphere)			
- 26574	Mean intensity of Caudate (right hemisphere)			
- 26541	Mean intensity of Cerebellum-Cortex (left hemisphere)			
- 26572	Mean intensity of Cerebellum-Cortex (right hemisphere)			
- 26540	Mean intensity of Cerebellum-White-Matter (left hemisphere)			
- 26571	Mean intensity of Cerebellum-White-Matter (right hemisphere)			
- 26546	Mean intensity of Hippocampus (left hemisphere)			
- 26577	Mean intensity of Hippocampus (right hemisphere)			
- 26539	Mean intensity of Inf-Lat-Vent (left hemisphere)			
- 26570	Mean intensity of Inf-Lat-Vent (right hemisphere)			
- 26538	Mean intensity of Lateral-Ventricle (left hemisphere)			
- 26569	Mean intensity of Lateral-Ventricle (right hemisphere)		
- 26545	Mean intensity of Pallidum (left hemisphere)			
- 26576	Mean intensity of Pallidum (right hemisphere)			
- 26544	Mean intensity of Putamen (left hemisphere)			
- 26575	Mean intensity of Putamen (right hemisphere)			
- 26542	Mean intensity of Thalamus-Proper (left hemisphere)			
- 26573	Mean intensity of Thalamus-Proper (right hemisphere)			
- 26549	Mean intensity of VentralDC (left hemisphere)			
- 26580	Mean intensity of VentralDC (right hemisphere)	
- 26551	Mean intensity of choroid-plexus (left hemisphere)			
- 26582	Mean intensity of choroid-plexus (right hemisphere)						
- 26550	Mean intensity of vessel (left hemisphere)			
- 26581	Mean intensity of vessel (right hemisphere)			

Get the fields

In [None]:
# Freesurfer ASEG mean intensity: 'eid' plus 41 field IDs, instance 2 only.
df_struct_aseg_mean_intensity = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        # whole brain (13)
        26501, 26502, 26503, 26504, 26513, 26511, 26512, 26510,
        26509, 26505, 26508, 26506, 26507,
        # left/right hemisphere pairs (28)
        26548, 26579, 26547, 26578, 26543, 26574, 26541, 26572,
        26540, 26571, 26546, 26577, 26539, 26570, 26538, 26569,
        26545, 26576, 26544, 26575, 26542, 26573, 26549, 26580,
        26551, 26582, 26550, 26581,
    ],
)

In [None]:
# NOTE(review): struct_aseg_mean_intensity (the addFields result) is not used by any
# visible cell; the named table below is built from df_struct_aseg_mean_intensity,
# which was loaded with this same field list. Confirm whether this call is still needed.
struct_aseg_mean_intensity = addFields(
    ukbio=ukb,
    df=df_struct_aseg_mean_intensity,
    instances=2,
    fields=[
        'eid',
        # whole brain (13)
        26501, 26502, 26503, 26504, 26513, 26511, 26512, 26510,
        26509, 26505, 26508, 26506, 26507,
        # left/right hemisphere pairs (28)
        26548, 26579, 26547, 26578, 26543, 26574, 26541, 26572,
        26540, 26571, 26546, 26577, 26539, 26570, 26538, 26569,
        26545, 26576, 26544, 26575, 26542, 26573, 26549, 26580,
        26551, 26582, 26550, 26581,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_aseg_mean_intensity_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_aseg_mean_intensity,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_aseg_mean_intensity_names_nona = struct_aseg_mean_intensity_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_aseg_mean_intensity_names_nona.columns = struct_aseg_mean_intensity_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_aseg_mean_intensity_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_aseg_mean_intensity_names_nona.csv', index=False)
struct_aseg_mean_intensity_names_nona

### 3.2. Freesurfer ASEG, Volume (whole brain + left/right): 54

#### 3.2.1 Freesurfer ASEG, Volume whole brain

- 26523	Volume of 3rd-Ventricle (whole brain)		
- 26524	Volume of 4th-Ventricle (whole brain)		
- 26525	Volume of 5th-Ventricle (whole brain)		
- 26526	Volume of Brain-Stem (whole brain)		
- 26514	Volume of BrainSeg (whole brain)		
- 26515	Volume of BrainSegNotVent (whole brain)		
- 26516	Volume of BrainSegNotVentSurf (whole brain)		
- 26535	Volume of CC-Anterior (whole brain)		
- 26533	Volume of CC-Central (whole brain)		
- 26534	Volume of CC-Mid-Anterior (whole brain)		
- 26532	Volume of CC-Mid-Posterior (whole brain)		
- 26531	Volume of CC-Posterior (whole brain)		
- 26527	Volume of CSF (whole brain)		
- 26521	Volume of EstimatedTotalIntraCranial (whole brain)			
- 26530	Volume of Optic-Chiasm (whole brain)			
- 26517	Volume of SubCortGray (whole brain)			
- 26519	Volume of SupraTentorial (whole brain)			
- 26520	Volume of SupraTentorialNotVent (whole brain)			
- 26518	Volume of TotalGray (whole brain)			
- 26522	Volume of VentricleChoroid (whole brain)			
- 26528	Volume of WM-hypointensities (whole brain)			
- 26529	Volume of non-WM-hypointensities (whole brain)					

#### 3.2.2 Freesurfer ASEG, Volume left/right

- 26564	Volume of Accumbens-area (left hemisphere)		
- 26595	Volume of Accumbens-area (right hemisphere)		
- 26563	Volume of Amygdala (left hemisphere)		
- 26594	Volume of Amygdala (right hemisphere)	
- 26559	Volume of Caudate (left hemisphere)			
- 26590	Volume of Caudate (right hemisphere)			
- 26557	Volume of Cerebellum-Cortex (left hemisphere)			
- 26588	Volume of Cerebellum-Cortex (right hemisphere)			
- 26556	Volume of Cerebellum-White-Matter (left hemisphere)			
- 26587	Volume of Cerebellum-White-Matter (right hemisphere)			
- 26553	Volume of CerebralWhiteMatter (left hemisphere)			
- 26584	Volume of CerebralWhiteMatter (right hemisphere)			
- 26552	Volume of Cortex (left hemisphere)			
- 26583	Volume of Cortex (right hemisphere)			
- 26562	Volume of Hippocampus (left hemisphere)			
- 26593	Volume of Hippocampus (right hemisphere)			
- 26555	Volume of Inf-Lat-Vent (left hemisphere)			
- 26586	Volume of Inf-Lat-Vent (right hemisphere)			
- 26554	Volume of Lateral-Ventricle (left hemisphere)			
- 26585	Volume of Lateral-Ventricle (right hemisphere)			
- 26561	Volume of Pallidum (left hemisphere)			
- 26592	Volume of Pallidum (right hemisphere)			
- 26560	Volume of Putamen (left hemisphere)			
- 26591	Volume of Putamen (right hemisphere)			
- 26558	Volume of Thalamus-Proper (left hemisphere)			
- 26589	Volume of Thalamus-Proper (right hemisphere)			
- 26565	Volume of VentralDC (left hemisphere)			
- 26596	Volume of VentralDC (right hemisphere)			
- 26567	Volume of choroid-plexus (left hemisphere)			
- 26598	Volume of choroid-plexus (right hemisphere)			
- 26566	Volume of vessel (left hemisphere)			
- 26597	Volume of vessel (right hemisphere)				


Get the fields

In [None]:
# Freesurfer ASEG volumes: 'eid' plus 54 field IDs, instance 2 only.
df_struct_aseg_volume = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        # whole brain (22)
        26523, 26524, 26525, 26526, 26514, 26515, 26516, 26535,
        26533, 26534, 26532, 26531, 26527, 26521, 26530, 26517,
        26519, 26520, 26518, 26522, 26528, 26529,
        # left/right hemisphere pairs (32)
        26564, 26595, 26563, 26594, 26559, 26590, 26557, 26588,
        26556, 26587, 26553, 26584, 26552, 26583, 26562, 26593,
        26555, 26586, 26554, 26585, 26561, 26592, 26560, 26591,
        26558, 26589, 26565, 26596, 26567, 26598, 26566, 26597,
    ],
)

In [None]:
# NOTE(review): struct_aseg_volume (the addFields result) is not used by any visible
# cell; the named table below is built from df_struct_aseg_volume, which was loaded
# with this same field list. Confirm whether this call is still needed.
struct_aseg_volume = addFields(
    ukbio=ukb,
    df=df_struct_aseg_volume,
    instances=2,
    fields=[
        'eid',
        # whole brain (22)
        26523, 26524, 26525, 26526, 26514, 26515, 26516, 26535,
        26533, 26534, 26532, 26531, 26527, 26521, 26530, 26517,
        26519, 26520, 26518, 26522, 26528, 26529,
        # left/right hemisphere pairs (32)
        26564, 26595, 26563, 26594, 26559, 26590, 26557, 26588,
        26556, 26587, 26553, 26584, 26552, 26583, 26562, 26593,
        26555, 26586, 26554, 26585, 26561, 26592, 26560, 26591,
        26558, 26589, 26565, 26596, 26567, 26598, 26566, 26597,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_aseg_volume_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_aseg_volume,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_aseg_volume_names_nona = struct_aseg_volume_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_aseg_volume_names_nona.columns = struct_aseg_volume_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_aseg_volume_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_aseg_volume_names_nona.csv', index=False)
struct_aseg_volume_names_nona

### 4. Freesurfer BA exvivo

#### 4.1. Freesurfer BA exvivo Area: 28

Get the fields

In [None]:
# Freesurfer BA exvivo area: 'eid' plus 28 field IDs, instance 2 only.
df_struct_ba_exvivo_area = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27059, 27101, 27060, 27102, 27061, 27103, 27062, 27104,
        27066, 27108, 27067, 27109, 27063, 27105, 27064, 27106,
        27065, 27107, 27070, 27112, 27068, 27110, 27069, 27111,
        27072, 27114, 27071, 27113,
    ],
)

In [None]:
# NOTE(review): struct_ba_exvivo_area (the addFields result) is not used by any visible
# cell; the named table below is built from df_struct_ba_exvivo_area, which was loaded
# with this same field list. Confirm whether this call is still needed.
struct_ba_exvivo_area = addFields(
    ukbio=ukb,
    df=df_struct_ba_exvivo_area,
    instances=2,
    fields=[
        'eid',
        27059, 27101, 27060, 27102, 27061, 27103, 27062, 27104,
        27066, 27108, 27067, 27109, 27063, 27105, 27064, 27106,
        27065, 27107, 27070, 27112, 27068, 27110, 27069, 27111,
        27072, 27114, 27071, 27113,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_ba_exvivo_area_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_ba_exvivo_area,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_ba_exvivo_area_names_nona = struct_ba_exvivo_area_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_ba_exvivo_area_names_nona.columns = struct_ba_exvivo_area_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_ba_exvivo_area_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_ba_exvivo_area_names_nona.csv', index=False)
struct_ba_exvivo_area_names_nona

#### 4.2. Freesurfer BA exvivo Mean Thickness: 28

Get the fields

In [None]:
# Freesurfer BA exvivo mean thickness: 'eid' plus 28 field IDs, instance 2 only.
df_struct_ba_exvivo_mean_thickness = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27073, 27115, 27074, 27116, 27075, 27117, 27076, 27118,
        27080, 27122, 27081, 27123, 27077, 27119, 27078, 27120,
        27079, 27121, 27084, 27126, 27082, 27124, 27083, 27125,
        27086, 27128, 27085, 27127,
    ],
)

In [None]:
# NOTE(review): struct_ba_exvivo_mean_thickness (the addFields result) is not used by
# any visible cell; the named table below is built from
# df_struct_ba_exvivo_mean_thickness, which was loaded with this same field list.
# Confirm whether this call is still needed.
struct_ba_exvivo_mean_thickness = addFields(
    ukbio=ukb,
    df=df_struct_ba_exvivo_mean_thickness,
    instances=2,
    fields=[
        'eid',
        27073, 27115, 27074, 27116, 27075, 27117, 27076, 27118,
        27080, 27122, 27081, 27123, 27077, 27119, 27078, 27120,
        27079, 27121, 27084, 27126, 27082, 27124, 27083, 27125,
        27086, 27128, 27085, 27127,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_ba_exvivo_mean_thickness_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_ba_exvivo_mean_thickness,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_ba_exvivo_mean_thickness_names_nona = struct_ba_exvivo_mean_thickness_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_ba_exvivo_mean_thickness_names_nona.columns = struct_ba_exvivo_mean_thickness_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_ba_exvivo_mean_thickness_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_ba_exvivo_mean_thickness_names_nona.csv', index=False)
struct_ba_exvivo_mean_thickness_names_nona

#### 4.3. Freesurfer BA exvivo Volume: 28

Get the fields

In [None]:
# Freesurfer BA exvivo volume: 'eid' plus 28 field IDs, instance 2 only.
df_struct_ba_exvivo_volume = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27087, 27129, 27088, 27130, 27089, 27131, 27090, 27132,
        27094, 27136, 27095, 27137, 27091, 27133, 27092, 27134,
        27093, 27135, 27098, 27140, 27096, 27138, 27097, 27139,
        27100, 27142, 27099, 27141,
    ],
)

In [None]:
# NOTE(review): struct_ba_exvivo_volume (the addFields result) is not used by any
# visible cell; the named table below is built from df_struct_ba_exvivo_volume, which
# was loaded with this same field list. Confirm whether this call is still needed.
struct_ba_exvivo_volume = addFields(
    ukbio=ukb,
    df=df_struct_ba_exvivo_volume,
    instances=2,
    fields=[
        'eid',
        27087, 27129, 27088, 27130, 27089, 27131, 27090, 27132,
        27094, 27136, 27095, 27137, 27091, 27133, 27092, 27134,
        27093, 27135, 27098, 27140, 27096, 27138, 27097, 27139,
        27100, 27142, 27099, 27141,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_ba_exvivo_volume_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_ba_exvivo_volume,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_ba_exvivo_volume_names_nona = struct_ba_exvivo_volume_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_ba_exvivo_volume_names_nona.columns = struct_ba_exvivo_volume_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_ba_exvivo_volume_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_ba_exvivo_volume_names_nona.csv', index=False)
struct_ba_exvivo_volume_names_nona

### 5. Freesurfer a2009s

#### 5.1 Freesurfer a2009s Area: 148

Get the fields

In [None]:
# Freesurfer a2009s area: 'eid' plus 148 field IDs, instance 2 only.
df_struct_a2009s_area = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27334, 27556, 27335, 27557, 27336, 27558, 27329, 27551,
        27330, 27552, 27331, 27553, 27332, 27554, 27333, 27555,
        27345, 27567, 27337, 27559, 27338, 27560, 27339, 27561,
        27340, 27562, 27341, 27563, 27342, 27564, 27343, 27565,
        27344, 27566, 27346, 27568, 27349, 27571, 27350, 27572,
        27351, 27573, 27347, 27569, 27348, 27570, 27352, 27574,
        27353, 27575, 27354, 27576, 27355, 27577, 27356, 27578,
        27357, 27579, 27358, 27580, 27359, 27581, 27360, 27582,
        27361, 27583, 27362, 27584, 27363, 27585, 27364, 27586,
        27365, 27587, 27366, 27588, 27367, 27589, 27368, 27590,
        27369, 27591, 27370, 27592, 27371, 27593, 27372, 27594,
        27373, 27595, 27374, 27596, 27375, 27597, 27376, 27598,
        27377, 27599, 27378, 27600, 27379, 27601, 27380, 27602,
        27381, 27603, 27382, 27604, 27383, 27605, 27384, 27606,
        27385, 27607, 27386, 27608, 27388, 27610, 27389, 27611,
        27387, 27609, 27392, 27614, 27390, 27612, 27391, 27613,
        27393, 27615, 27394, 27616, 27395, 27617, 27396, 27618,
        27397, 27619, 27398, 27620, 27399, 27621, 27400, 27622,
        27401, 27623, 27402, 27624,
    ],
)

In [None]:
# NOTE(review): struct_a2009s_area (the addFields result) is not used by any visible
# cell; the named table below is built from df_struct_a2009s_area, which was loaded
# with this same field list. Confirm whether this call is still needed.
struct_a2009s_area = addFields(
    ukbio=ukb,
    df=df_struct_a2009s_area,
    instances=2,
    fields=[
        'eid',
        27334, 27556, 27335, 27557, 27336, 27558, 27329, 27551,
        27330, 27552, 27331, 27553, 27332, 27554, 27333, 27555,
        27345, 27567, 27337, 27559, 27338, 27560, 27339, 27561,
        27340, 27562, 27341, 27563, 27342, 27564, 27343, 27565,
        27344, 27566, 27346, 27568, 27349, 27571, 27350, 27572,
        27351, 27573, 27347, 27569, 27348, 27570, 27352, 27574,
        27353, 27575, 27354, 27576, 27355, 27577, 27356, 27578,
        27357, 27579, 27358, 27580, 27359, 27581, 27360, 27582,
        27361, 27583, 27362, 27584, 27363, 27585, 27364, 27586,
        27365, 27587, 27366, 27588, 27367, 27589, 27368, 27590,
        27369, 27591, 27370, 27592, 27371, 27593, 27372, 27594,
        27373, 27595, 27374, 27596, 27375, 27597, 27376, 27598,
        27377, 27599, 27378, 27600, 27379, 27601, 27380, 27602,
        27381, 27603, 27382, 27604, 27383, 27605, 27384, 27606,
        27385, 27607, 27386, 27608, 27388, 27610, 27389, 27611,
        27387, 27609, 27392, 27614, 27390, 27612, 27391, 27613,
        27393, 27615, 27394, 27616, 27395, 27617, 27396, 27618,
        27397, 27619, 27398, 27620, 27399, 27621, 27400, 27622,
        27401, 27623, 27402, 27624,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_a2009s_area_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_a2009s_area,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_a2009s_area_names_nona = struct_a2009s_area_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_a2009s_area_names_nona.columns = struct_a2009s_area_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_a2009s_area_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_a2009s_area_names_nona.csv', index=False)
struct_a2009s_area_names_nona

#### 5.2 Freesurfer a2009s Mean thickness: 148

In [None]:
# Freesurfer a2009s mean thickness: 'eid' plus 148 field IDs, instance 2 only.
df_struct_a2009s_mean_thickness = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27408, 27630, 27409, 27631, 27410, 27632, 27403, 27625,
        27404, 27626, 27405, 27627, 27406, 27628, 27407, 27629,
        27419, 27641, 27411, 27633, 27412, 27634, 27413, 27635,
        27414, 27636, 27415, 27637, 27416, 27638, 27417, 27639,
        27418, 27640, 27420, 27642, 27423, 27645, 27424, 27646,
        27425, 27647, 27421, 27643, 27422, 27644, 27426, 27648,
        27427, 27649, 27428, 27650, 27429, 27651, 27430, 27652,
        27431, 27653, 27432, 27654, 27433, 27655, 27434, 27656,
        27435, 27657, 27436, 27658, 27437, 27659, 27438, 27660,
        27439, 27661, 27440, 27662, 27441, 27663, 27442, 27664,
        27443, 27665, 27444, 27666, 27445, 27667, 27446, 27668,
        27447, 27669, 27448, 27670, 27449, 27671, 27450, 27672,
        27451, 27673, 27452, 27674, 27453, 27675, 27454, 27676,
        27455, 27677, 27456, 27678, 27457, 27679, 27458, 27680,
        27459, 27681, 27460, 27682, 27462, 27684, 27463, 27685,
        27461, 27683, 27466, 27688, 27464, 27686, 27465, 27687,
        27467, 27689, 27468, 27690, 27469, 27691, 27470, 27692,
        27471, 27693, 27472, 27694, 27473, 27695, 27474, 27696,
        27475, 27697, 27476, 27698,
    ],
)

In [None]:
# NOTE(review): struct_a2009s_mean_thickness (the addFields result) is not used by any
# visible cell; the named table below is built from df_struct_a2009s_mean_thickness,
# which was loaded with this same field list. Confirm whether this call is still needed.
struct_a2009s_mean_thickness = addFields(
    ukbio=ukb,
    df=df_struct_a2009s_mean_thickness,
    instances=2,
    fields=[
        'eid',
        27408, 27630, 27409, 27631, 27410, 27632, 27403, 27625,
        27404, 27626, 27405, 27627, 27406, 27628, 27407, 27629,
        27419, 27641, 27411, 27633, 27412, 27634, 27413, 27635,
        27414, 27636, 27415, 27637, 27416, 27638, 27417, 27639,
        27418, 27640, 27420, 27642, 27423, 27645, 27424, 27646,
        27425, 27647, 27421, 27643, 27422, 27644, 27426, 27648,
        27427, 27649, 27428, 27650, 27429, 27651, 27430, 27652,
        27431, 27653, 27432, 27654, 27433, 27655, 27434, 27656,
        27435, 27657, 27436, 27658, 27437, 27659, 27438, 27660,
        27439, 27661, 27440, 27662, 27441, 27663, 27442, 27664,
        27443, 27665, 27444, 27666, 27445, 27667, 27446, 27668,
        27447, 27669, 27448, 27670, 27449, 27671, 27450, 27672,
        27451, 27673, 27452, 27674, 27453, 27675, 27454, 27676,
        27455, 27677, 27456, 27678, 27457, 27679, 27458, 27680,
        27459, 27681, 27460, 27682, 27462, 27684, 27463, 27685,
        27461, 27683, 27466, 27688, 27464, 27686, 27465, 27687,
        27467, 27689, 27468, 27690, 27469, 27691, 27470, 27692,
        27471, 27693, 27472, 27694, 27473, 27695, 27474, 27696,
        27475, 27697, 27476, 27698,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_a2009s_mean_thickness_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_a2009s_mean_thickness,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_a2009s_mean_thickness_names_nona = struct_a2009s_mean_thickness_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_a2009s_mean_thickness_names_nona.columns = struct_a2009s_mean_thickness_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_a2009s_mean_thickness_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_a2009s_mean_thickness_names_nona.csv', index=False)
struct_a2009s_mean_thickness_names_nona

#### 5.3 Freesurfer a2009s volume G+S: 148

In [None]:
# Freesurfer a2009s volume: 'eid' plus 148 field IDs, instance 2 only.
df_struct_a2009s_volume = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27482, 27704, 27483, 27705, 27484, 27706, 27477, 27699,
        27478, 27700, 27479, 27701, 27480, 27702, 27481, 27703,
        27493, 27715, 27485, 27707, 27486, 27708, 27487, 27709,
        27488, 27710, 27489, 27711, 27490, 27712, 27491, 27713,
        27492, 27714, 27494, 27716, 27497, 27719, 27498, 27720,
        27499, 27721, 27495, 27717, 27496, 27718, 27500, 27722,
        27501, 27723, 27502, 27724, 27503, 27725, 27504, 27726,
        27505, 27727, 27506, 27728, 27507, 27729, 27508, 27730,
        27509, 27731, 27510, 27732, 27511, 27733, 27512, 27734,
        27513, 27735, 27514, 27736, 27515, 27737, 27516, 27738,
        27517, 27739, 27518, 27740, 27519, 27741, 27520, 27742,
        27521, 27743, 27522, 27744, 27523, 27745, 27524, 27746,
        27525, 27747, 27526, 27748, 27527, 27749, 27528, 27750,
        27529, 27751, 27530, 27752, 27531, 27753, 27532, 27754,
        27533, 27755, 27534, 27756, 27536, 27758, 27537, 27759,
        27535, 27757, 27540, 27762, 27538, 27760, 27539, 27761,
        27541, 27763, 27542, 27764, 27543, 27765, 27544, 27766,
        27545, 27767, 27546, 27768, 27547, 27769, 27548, 27770,
        27549, 27771, 27550, 27772,
    ],
)

In [None]:
# NOTE(review): struct_a2009s_volume (the addFields result) is not used by any visible
# cell; the named table below is built from df_struct_a2009s_volume, which was loaded
# with this same field list. Confirm whether this call is still needed.
struct_a2009s_volume = addFields(
    ukbio=ukb,
    df=df_struct_a2009s_volume,
    instances=2,
    fields=[
        'eid',
        27482, 27704, 27483, 27705, 27484, 27706, 27477, 27699,
        27478, 27700, 27479, 27701, 27480, 27702, 27481, 27703,
        27493, 27715, 27485, 27707, 27486, 27708, 27487, 27709,
        27488, 27710, 27489, 27711, 27490, 27712, 27491, 27713,
        27492, 27714, 27494, 27716, 27497, 27719, 27498, 27720,
        27499, 27721, 27495, 27717, 27496, 27718, 27500, 27722,
        27501, 27723, 27502, 27724, 27503, 27725, 27504, 27726,
        27505, 27727, 27506, 27728, 27507, 27729, 27508, 27730,
        27509, 27731, 27510, 27732, 27511, 27733, 27512, 27734,
        27513, 27735, 27514, 27736, 27515, 27737, 27516, 27738,
        27517, 27739, 27518, 27740, 27519, 27741, 27520, 27742,
        27521, 27743, 27522, 27744, 27523, 27745, 27524, 27746,
        27525, 27747, 27526, 27748, 27527, 27749, 27528, 27750,
        27529, 27751, 27530, 27752, 27531, 27753, 27532, 27754,
        27533, 27755, 27534, 27756, 27536, 27758, 27537, 27759,
        27535, 27757, 27540, 27762, 27538, 27760, 27539, 27761,
        27541, 27763, 27542, 27764, 27543, 27765, 27544, 27766,
        27545, 27767, 27546, 27768, 27547, 27769, 27548, 27770,
        27549, 27771, 27550, 27772,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_a2009s_volume_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_a2009s_volume,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_a2009s_volume_names_nona = struct_a2009s_volume_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_a2009s_volume_names_nona.columns = struct_a2009s_volume_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_a2009s_volume_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_a2009s_volume_names_nona.csv', index=False)
struct_a2009s_volume_names_nona

### 6. Freesurfer DKT

#### 6.1 Freesurfer DKT area: 62

Get the fields

In [None]:
# Freesurfer DKT area: 'eid' plus 62 field IDs, instance 2 only.
df_struct_dkt_area = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27143, 27236, 27144, 27237, 27145, 27238, 27146, 27239,
        27147, 27240, 27148, 27241, 27149, 27242, 27173, 27266,
        27150, 27243, 27151, 27244, 27152, 27245, 27153, 27246,
        27154, 27247, 27155, 27248, 27157, 27250, 27156, 27249,
        27158, 27251, 27159, 27252, 27160, 27253, 27161, 27254,
        27162, 27255, 27163, 27256, 27164, 27257, 27165, 27258,
        27166, 27259, 27167, 27260, 27168, 27261, 27169, 27262,
        27170, 27263, 27171, 27264, 27172, 27265,
    ],
)

In [None]:
# NOTE(review): struct_dkt_area (the addFields result) is not used by any visible cell;
# the named table below is built from df_struct_dkt_area, which was loaded with this
# same field list. Confirm whether this call is still needed.
struct_dkt_area = addFields(
    ukbio=ukb,
    df=df_struct_dkt_area,
    instances=2,
    fields=[
        'eid',
        27143, 27236, 27144, 27237, 27145, 27238, 27146, 27239,
        27147, 27240, 27148, 27241, 27149, 27242, 27173, 27266,
        27150, 27243, 27151, 27244, 27152, 27245, 27153, 27246,
        27154, 27247, 27155, 27248, 27157, 27250, 27156, 27249,
        27158, 27251, 27159, 27252, 27160, 27253, 27161, 27254,
        27162, 27255, 27163, 27256, 27164, 27257, 27165, 27258,
        27166, 27259, 27167, 27260, 27168, 27261, 27169, 27262,
        27170, 27263, 27171, 27264, 27172, 27265,
    ],
)
# Swap the field-ID column headers for field names (via the package helper).
struct_dkt_area_names = ukbiobank.utils.utils.fieldIdsToNames(
    ukbio=ukb,
    df=df_struct_dkt_area,
)

Drop NAs, remove instance number, and save

In [None]:
# Keep complete cases only: drop every row that has at least one missing value.
struct_dkt_area_names_nona = struct_dkt_area_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_dkt_area_names_nona.columns = struct_dkt_area_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_dkt_area_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_dkt_area_names_nona.csv', index=False)
struct_dkt_area_names_nona

#### 6.2 Freesurfer DKT Mean thickness: 62

In [None]:
# Freesurfer DKT mean thickness: 'eid' plus 62 field IDs, instance 2 only.
df_struct_dkt_mean_thickness = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27174, 27267, 27175, 27268, 27176, 27269, 27177, 27270,
        27178, 27271, 27179, 27272, 27180, 27273, 27204, 27297,
        27181, 27274, 27182, 27275, 27183, 27276, 27184, 27277,
        27185, 27278, 27186, 27279, 27188, 27281, 27187, 27280,
        27189, 27282, 27190, 27283, 27191, 27284, 27192, 27285,
        27193, 27286, 27194, 27287, 27195, 27288, 27196, 27289,
        27197, 27290, 27198, 27291, 27199, 27292, 27200, 27293,
        27201, 27294, 27202, 27295, 27203, 27296,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_dkt_mean_thickness_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_dkt_mean_thickness)
# Keep complete cases only: drop every row that has at least one missing value.
struct_dkt_mean_thickness_names_nona = struct_dkt_mean_thickness_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_dkt_mean_thickness_names_nona.columns = struct_dkt_mean_thickness_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_dkt_mean_thickness_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_dkt_mean_thickness_names_nona.csv', index=False)
struct_dkt_mean_thickness_names_nona

#### 6.3 Freesurfer DKT Volume: 62

In [None]:
# Freesurfer DKT volume: 'eid' plus 62 field IDs, instance 2 only.
df_struct_dkt_volume = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        27205, 27298, 27206, 27299, 27207, 27300, 27208, 27301,
        27209, 27302, 27210, 27303, 27211, 27304, 27235, 27328,
        27212, 27305, 27213, 27306, 27214, 27307, 27215, 27308,
        27216, 27309, 27217, 27310, 27219, 27312, 27218, 27311,
        27220, 27313, 27221, 27314, 27222, 27315, 27223, 27316,
        27224, 27317, 27225, 27318, 27226, 27319, 27227, 27320,
        27228, 27321, 27229, 27322, 27230, 27323, 27231, 27324,
        27232, 27325, 27233, 27326, 27234, 27327,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_dkt_volume_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_dkt_volume)
# Keep complete cases only: drop every row that has at least one missing value.
struct_dkt_volume_names_nona = struct_dkt_volume_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_dkt_volume_names_nona.columns = struct_dkt_volume_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_dkt_volume_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_dkt_volume_names_nona.csv', index=False)
struct_dkt_volume_names_nona

### 7. Freesurfer desikan gw: 70

In [None]:
# Freesurfer desikan gw: 'eid' plus 70 field IDs, instance 2 only.
df_struct_desikan_gw = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26990, 27025, 26991, 27026, 26992, 27027, 26993, 27028,
        26994, 27029, 27020, 27055, 26995, 27030, 26996, 27031,
        26997, 27032, 27023, 27058, 26998, 27033, 26999, 27034,
        27000, 27035, 27001, 27036, 27002, 27037, 27003, 27038,
        27005, 27040, 27004, 27039, 27006, 27041, 27007, 27042,
        27008, 27043, 27009, 27044, 27010, 27045, 27011, 27046,
        27012, 27047, 27013, 27048, 27014, 27049, 27015, 27050,
        27016, 27051, 27017, 27052, 27018, 27053, 27019, 27054,
        27021, 27056, 27022, 27057, 26989, 27024,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_desikan_gw_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_desikan_gw)
# Keep complete cases only: drop every row that has at least one missing value.
struct_desikan_gw_names_nona = struct_desikan_gw_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_desikan_gw_names_nona.columns = struct_desikan_gw_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_desikan_gw_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_desikan_gw_names_nona.csv', index=False)
struct_desikan_gw_names_nona

### 8. Freesurfer desikan pial: 66

In [None]:
# Freesurfer desikan pial: 'eid' plus 66 field IDs, instance 2 only.
df_desikan_pial = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26923, 26956, 26924, 26957, 26925, 26958, 26926, 26959,
        26927, 26960, 26928, 26961, 26954, 26987, 26929, 26962,
        26930, 26963, 26931, 26964, 26932, 26965, 26933, 26966,
        26934, 26967, 26935, 26968, 26936, 26969, 26937, 26970,
        26939, 26972, 26938, 26971, 26940, 26973, 26941, 26974,
        26942, 26975, 26943, 26976, 26944, 26977, 26945, 26978,
        26946, 26979, 26947, 26980, 26948, 26981, 26949, 26982,
        26950, 26983, 26951, 26984, 26952, 26985, 26953, 26986,
        26955, 26988,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_desikan_pial_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_desikan_pial)
# Keep complete cases only: drop every row that has at least one missing value.
struct_desikan_pial_names_nona = struct_desikan_pial_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_desikan_pial_names_nona.columns = struct_desikan_pial_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_desikan_pial_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_desikan_pial_names_nona.csv', index=False)
struct_desikan_pial_names_nona

### 9. Freesurfer desikan white

#### 9.1 Freesurfer desikan white Area: 68

In [None]:
# Freesurfer desikan white area: 'eid' plus 68 field IDs, instance 2 only.
df_struct_desikan_white_area = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26721, 26822, 26722, 26823, 26723, 26824, 26724, 26825,
        26725, 26826, 26726, 26827, 26752, 26853, 26727, 26828,
        26728, 26829, 26729, 26830, 26754, 26855, 26730, 26831,
        26731, 26832, 26732, 26833, 26733, 26834, 26734, 26835,
        26735, 26836, 26737, 26838, 26736, 26837, 26738, 26839,
        26739, 26840, 26740, 26841, 26741, 26842, 26742, 26843,
        26743, 26844, 26744, 26845, 26745, 26846, 26746, 26847,
        26747, 26848, 26748, 26849, 26749, 26850, 26750, 26851,
        26751, 26852, 26753, 26854,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Swap the field-ID column headers for field names (via the package helper).
struct_desikan_white_area_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_desikan_white_area)
# Keep complete cases only: drop every row that has at least one missing value.
struct_desikan_white_area_names_nona = struct_desikan_white_area_names.dropna(axis=0)
# Strip the literal '-2.0' instance suffix from the column names. regex=False stops the
# '.' acting as a wildcard and removes the dependence on the pandas-version default.
struct_desikan_white_area_names_nona.columns = struct_desikan_white_area_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_desikan_white_area_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_desikan_white_area_names_nona.csv', index=False)
struct_desikan_white_area_names_nona

#### 9.2 Freesurfer desikan white Mean thickness: 68

In [None]:
# Freesurfer desikan white mean thickness: 'eid' plus 68 field IDs, instance 2 only.
df_struct_desikan_white_mean_thickness = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26755, 26856, 26756, 26857, 26757, 26858, 26758, 26859,
        26759, 26860, 26760, 26861, 26786, 26887, 26761, 26862,
        26762, 26863, 26763, 26864, 26788, 26889, 26764, 26865,
        26765, 26866, 26766, 26867, 26767, 26868, 26768, 26869,
        26769, 26870, 26771, 26872, 26770, 26871, 26772, 26873,
        26773, 26874, 26774, 26875, 26775, 26876, 26776, 26877,
        26777, 26878, 26778, 26879, 26779, 26880, 26780, 26881,
        26781, 26882, 26782, 26883, 26783, 26884, 26784, 26885,
        26785, 26886, 26787, 26888,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Replace field IDs with names, keep only rows without missing values,
# strip the instance suffix from the column labels, save, and display.
struct_desikan_white_mean_thickness_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_desikan_white_mean_thickness)
struct_desikan_white_mean_thickness_names_nona = struct_desikan_white_mean_thickness_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
struct_desikan_white_mean_thickness_names_nona.columns = struct_desikan_white_mean_thickness_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_desikan_white_mean_thickness_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_desikan_white_mean_thickness_names_nona.csv', index=False)
struct_desikan_white_mean_thickness_names_nona

#### 9.3 Freesurfer desikan white Volume: 66

In [None]:
# Freesurfer desikan white - Volume: 'eid' plus 66 field IDs, requested with instance=2.
# The IDs keep their original order; they are laid out two per row (each row is n, n + 101).
df_struct_desikan_white_volume = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26789, 26890,
        26790, 26891,
        26791, 26892,
        26792, 26893,
        26793, 26894,
        26819, 26920,
        26794, 26895,
        26795, 26896,
        26796, 26897,
        26821, 26922,
        26797, 26898,
        26798, 26899,
        26799, 26900,
        26800, 26901,
        26801, 26902,
        26802, 26903,
        26804, 26905,
        26803, 26904,
        26805, 26906,
        26806, 26907,
        26807, 26908,
        26808, 26909,
        26809, 26910,
        26810, 26911,
        26811, 26912,
        26812, 26913,
        26813, 26914,
        26814, 26915,
        26815, 26916,
        26816, 26917,
        26817, 26918,
        26818, 26919,
        26820, 26921,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Replace field IDs with names, keep only rows without missing values,
# strip the instance suffix from the column labels, save, and display.
struct_desikan_white_volume_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_desikan_white_volume)
struct_desikan_white_volume_names_nona = struct_desikan_white_volume_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
struct_desikan_white_volume_names_nona.columns = struct_desikan_white_volume_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_desikan_white_volume_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_desikan_white_volume_names_nona.csv', index=False)
struct_desikan_white_volume_names_nona

### 10. Freesurfer subsegmentation: 121

In [None]:
# Freesurfer subsegmentation: 'eid' plus 121 field IDs, requested with instance=2.
# The IDs keep their original order; only the layout (up to two per row) differs.
df_struct_subsegmentation = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        26684, 26710,
        26602, 26612,
        26603, 26613,
        26601, 26611,
        26622, 26644,
        26626, 26648,
        26632, 26654,
        26637, 26659,
        26635, 26657,
        26634, 26656,
        26681, 26706,
        26670, 26694,
        26677, 26703,
        26604, 26614,
        26606, 26616,
        26607, 26617,
        26633, 26655,
        26631, 26653,
        26638, 26660,
        26620, 26642,
        26668, 26692,
        26712, 26713,
        26665, 26688,
        26687, 26711,
        26600, 26610,
        26676, 26700,
        26673, 26697,
        26664, 26689,
        26679, 26702,
        26605, 26615,
        26716, 26719,
        26608, 26618,
        26685, 26708,
        26674, 26698,
        26717,
        26683, 26709,
        26672, 26696,
        26666, 26690,
        26682, 26705,
        26667, 26691,
        26718,
        26678, 26701,
        26675, 26699,
        26671, 26695,
        26686, 26707,
        26680, 26704,
        26669, 26693,
        26609, 26619,
        26720,
        26639, 26661,
        26640, 26662,
        26641, 26663,
        26714, 26715,
        26636, 26658,
        26624, 26646,
        26630, 26652,
        26629, 26651,
        26628, 26650,
        26627, 26649,
        26625, 26647,
        26621, 26643,
        26623, 26645,
    ],
)

Drop NAs, remove instance number, and save

In [None]:
# Replace field IDs with names, keep only rows without missing values,
# strip the instance suffix from the column labels, save, and display.
struct_subsegmentation_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_subsegmentation)
struct_subsegmentation_names_nona = struct_subsegmentation_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
struct_subsegmentation_names_nona.columns = struct_subsegmentation_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_subsegmentation_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_subsegmentation_names_nona.csv', index=False)
struct_subsegmentation_names_nona

### 11. Additional: T1 and T2

### T1 

- 25010	Volume of brain, grey+white matter
- 25009	Volume of brain, grey+white matter (normalised for head size) - exclude
- 25008	Volume of white matter
- 25007	Volume of white matter (normalised for head size) - exclude
- 25006	Volume of grey matter
- 25005	Volume of grey matter (normalised for head size) - exclude
- 25002	Volume of peripheral cortical grey matter
- 25001	Volume of peripheral cortical grey matter (normalised for head size) - exclude
- 25004	Volume of ventricular cerebrospinal fluid
- 25003	Volume of ventricular cerebrospinal fluid (normalised for head size) - exclude
- 25025	Volume of brain stem + 4th ventricle
- 26536	Volume-ratio of BrainSegVol-to-eTIV (whole brain)
- 26537	Volume-ratio of MaskVol-to-eTIV (whole brain)

Exclude:

- 26568	Number of HolesBeforeFixing (left hemisphere)
- 26599	Number of HolesBeforeFixing (right hemisphere)

In [None]:
# Additional whole-brain T1 measures, requested with instance=2.
# The head-size-normalised counterparts (25009, 25007, 25005, 25001, 25003)
# are intentionally not requested.
df_add_t1 = ukbiobank.utils.utils.loadCsv(
    ukbio=ukb,
    instance=2,
    fields=[
        'eid',
        25010,  # Volume of brain, grey+white matter - T1
        25008,  # Volume of white matter - T1
        25006,  # Volume of grey matter - T1
        25002,  # Volume of peripheral cortical grey matter - T1
        25004,  # Volume of ventricular cerebrospinal fluid - T1
        25025,  # Volume of brain stem + 4th ventricle - T1
        26536,  # Volume-ratio of BrainSegVol-to-eTIV (whole brain) - T1
        26537,  # Volume-ratio of MaskVol-to-eTIV (whole brain) - T1
    ],
)

In [None]:
# Replace field IDs with names, keep only rows without missing values,
# strip the instance suffix from the column labels, save, and display.
add_t1_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_add_t1)
add_t1_names_nona = add_t1_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
add_t1_names_nona.columns = add_t1_names_nona.columns.str.replace('-2.0', '', regex=False)
add_t1_names_nona.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T1_names_nona.csv', index=False)
add_t1_names_nona

# Confounds

## Upload confounds

### T1w additional

Basic

- 25925	Intensity scaling for T1
- 26500	T2-FLAIR used (in addition to T1) to run FreeSurfer
- 25733	Amount of warping applied to non-linearly align T1 brain image to standard-space
- 25731	Discrepancy between T1 brain image and standard-space brain template (linearly-aligned)
- 25732	Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned)
- 25735	Inverted contrast-to-noise ratio in T1
- 25734	Inverted signal-to-noise ratio in T1
- 25000	Volumetric scaling from T1 head image to standard space
- 25756	Scanner lateral (X) brain position
- 25757	Scanner transverse (Y) brain position
- 25758	Scanner longitudinal (Z) brain position
- 25759	Scanner table position

### Confounds from other MRI modalities

- Head size: 25000
- Site: 54
- Acquisition date: 53
- STRUCT MOTION: 24419
- Discrepancy between T1 brain image and standard-space brain template (linearly-aligned): 25731
- Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned): 25732

In [None]:
# T1 confounds, requested with instance=2.
# The original field list contained only 'eid', so the saved confound file
# held no confounds at all. The IDs below are the ones listed in the markdown
# above ("T1w additional" and "Confounds from other MRI modalities").
# 24419 (STRUCT MOTION) is left out: the "T1 Confounds" section notes it is
# not found by loadCsv and reads it from the raw CSV instead.
# NOTE(review): confirm this is the intended confound set for this file.
df_add_t1_conf = ukbiobank.utils.utils.loadCsv(ukbio=ukb, fields=['eid',
25925,  # Intensity scaling for T1
26500,  # T2-FLAIR used (in addition to T1) to run FreeSurfer
25733,  # Amount of warping applied to non-linearly align T1 brain image to standard-space
25731,  # Discrepancy between T1 brain image and standard-space brain template (linearly-aligned)
25732,  # Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned)
25735,  # Inverted contrast-to-noise ratio in T1
25734,  # Inverted signal-to-noise ratio in T1
25000,  # Volumetric scaling from T1 head image to standard space (head size)
25756,  # Scanner lateral (X) brain position
25757,  # Scanner transverse (Y) brain position
25758,  # Scanner longitudinal (Z) brain position
25759,  # Scanner table position
54,     # Site
53,     # Acquisition date
], instance=2)
add_t1_conf_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_add_t1_conf)
add_t1_conf_names_nona = add_t1_conf_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
add_t1_conf_names_nona.columns = add_t1_conf_names_nona.columns.str.replace('-2.0', '', regex=False)
add_t1_conf_names_nona.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T1_CONF.csv', index=False)
add_t1_conf_names_nona

### T2w

- 24486	Total volume of deep white matter hyperintensities
- 24485	Total volume of peri-ventricular white matter hyperintensities
- 25781	Total volume of white matter hyperintensities (from T1 and T2_FLAIR images)

#### Confound:

- 25926	Intensity scaling for T2_FLAIR
- 25736	Discrepancy between T2 FLAIR brain image and T1 brain image


In [None]:
# T2 white-matter-hyperintensity measures, requested with instance=2.
df_add_t2 = ukbiobank.utils.utils.loadCsv(ukbio=ukb, fields=['eid',
24486,  # Total volume of deep white matter hyperintensities - T2
24485,  # Total volume of peri-ventricular white matter hyperintensities - T2
25781,  # Total volume of white matter hyperintensities (from T1 and T2_FLAIR images) - T2
], instance=2)
add_t2_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_add_t2)
add_t2_names_nona = add_t2_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
add_t2_names_nona.columns = add_t2_names_nona.columns.str.replace('-2.0', '', regex=False)
add_t2_names_nona.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T2_names_nona.csv', index=False)
add_t2_names_nona

To load only particular columns from a large CSV, pass `usecols=[...]` to `pd.read_csv`.

In [None]:
# Read only the needed columns straight from the raw CSV.
# read_csv ignores the order of `usecols` and returns columns in file order,
# so labels must be attached by name. Assigning a positional list to
# `.columns` can swap the 24486/24485 labels when the file stores 24485 first.
add_t2 = pd.read_csv('/ukbbdata/ukbb_oct23/ukb.csv', usecols=['eid', '24486-2.0', '24485-2.0', '25781-2.0'])
# Fix the column order explicitly, then rename by key.
add_t2 = add_t2[['eid', '24486-2.0', '24485-2.0', '25781-2.0']].rename(columns={
    '24486-2.0': 'Total volume of deep white matter hyperintensities from T2',
    '24485-2.0': 'Total volume of peri-ventricular white matter hyperintensities from T2',
    '25781-2.0': 'Total volume of white matter hyperintensities (from T1 and T2_FLAIR images)',
})
add_t2_nona = add_t2.dropna(axis=0).reset_index(drop=True)
# NOTE: this overwrites the file saved by the loadCsv-based cell above (same path).
add_t2_nona.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T2_names_nona.csv', index=False)

Upload confounds for T2

- Head size: 25000
- Site: 54
- Acquisition date: 53
- STRUCT MOTION: 24419
- Discrepancy between T1 brain image and standard-space brain template (linearly-aligned): 25731
- Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned): 25732

In [None]:
# T2 confounds, requested with instance=2.
# NOTE(review): 24419 is also read from the raw CSV in the next cell and merged
# in as 'Struct.motion'; confirm whether loadCsv returns it here at all.
df_add_t2_conf = ukbiobank.utils.utils.loadCsv(ukbio=ukb, fields=['eid',
25926,  # Intensity scaling for T2_FLAIR
25736,  # Discrepancy between T2 FLAIR brain image and T1 brain image
25000,  # Head size
54,     # Site
53,     # Acquisition date
24419], instance=2)  # STRUCT MOTION
add_t2_conf_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_add_t2_conf)
add_t2_conf_names_nona = add_t2_conf_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
add_t2_conf_names_nona.columns = add_t2_conf_names_nona.columns.str.replace('-2.0', '', regex=False)
add_t2_conf_names_nona.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T2_CONF_RAW.csv', index=False)
add_t2_conf_names_nona

In [None]:
# Read structural motion (24419, instance 2) directly from the raw CSV and
# attach it to the T2 confound table.
add_t2_conf = pd.read_csv('/ukbbdata/ukbb_oct23/ukb.csv', usecols=['eid', '24419-2.0'])
# Rename by key: read_csv returns columns in file order, not `usecols` order.
add_t2_conf = add_t2_conf.rename(columns={'24419-2.0': 'Struct.motion'})
add_t2_conf_nona = add_t2_conf.dropna(axis=0).reset_index(drop=True)
# Default inner join on 'eid': only participants present in both tables are kept.
# (The original ran this identical merge twice; once is enough.)
add_t2_conf_full = pd.merge(add_t2_conf_names_nona, add_t2_conf_nona, on='eid')
add_t2_conf_full.to_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T2_CONF_RAW_FULL.csv', index=False)
add_t2_conf_full

Convert date to Unix & site to Dummy

In [None]:
# Convert date
# `datetime` is no longer used in this cell. The import is kept because it
# rebinds the name to the module (the notebook's top-level
# `from datetime import date, datetime` shadows it), in case later cells rely on that.
import datetime
add_t2_conf_full = pd.read_csv('/ML_DATASETS/Brain/T1/additional_(t1-t2-MRI)-T2_CONF_RAW_FULL.csv')
add_t2_conf_unix = add_t2_conf_full.copy()
add_t2_conf_unix['Date of attending assessment centre'] = pd.to_datetime(add_t2_conf_unix['Date of attending assessment centre'], format="%Y-%m-%d")

# Whole seconds since 1970-01-01, treating the dates as UTC.
# The previous `.apply(datetime.datetime.timestamp)` interpreted the naive
# timestamps in the machine's local timezone, so the saved values changed with
# the host timezone and DST. This arithmetic is vectorised and host-independent,
# and already yields integers, so no separate rounding step is needed.
add_t2_conf_unix['Date of attending assessment centre'] = (
    add_t2_conf_unix['Date of attending assessment centre'] - pd.Timestamp('1970-01-01')
) // pd.Timedelta('1s')
print(add_t2_conf_unix['Date of attending assessment centre'])

# Dummy encode site
add_t2_conf_unix_dummy = pd.get_dummies(add_t2_conf_unix, columns=['UK Biobank assessment centre'], dtype=int)

In [None]:
# Write the encoded T2 confound table to both output locations, then display it.
for t2_conf_out_path in (
    '/ML_DATASETS/Brain/t1_t2_tMRI/add_t2_conf_unix_dummy.csv',
    '/Cog-Ment/PLS/brain/additional/orig/t2_conf.csv',
):
    add_t2_conf_unix_dummy.to_csv(t2_conf_out_path, index=False)
add_t2_conf_unix_dummy

# T1 Confounds

In [None]:
# Load the previously saved, dummy-encoded T1 confound table and display it.
# NOTE: this file is written by the last cell of this section, so on a fresh
# run that cell has to have been executed at least once before this one.
t1_conf = pd.read_csv(
    '/ML_DATASETS/Brain/T1/struct_conf_full_dummy.csv',
)
t1_conf

Basic

- 25925	Intensity scaling for T1
- 26500	T2-FLAIR used (in addition to T1) to run FreeSurfer
- 25733	Amount of warping applied to non-linearly align T1 brain image to standard-space
- 25731	Discrepancy between T1 brain image and standard-space brain template (linearly-aligned)
- 25732	Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned)
- 25735	Inverted contrast-to-noise ratio in T1
- 25734	Inverted signal-to-noise ratio in T1
- 25000	Volumetric scaling from T1 head image to standard space
- 25756	Scanner lateral (X) brain position
- 25757	Scanner transverse (Y) brain position
- 25758	Scanner longitudinal (Z) brain position
- 25759	Scanner table position

### Confounds from other MRI samples

- Head size: 25000
- Site: 54
- Acquisition date: 53
- STRUCT MOTION: 24419
- Discrepancy between T1 brain image and standard-space brain template (linearly-aligned): 25731
- Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned): 25732

In [None]:
# T1 confounds, requested with instance=2.
# 25000 appeared twice in the original list (once under "Basic", once as
# "Head size"); it is requested once here to avoid a duplicated column.
df_struct_conf = ukbiobank.utils.utils.loadCsv(ukbio=ukb, fields=['eid',
25925,  # Intensity scaling for T1
26500,  # T2-FLAIR used (in addition to T1) to run FreeSurfer
25733,  # Amount of warping applied to non-linearly align T1 brain image to standard-space
25731,  # Discrepancy between T1 brain image and standard-space brain template (linearly-aligned)
25732,  # Discrepancy between T1 brain image and standard-space brain template (nonlinearly-aligned)
25735,  # Inverted contrast-to-noise ratio in T1
25734,  # Inverted signal-to-noise ratio in T1
25000,  # Volumetric scaling from T1 head image to standard space (head size)
25756,  # Scanner lateral (X) brain position
25757,  # Scanner transverse (Y) brain position
25758,  # Scanner longitudinal (Z) brain position
25759,  # Scanner table position
54,     # Site
53], instance=2)  # Acquisition date
# 24419 (STRUCT MOTION) not found by loadCsv; it is added from the raw CSV further below

In [None]:
# Replace field IDs with names, keep only rows without missing values,
# strip the instance suffix from the column labels, save, and display.
struct_conf_names = ukbiobank.utils.utils.fieldIdsToNames(ukbio=ukb, df=df_struct_conf)
struct_conf_names_nona = struct_conf_names.dropna(axis=0)
# regex=False: '-2.0' must be matched literally. With regex matching (the
# default before pandas 2.0) the '.' would match any character.
struct_conf_names_nona.columns = struct_conf_names_nona.columns.str.replace('-2.0', '', regex=False)
struct_conf_names_nona.to_csv('/ML_DATASETS/Brain/T1/struct_conf_names_nona_without_struct_motion.csv', index=False)
struct_conf_names_nona

Add fields that are not in the main csv: 24419 (STRUCT MOTION)

In [None]:
# Read structural motion (24419, instance 2) from the raw CSV.
# usecols avoids loading every column of the file just to keep two of them.
struct_conf_add = pd.read_csv('/ukb.csv', usecols=['eid', '24419-2.0'])
# Fix the column order explicitly (read_csv returns file order) and rename by key.
struct_conf_add = struct_conf_add[['eid', '24419-2.0']].rename(columns={'24419-2.0': 'Struct. motion'})
struct_conf_add_nona = struct_conf_add.dropna(axis=0)
struct_conf_add_nona

In [None]:
# Build the final T1 confound table: add structural motion, convert the
# assessment date to Unix seconds, dummy-encode the site, and save.
struct_conf_names_nona = pd.read_csv('/ML_DATASETS/Brain/T1/struct_conf_names_nona_without_struct_motion.csv')
struct_conf_add = pd.read_csv('/ukb.csv', usecols=['eid', '24419-2.0'])

# Rename by key: read_csv returns columns in file order, not `usecols` order,
# so assigning a positional list to `.columns` is fragile.
struct_conf_add = struct_conf_add[['eid', '24419-2.0']].rename(columns={'24419-2.0': 'Struct. motion'})
struct_conf_add_nona = struct_conf_add.dropna(axis=0)
# Default inner join on 'eid': only participants present in both tables are kept.
struct_conf_full = pd.merge(struct_conf_names_nona, struct_conf_add_nona, on='eid')
struct_conf_full.to_csv('/ML_DATASETS/Brain/T1/struct_conf_full.csv', index=False)

# Convert date
# `datetime` is no longer used in this cell. The import is kept because it
# rebinds the name to the module (the notebook's top-level
# `from datetime import date, datetime` shadows it), in case later cells rely on that.
import datetime
struct_conf_full_unix = struct_conf_full.copy()
struct_conf_full_unix['Date of attending assessment centre'] = pd.to_datetime(struct_conf_full_unix['Date of attending assessment centre'], format="%Y-%m-%d")

# Whole seconds since 1970-01-01, treating the dates as UTC.
# The previous `.apply(datetime.datetime.timestamp)` interpreted the naive
# timestamps in the machine's local timezone, so the saved values changed with
# the host timezone and DST. This arithmetic is vectorised and host-independent,
# and already yields integers, so no separate rounding step is needed.
struct_conf_full_unix['Date of attending assessment centre'] = (
    struct_conf_full_unix['Date of attending assessment centre'] - pd.Timestamp('1970-01-01')
) // pd.Timedelta('1s')
print(struct_conf_full_unix['Date of attending assessment centre'])

# Dummy encode site (get_dummies returns a new frame, so no prior copy is needed)
struct_conf_full_dummy = pd.get_dummies(struct_conf_full_unix, columns=['UK Biobank assessment centre'], dtype=int)
# The T2-FLAIR flag is removed from the final confound table.
struct_conf_full_dummy = struct_conf_full_dummy.drop(columns='T2-FLAIR used (in addition to T1) to run FreeSurfer')
struct_conf_full_dummy.to_csv('/ML_DATASETS/Brain/T1/struct_conf_full_dummy.csv', index=False)
struct_conf_full_dummy