# Multi-modal MRI Analysis
Goal : To understand data -> clean & Preprocess CSV data -> clean & preprocess MRI data  -> adapt a 3D model to predict deformation fields

In [1]:
!ls

data  understand_data.ipynb  understand_data.ipynb.amltmp


In [2]:
%cd projects/'MRI analysis'/

[Errno 2] No such file or directory: 'projects/MRI analysis/'
/mnt/batch/tasks/shared/LS_root/mounts/clusters/e4ds-v4/code/projects/MRI analysis


### Mounting data

In [3]:
from azureml.core import Workspace, Datastore
from azureml.data.dataset_factory import FileDatasetFactory

import os
import pandas as pd
import nibabel as nib

In [4]:
ws = Workspace.from_config()
ds = Datastore.get(ws, datastore_name='cvdatastore')
dataset = FileDatasetFactory.from_files(path=(ds, 'Oasis/'), validate=False)
mount_path = dataset.mount()
mount_path.start()
print("Dataset mounted at: ", mount_path.mount_point)

Dataset mounted at:  /tmp/tmpm1m1z_ux


In [5]:
os.listdir(mount_path.mount_point)

['OAS2_RAW_PART1', 'OAS2_RAW_PART2']

In [6]:
df = pd.read_csv(
    os.path.join("data", 'oasis_longitudinal_demographics.csv')
    )

### Data Analysis

#### CSV

In [7]:
len(df)

373

In [8]:
df.head()

Unnamed: 0,Subject ID,MRI ID,Group,Visit,MR Delay,M/F,Hand,Age,EDUC,SES,MMSE,CDR,eTIV,nWBV,ASF
0,OAS2_0001,OAS2_0001_MR1,Nondemented,1,0,M,R,87,14,2.0,27.0,0.0,1987,0.696,0.883
1,OAS2_0001,OAS2_0001_MR2,Nondemented,2,457,M,R,88,14,2.0,30.0,0.0,2004,0.681,0.876
2,OAS2_0002,OAS2_0002_MR1,Demented,1,0,M,R,75,12,,23.0,0.5,1678,0.736,1.046
3,OAS2_0002,OAS2_0002_MR2,Demented,2,560,M,R,76,12,,28.0,0.5,1738,0.713,1.01
4,OAS2_0002,OAS2_0002_MR3,Demented,3,1895,M,R,80,12,,22.0,0.5,1698,0.701,1.034


In [9]:
df['Group'].value_counts()

Group
Nondemented    190
Demented       146
Converted       37
Name: count, dtype: int64

In [10]:
print("Visit counts:")
df['Visit'].value_counts()

Visit counts:


Visit
1    150
2    144
3     58
4     15
5      6
Name: count, dtype: int64

UNDERSTANIDING of DATA

So we have 373 entries, same subject Id having multiple visits (not all).
> 150 patients had visit 1

> 144 patients had vsisit 2

> 58 patients had visit 3

> 15 patients had visit 4

> 6 patients had visit 5

we only need the patients that had more than 1 visit for longitudinal imaging registration. Maybe also only consider who are diagonesed ("Demented" and "Converted")

##### Data cleaning & filtering

In [11]:
subject_IDs = []
for subj, grp in df.groupby('Subject ID'):
    if grp["Visit"].max() > 1:
        subject_IDs.append(subj)
len(subject_IDs)

150

As we see here, all of the patients have more than one MRI taken. i.e., all of the patients has more than 1 visit. 
that means, the data doesn't have all of the visit's MRI.

So we will narrow it down to Subjects only diagoned

In [12]:
subjects = []

for subj_id, group_df in df.groupby('Subject ID'):
    if (group_df["Group"].isin(['Demented', 'Converted']).any()):
        subjects.append(subj_id)

len(subjects)
        

78

among 150 subjects 78 subjects has been diagnosed. 
will be using all the 150 subjects for the strain map detection.

#### including MRI paths with CSV

In [13]:
paths = []
for dir in os.listdir(mount_path.mount_point):
    data_path = os.path.join(mount_path.mount_point, dir)
    print("subdir:",len(os.listdir(data_path)))
    for sub_dir in os.listdir(data_path):
        fin_path=os.path.join(data_path, sub_dir, "RAW")
        if os.path.exists(fin_path):
            paths.append(fin_path)
        else:
            print("Path does not exist:", fin_path)
print("Total MRIs: ", len(paths))
        

subdir: 209
subdir: 164
Total MRIs:  373


In [14]:
paths[:5]

['/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0001_MR1/RAW',
 '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0001_MR2/RAW',
 '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR1/RAW',
 '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR2/RAW',
 '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR3/RAW']

In [15]:
# we can pinpoint one example path for a subject ID using the following method
subject_IDs[0] in paths[1]

True

In [19]:
paths[0]

'/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0001_MR1/RAW'

In [33]:
# structuring the data for easier access later
data_dict = {}
for path in paths:
    parts = path.split(os.sep)
    subj_id = parts[-2][:9]
    visit_num = parts[-2][-1]
    if subj_id not in data_dict:
        data_dict[subj_id] = {}
    data_dict[subj_id][visit_num] = path

In [34]:
data_dict

{'OAS2_0001': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0001_MR1/RAW',
  '2': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0001_MR2/RAW'},
 'OAS2_0002': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR1/RAW',
  '2': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR2/RAW',
  '3': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0002_MR3/RAW'},
 'OAS2_0004': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0004_MR1/RAW',
  '2': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0004_MR2/RAW'},
 'OAS2_0005': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0005_MR1/RAW',
  '2': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0005_MR2/RAW',
  '3': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0005_MR3/RAW'},
 'OAS2_0007': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0007_MR1/RAW',
  '3': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0007_MR3/RAW',
  '4': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0007_MR4/RAW'},
 'OAS2_0008': {'1': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0008_MR1/RAW',
  '2': '/tmp/tmpm1m1z_ux/OAS2_RAW_PART1/OAS2_0008_MR2/RAW'},
 'OAS2_0009': {'