# Test-retest study on UW data using asymmetry index
In this script I will do a test retest study on some FSPGR and MPRAGE MRI sequencings. I follow the proccess from a matlab code, written by Ali Deatsch.

In [94]:
fspgrPath = '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/'
mpragePath = '../../UW_data/Test-retest_NITRCKirby_MRI_T1_MPRAGE/'


In [95]:
import os 
import re
import numpy as np
from dateutil import parser
from collections import defaultdict

## Dataset analysis
Firstly I will take a look at my dataset: number of subjects, number of longitudinal scans, median time between scans...

Function to count how many different patients we have and how many scans each of them has.

In [111]:
def count_patients_scans_helper(folder_path, pattern):
    
    patient_scans = defaultdict(int)
    
    # Loop through all the files in the folder
    for filename in os.listdir(folder_path):
        # Search for the patient ID in the filename
        match = pattern.search(filename)
        
        if match:
            patient_id = match.group(1)  # Extract patient ID
            # Increment the count for this patient
            patient_scans[patient_id] += 1
            
    return patient_scans

In [112]:
def count_patients_scan(fspgr_folder_path, mprage_folder_path):
    # Regular expression to find the patient ID in filenames
    fspgr_pattern = re.compile(r'c1bet_crf_rsl_([A-Z0-9]{6})')  # FSPGR patient ID pattern
    mprage_pattern = re.compile(r'c1bet_crf_rsl_(S[0-9]{3})')   # MPRAGE patient ID pattern
    
    # Use the helper function to count scans in both folders
    fspgr_patient_scans = count_patients_scans_helper(fspgr_folder_path, fspgr_pattern)
    mprage_patient_scans = count_patients_scans_helper(mprage_folder_path, mprage_pattern)
    
    return fspgr_patient_scans, mprage_patient_scans

In [113]:
fspgr_patient_scans = count_patients_scan(fspgrPath, mpragePath)[0]
mprage_patient_scans = count_patients_scan(fspgrPath, mpragePath)[1]

In [114]:
print(f"Number of different patients with FSPGR scan: {len(fspgr_patient_scans)}\n")

for patient_id, scan_count in fspgr_patient_scans.items():
    print(f"Patient ID: {patient_id} - Number of scans: {scan_count}")

print('--------------------------------------------------------------')

print(f"Number of different patients with MPRAGE scan: {len(mprage_patient_scans)}\n")

for patient_id, scan_count in mprage_patient_scans.items():
    print(f"Patient ID: {patient_id} - Number of scans: {scan_count}")

Number of different patients with FSPGR scan: 16

Patient ID: GJDGYD - Number of scans: 2
Patient ID: GJDGYR - Number of scans: 2
Patient ID: GJDGYZ - Number of scans: 2
Patient ID: GJDGZH - Number of scans: 3
Patient ID: GJDGZW - Number of scans: 2
Patient ID: GJDH1X - Number of scans: 2
Patient ID: GJDH22 - Number of scans: 2
Patient ID: GJDH2A - Number of scans: 2
Patient ID: GJDH2Z - Number of scans: 2
Patient ID: GJDH4C - Number of scans: 2
Patient ID: GJDH4Q - Number of scans: 2
Patient ID: GJDHDK - Number of scans: 2
Patient ID: GJDHVO - Number of scans: 3
Patient ID: GJDHZ6 - Number of scans: 2
Patient ID: GJDHZF - Number of scans: 2
Patient ID: GJDI07 - Number of scans: 2
--------------------------------------------------------------
Number of different patients with MPRAGE scan: 21

Patient ID: S113 - Number of scans: 2
Patient ID: S127 - Number of scans: 2
Patient ID: S142 - Number of scans: 2
Patient ID: S239 - Number of scans: 2
Patient ID: S346 - Number of scans: 2
Patien

For now I'm only interested in those with 2 scan. I will filter out the dictionary to contain only those. 

In [115]:
fspgr_patient_scans = {patient_id: scans for patient_id, scans in fspgr_patient_scans.items() if scans == 2}

In [165]:
print(f"Number of different patients: {len(fspgr_patient_scans)}\n")

for patient_id, scan_count in fspgr_patient_scans.items():
    print(f"Patient ID: {patient_id} - Number of scans: {scan_count}")

Number of different patients: 14

Patient ID: GJDGYD - Number of scans: 2
Patient ID: GJDGYR - Number of scans: 2
Patient ID: GJDGYZ - Number of scans: 2
Patient ID: GJDGZW - Number of scans: 2
Patient ID: GJDH1X - Number of scans: 2
Patient ID: GJDH22 - Number of scans: 2
Patient ID: GJDH2A - Number of scans: 2
Patient ID: GJDH2Z - Number of scans: 2
Patient ID: GJDH4C - Number of scans: 2
Patient ID: GJDH4Q - Number of scans: 2
Patient ID: GJDHDK - Number of scans: 2
Patient ID: GJDHZ6 - Number of scans: 2
Patient ID: GJDHZF - Number of scans: 2
Patient ID: GJDI07 - Number of scans: 2


Now I want to calculate the median, max and min time between two scans. 

Function that creates a dictionary with patient ID as keys and their scan dates as values.

In [128]:
def date_of_scans_helper(folder_path, pattern, date_pattern):    

    # empty dictionary to store patient dates, dictionary with empty list as value
    patient_dates = defaultdict(list)

    # iterating throught all files in folder
    for filename in os.listdir(folder_path):

        # finding match with pattern
        match = pattern.search(filename)

        if match:
            # extracting patient ID
            patient_id = match.group(1)
            # only looking at those patient with 2 scans
            if patient_id in fspgr_patient_scans:
                date_match = date_pattern.search(filename)
                if date_match:
                    patient_dates[patient_id].append(date_match.group(0))

    return patient_dates 

In [129]:
def date_of_scans(fspgr_folder_path):

    # regular expression for ID and date format
    fspgr_pattern = re.compile(r'c1bet_crf_rsl_([A-Z0-9]{6})')
    fspgr_date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}')

    fspgr_dates = date_of_scans_helper(fspgr_folder_path, fspgr_pattern, fspgr_date_pattern)

    return fspgr_dates

In [130]:
fspgr_scan_dates = date_of_scans(fspgrPath)

In [131]:
for patient_id, dates in fspgr_scan_dates.items():
    print(f"Patient {patient_id} has scan dates: {dates}")

Patient GJDGYD has scan dates: ['2012-06-17', '2012-06-29']
Patient GJDGYR has scan dates: ['2019-05-11', '2019-05-15']
Patient GJDGYZ has scan dates: ['2014-11-13', '2014-11-29']
Patient GJDGZW has scan dates: ['2013-03-20', '2013-03-26']
Patient GJDH1X has scan dates: ['2017-05-14', '2017-05-23']
Patient GJDH22 has scan dates: ['2011-02-21', '2011-02-26']
Patient GJDH2A has scan dates: ['2015-09-28', '2015-10-03']
Patient GJDH2Z has scan dates: ['2019-05-14', '2019-05-22']
Patient GJDH4C has scan dates: ['2016-07-10', '2016-07-17']
Patient GJDH4Q has scan dates: ['2015-02-28', '2015-03-09']
Patient GJDHDK has scan dates: ['2016-09-15', '2016-10-03']
Patient GJDHZ6 has scan dates: ['2019-08-19', '2019-09-16']
Patient GJDHZF has scan dates: ['2019-06-11', '2019-07-01']
Patient GJDI07 has scan dates: ['2019-11-09', '2019-11-16']


Finally calculating the time differences.

In [145]:
fspgr_time_between_scans = []

for dates_array in fspgr_scan_dates.values():

    date0 = parser.parse(dates_array[0])
    date1 = parser.parse(dates_array[1])

    diff = date1 - date0

    fspgr_time_between_scans.append(diff.days)

In [146]:
print(fspgr_time_between_scans)

[12, 4, 16, 6, 9, 5, 5, 8, 7, 9, 18, 28, 20, 7]


In [147]:
fspgr_average = np.mean(fspgr_time_between_scans)
fspgr_medi = int(np.median(fspgr_time_between_scans))
fspgr_minimum = np.min(fspgr_time_between_scans)
fspgr_maximum = np.max(fspgr_time_between_scans)

In [148]:
print(f'''Average time between two scans: {fspgr_average} days
Median time between two scans: {fspgr_medi} days
Range: {fspgr_minimum}-{fspgr_maximum} days''')

Average time between two scans: 11.0 days
Median time between two scans: 8 days
Range: 4-28 days


## Asymmetry index

Getting the file paths of those patients with 2 scans. 

In [166]:
def filepaths_helper(folder, pattern, patient_scans): 

    # storing the file paths in an empty array
    fp = []

    for filename in os.listdir(folder):

        filepath = os.path.join(folder, filename)

        match = pattern.search(filename)

        if match: 

            patient_id = match.group(1)

            if patient_id in patient_scans:

                fp.append(filepath)

    return fp

In [167]:
def filepaths(fspgr_folder, mprage_folder):

    fspgr_pattern = re.compile(r'wbet_crf_rsl_([A-Z0-9]{6})')
    mprage_pattern = re.compile(r'wbet_crf_rsl_(S[0-9]{3})')

    fspgr_files = filepaths_helper(fspgr_folder, fspgr_pattern, fspgr_patient_scans)
    mprage_files = filepaths_helper(mprage_folder, mprage_pattern, mprage_patient_scans)

    return fspgr_files, mprage_files

In [168]:
fspgr_files = filepaths(fspgrPath, mpragePath)[0]
mprage_files = filepaths(fspgrPath, mpragePath)[1]

In [169]:
print(fspgr_files)
print(len(fspgr_files))
print(mprage_files)
print(len(mprage_files))

['../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYD_2012-06-17 19_59_01_6 - Ax T1 bravo 3mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYD_2012-06-29 18_01_43_7 - Ax T1 BRAVO 3mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYR_2019-05-11 22_08_04_6 - Ax_T1_BRAVO_2_4mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYR_2019-05-15 14_45_46_7 - Ax_T1_BRAVO_2_4mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYZ_2014-11-13 10_58_47_6 - Ax T1 BRAVO.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGYZ_2014-11-29 22_11_27_9 - 3D T1 BRAVO 3mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGZW_2013-03-20 09_08_56_6 - Ax T1 BRAVO 3mm.nii', '../../UW_data/Test-retest_UWCCC_MRI_AxT1_FSPGRbravo_refined/wbet_crf_rsl_GJDGZW_2013-03-26 07_09_45_6 - Ax T1

Separating first scans from second scans.

In [171]:
def separate_scans(files):

    first_scans = []
    second_scans = []

    for i in range(len(files)):
        if i % 2 == 0:
            first_scans.append(files[i])
        else: 
            second_scans.append(files[i])

    return first_scans, second_scans

In [172]:
fspgr_first_scans = separate_scans(fspgr_files)[0]
fspgr_second_scans = separate_scans(fspgr_files)[1]

mprage_first_scans = separate_scans(mprage_files)[0]
mprage_second_scans = separate_scans(mprage_files)[1]