## Matching MRIT1 scans with UDS visit diagnosis
I have two files: MRIT1_longitudinal_narrow_v2.csv and MRIT1_longitudinal_UDS_narrow_v2.csv. <br>
<br>
The first contains MRIT1 scan info; the second contains UDS visit info. Both are narrowed down to the same patients.<br>
<br>
I want to join these two files together.

In [6]:
import pandas as pd
from dateutil import parser
import numpy as np

In [7]:
# Folder the input CSVs are read from and the folder the result is written to;
# both currently point at the same data directory.
loadPath = writePath = '../data/'

In [8]:
# Read the MRIT1 scan table and the UDS visit table from the input folder.
scan_csv = loadPath + 'MRIT1_longitudinal_narrow_v2.csv'
uds_csv = loadPath + 'MRIT1_longitudinal_UDS_narrow_v2.csv'
df_scans = pd.read_csv(scan_csv)
df_uds = pd.read_csv(uds_csv)

I want to attach a UDS visit diagnosis (NACCUDSD) to each MRIT1 scan. For every scan I take the closest UDS visit of the same patient, provided it is less than 1 year (365 days) away from the scan; scans without such a visit are discarded.

In [9]:
# Accumulator for the matched rows: one dict per scan/visit pair.
matched_data = list()

In [10]:
# Longest allowed gap between a scan and its matched UDS visit. The comparison
# in the matcher is strict, so a visit exactly this far away is NOT matched.
MAX_SCAN_VISIT_GAP = pd.Timedelta(days=365)


def _row_dates(frame, year_col, month_col, day_col):
    """Return one pandas Timestamp per row of `frame`, in row order.

    The date is assembled directly from the numeric year/month/day columns
    with `pd.to_datetime`, so no date string is built and re-parsed and no
    field order has to be guessed.

    Raises:
        ValueError: if a component is out of range (raised by pandas) or if
            any row ends up without a date (missing component).
    """
    if frame.empty:
        return []
    parts = {
        'year': frame[year_col].to_numpy(),
        'month': frame[month_col].to_numpy(),
        'day': frame[day_col].to_numpy(),
    }
    dates = pd.to_datetime(parts)
    # Fail loudly instead of letting a missing date silently drop a row.
    if dates.isna().any():
        raise ValueError(
            f"Missing date component in columns {year_col}/{month_col}/{day_col}"
        )
    return list(dates)


def match_scans_to_visits(scans, visits, max_gap=MAX_SCAN_VISIT_GAP):
    """Pair each scan with the same patient's nearest UDS visit.

    Args:
        scans: DataFrame with columns NACCID, NACCMNUM, MRIMO, MRIDY, MRIYR
            and NACCMRFI (one row per scan).
        visits: DataFrame with columns NACCID, NACCVNUM, VISITMO, VISITDAY,
            VISITYR, NACCUDSD and NACCALZD (one row per UDS visit).
        max_gap: a scan is matched only if its nearest visit is strictly
            closer than this Timedelta.

    Returns:
        A list with one dict per matched scan, holding the scan fields and
        the fields of the matched visit. Scans without a visit closer than
        `max_gap` are left out. Several scans may map to the same visit.
        When two visits are equally close, the one that appears first in
        `visits` is used.
    """
    records = []

    for naccid in scans['NACCID'].unique():
        patient_scans = scans[scans['NACCID'] == naccid]
        patient_visits = visits[visits['NACCID'] == naccid]

        scan_dates = _row_dates(patient_scans, 'MRIYR', 'MRIMO', 'MRIDY')
        visit_dates = _row_dates(patient_visits, 'VISITYR', 'VISITMO', 'VISITDAY')

        for scan_pos, scan_date in enumerate(scan_dates):
            closest_pos = None
            smallest_gap = max_gap

            for visit_pos, visit_date in enumerate(visit_dates):
                gap = abs(scan_date - visit_date)
                # Strict comparison: keeps the first of equally close visits
                # and rejects a visit that is exactly `max_gap` away.
                if gap < smallest_gap:
                    smallest_gap = gap
                    closest_pos = visit_pos

            if closest_pos is None:
                continue

            # Look each row up once rather than once per output field.
            scan_row = patient_scans.iloc[scan_pos]
            visit_row = patient_visits.iloc[closest_pos]
            records.append({
                'NACCID': naccid,
                'NACCMNUM': scan_row['NACCMNUM'],
                'MRIMO': scan_row['MRIMO'],
                'MRIDY': scan_row['MRIDY'],
                'MRIYR': scan_row['MRIYR'],
                'NACCVNUM': visit_row['NACCVNUM'],
                'VISITMO': visit_row['VISITMO'],
                'VISITDAY': visit_row['VISITDAY'],
                'VISITYR': visit_row['VISITYR'],
                'NACCUDSD': visit_row['NACCUDSD'],
                'NACCALZD': visit_row['NACCALZD'],
                'NACCMRFI': scan_row['NACCMRFI'],
            })

    return records


# Rebuild the list from scratch on every run so that re-executing this cell
# cannot append a second copy of every matched row.
matched_data = match_scans_to_visits(df_scans, df_uds)

In [11]:
# Convert matched data to DataFrame 
# Turn the list of matched-row dicts into a table (one row per dict).
matched_df = pd.DataFrame(data=matched_data)

In [14]:
# Number of distinct patients (NACCID values) present in the matched table.
unique_patient_count = matched_df['NACCID'].nunique()
print(unique_patient_count)

1377


In [None]:
# Save the matched table without the DataFrame index, then report completion.
output_csv = writePath + 'MRIT1_longitudinal_matched_w_UDS.csv'
matched_df.to_csv(output_csv, index=False)
print("Matching complete!")