## Data download (manual)
Download the IXI T1 images from:
https://brain-development.org/ixi-dataset/

Steps:
1. Choose **T1 images (all images)**.
2. Download and extract the archive.
3. Place the extracted folder under `../data/raw/IXI_T1/` (relative to this notebook).

After that, run the remaining cells below.

In [1]:
import nibabel as nib
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
import pandas as pd

In [4]:
# Raw data should already exist at ../data/raw/IXI_T1
# Download manually from https://brain-development.org/ixi-dataset/ (T1 images).

In [23]:
def convert_single_img(filename):
    # Load NIfTI file
    nii = nib.load(filename) #'~/Downloads/IXI-T1/IXI012-HH-1211-T1.nii.gz'
    data = nii.get_fdata()

    # Save middle slice as PNG
    slice_idx = data.shape[2] // 2
    # Load your slice
    slice_data = data[:, :, slice_idx]
    # Rotate 90 degrees counter-clockwise
    slice_data = np.rot90(slice_data)

    plt.imsave('slice.png', slice_data, cmap='gray')

In [24]:
convert_single_img('~/Downloads/IXI-MRA/IXI002-Guys-0828-MRA.nii.gz')

In [29]:
def convert_all_img(nii_dir, png_dir):
    #if os.path.isdir(png_dir):
    #    print(f"{png_dir} already exists. Skipping conversion.")
    #    return

    os.makedirs(png_dir, exist_ok=True)

    nii_files = [f for f in os.listdir(os.path.expanduser(nii_dir)) if f.endswith('.nii.gz')]
    print(len(nii_files))

    # Load all NIfTI images into a list
    images = []
    for file in nii_files:
        try:
            #print(f"Converting {file}...")
            nii = nib.load(os.path.join(os.path.expanduser(nii_dir), file))
            data = nii.get_fdata()
            slice_idx = data.shape[2] // 2

            slice_data = data[:, :, slice_idx] # Load your slice
            #slice_data = np.rot90(slice_data) # Rotate 90 degrees counter-clockwise I only used this for T1

            image_fname = os.path.join(png_dir, file)
            image_fname = image_fname.replace(".nii.gz", ".png")
            plt.imsave(image_fname, slice_data, cmap='gray') #saves images to data directory
        except Exception as e:
            print(f"Failed to convert {file}: {e}")
            continue

In [30]:
#convert_all_img("~/Downloads/IXI-T2", "../data/raw/IXI_T2_png")
#convert_all_img("~/Downloads/IXI-PD", "../data/raw/IXI_PD_png")
convert_all_img("~/Downloads/IXI-MRA", "../data/raw/IXI_MRA_png")


570
Failed to convert IXI371-IOP-0970-MRA.nii.gz: Expected 190840832 bytes, got 127926272 bytes from 
 - could the file be damaged?


In [7]:
def convert_excel_to_csv(filename):
    excel_file = filename
    df = pd.read_excel(excel_file)
    print(df.head())
    df.to_csv('../IXI.csv', index=False) # Save as .csv

In [8]:
convert_excel_to_csv("../IXI.xls")

   IXI_ID  SEX_ID (1=m, 2=f)  HEIGHT  WEIGHT  ETHNIC_ID  MARITAL_ID  \
0       1                  1     170      80          2           3   
1       2                  2     164      58          1           4   
2      12                  1     175      70          1           2   
3      13                  1     182      70          1           2   
4      14                  2     163      65          1           4   

   OCCUPATION_ID  QUALIFICATION_ID         DOB  DATE_AVAILABLE STUDY_DATE  \
0              5                 2  1968-02-22               0        NaT   
1              1                 5  1970-01-30               1 2005-11-18   
2              1                 5  1966-08-20               1 2005-06-01   
3              1                 5  1958-09-15               1 2005-06-01   
4              1                 5  1971-03-15               1 2005-06-09   

         AGE  
0        NaN  
1  35.800137  
2  38.781656  
3  46.710472  
4  34.236824  


In [39]:
def map_id(csv_filename):
    # 1. Load the demographic CSV
    df = pd.read_csv(csv_filename)
    
    # 2. Define your modality directories
    # Adjust these paths to match your local folder structure
    modalities = {
        'T1': '../data/raw/IXI_T1_png',
        'T2': '../data/raw/IXI_T2_png',
        'PD': '../data/raw/IXI_PD_png',
        'MRA': '../data/raw/IXI_MRA_png'
    }

    # 3. Create a dictionary to hold mappings for each modality
    # Key: Subject_ID (int), Value: Dictionary of {Modality_Name: Filename}
    master_mapping = {}

    for mod_name, path in modalities.items():
        if not os.path.exists(path):
            print(f"Warning: Directory not found for {mod_name}: {path}")
            continue
            
        image_files = [f for f in os.listdir(path) if f.endswith('.png')]
        print(f"Found {len(image_files)} files for modality {mod_name}")
        
        for filename in image_files:
            # Extract ID (e.g., "IXI050-Guys-0711-T1.png" -> "IXI050")
            id_str = filename.split('-')[0]
            try:
                subject_id = int(id_str.replace('IXI', ''))
                
                # If this ID isn't in our master dict yet, add it
                if subject_id not in master_mapping:
                    master_mapping[subject_id] = {'IXI_ID': subject_id}
                
                # Store the filename in the column corresponding to the modality
                master_mapping[subject_id][f'{mod_name}_file_name'] = filename
                
            except ValueError:
                continue

    # 4. Convert our nested dictionary into a DataFrame
    df_files = pd.DataFrame(list(master_mapping.values()))
    print(df_files.head())

    # 5. Merge with original demographics
    # We use 'inner' if you only want people who have images, 
    # or 'left' if you want to keep all people from the CSV.
    df_final = pd.merge(df, df_files, on='IXI_ID', how='inner')
    print(df_final.head())

    # 6. Check for missing values
    # This shows you how many people are missing specific scans
    print("\nScan availability summary:")
    print(df_final[['T1_file_name', 'T2_file_name', 'PD_file_name', 'MRA_file_name']].notnull().sum())

    # 7. Save
    df_final.to_csv('../IXI_with_filenames.csv', index=False)
    return df_final

In [40]:
df = map_id("../IXI.csv")
df.head()

Found 581 files for modality T1
Found 578 files for modality T2
Found 578 files for modality PD
Found 569 files for modality MRA
   IXI_ID             T1_file_name             T2_file_name  \
0     494  IXI494-Guys-1008-T1.png  IXI494-Guys-1008-T2.png   
1     550  IXI550-Guys-1069-T1.png  IXI550-Guys-1069-T2.png   
2      62  IXI062-Guys-0740-T1.png  IXI062-Guys-0740-T2.png   
3     331   IXI331-IOP-0892-T1.png   IXI331-IOP-0892-T2.png   
4     307   IXI307-IOP-0872-T1.png   IXI307-IOP-0872-T2.png   

              PD_file_name             MRA_file_name  
0  IXI494-Guys-1008-PD.png  IXI494-Guys-1008-MRA.png  
1  IXI550-Guys-1069-PD.png  IXI550-Guys-1069-MRA.png  
2  IXI062-Guys-0740-PD.png  IXI062-Guys-0740-MRA.png  
3   IXI331-IOP-0892-PD.png   IXI331-IOP-0892-MRA.png  
4   IXI307-IOP-0872-PD.png   IXI307-IOP-0872-MRA.png  
   IXI_ID  SEX_ID (1=m, 2=f)  HEIGHT  WEIGHT  ETHNIC_ID  MARITAL_ID  \
0       2                  2     164      58          1           4   
1      12           

Unnamed: 0,IXI_ID,"SEX_ID (1=m, 2=f)",HEIGHT,WEIGHT,ETHNIC_ID,MARITAL_ID,OCCUPATION_ID,QUALIFICATION_ID,DOB,DATE_AVAILABLE,STUDY_DATE,AGE,T1_file_name,T2_file_name,PD_file_name,MRA_file_name
0,2,2,164,58,1,4,1,5,1970-01-30,1,2005-11-18,35.800137,IXI002-Guys-0828-T1.png,IXI002-Guys-0828-T2.png,IXI002-Guys-0828-PD.png,IXI002-Guys-0828-MRA.png
1,12,1,175,70,1,2,1,5,1966-08-20,1,2005-06-01,38.781656,IXI012-HH-1211-T1.png,IXI012-HH-1211-T2.png,IXI012-HH-1211-PD.png,IXI012-HH-1211-MRA.png
2,13,1,182,70,1,2,1,5,1958-09-15,1,2005-06-01,46.710472,IXI013-HH-1212-T1.png,IXI013-HH-1212-T2.png,IXI013-HH-1212-PD.png,IXI013-HH-1212-MRA.png
3,14,2,163,65,1,4,1,5,1971-03-15,1,2005-06-09,34.236824,IXI014-HH-1236-T1.png,IXI014-HH-1236-T2.png,IXI014-HH-1236-PD.png,
4,15,1,181,90,2,1,6,5,1981-03-11,1,2005-06-23,24.284736,IXI015-HH-1258-T1.png,IXI015-HH-1258-T2.png,IXI015-HH-1258-PD.png,IXI015-HH-1258-MRA.png
