In [3]:
import nibabel as nib
import numpy as np
import os
import pandas as pd
from PIL import Image
import xml.etree.ElementTree as ET
from skimage.transform import resize
from skimage import img_as_ubyte  # Additional import to safely convert to ubyte

def convert_img_to_jpegs(input_img_path, output_folder, img_dim=(224, 224)):
    # Load the .img file using nibabel
    img = nib.load(input_img_path)
    data = img.get_fdata()

    # Normalize the data to 0-255
    data = 255 * (data - np.min(data)) / (np.max(data) - np.min(data))
    data = data.astype(np.uint8)

    output_paths = []

    # Iterate over all slices in the 3rd dimension
    for slice_index in range(data.shape[2]):
        slice_data = data[:, :, slice_index]
        
        # Resize image if needed
        if slice_data.shape != img_dim:
            slice_data = resize(slice_data, img_dim, anti_aliasing=True)
            slice_data = img_as_ubyte(slice_data)  # Ensure type is uint8 and avoid overflow
        
        # Ensure the data is 2D for PIL conversion
        if len(slice_data.shape) != 2:
            slice_data = slice_data.squeeze()  # Removes any singleton dimensions
        
        # Convert to PIL image and save as JPEG
        img_pil = Image.fromarray(slice_data)
        img_filename = f"{os.path.basename(input_img_path).replace('.img', '')}_slice_{slice_index}.jpeg"
        output_path = os.path.join(output_folder, img_filename)
        img_pil.save(output_path, "JPEG")
        output_paths.append(output_path)

    return output_paths


def extract_metadata_from_xml(xml_file_path):
    """Extracts metadata from XML file."""
    tree = ET.parse(xml_file_path)
    root = tree.getroot()
    metadata = {}
    for child in root:
        metadata[child.tag] = child.text
    return metadata

def process_images(base_dir, output_dir):
    # Ensure the output directory exists
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    records = []
    # Walk through the directory structure
    for root, dirs, files in os.walk(base_dir):
        # Find only the .img files in the T88_111 subdirectory
        if 'T88_111' in root:
            img_files = [f for f in files if f.endswith('.img')]
            subject_id = root.split(os.sep)[-4]  # Assuming 'OAS1_xxxx_MRy' is 4 levels up
            session_id = root.split(os.sep)[-3]  # Session folder is directly 3 levels up
            
            # Path to the XML file
            xml_path = os.path.join(os.sep.join(root.split(os.sep)[:-2]), f'{session_id}.xml')

            # Extract metadata if XML file exists
            metadata = {}
            if os.path.exists(xml_path):
                metadata = extract_metadata_from_xml(xml_path)

            # Process each image file found in the T88_111 directory
            for img_file in img_files:
                img_path = os.path.join(root, img_file)
                output_paths = convert_img_to_jpegs(img_path, output_dir)
                
                # Store metadata and paths for each slice
                for output_path in output_paths:
                    records.append({
                        "file_path": output_path,
                        "subject_id": subject_id,
                        "session_id": session_id,
                        **metadata  # Merge additional metadata from XML
                    })

    # Create a DataFrame with all records
    df = pd.DataFrame(records)
    return df

# Example usage:
base_directory = 'Data/OASIS1'
output_directory = 'Data/OASIS_Extracted'
df_images = process_images(base_directory, output_directory)
print(df_images.head())



                                           file_path     subject_id session_id
0  Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1  PROCESSED
1  Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1  PROCESSED
2  Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1  PROCESSED
3  Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1  PROCESSED
4  Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1  PROCESSED


In [18]:
print(df_images)


                                                file_path     subject_id  \
0       Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1   
1       Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1   
2       Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1   
3       Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1   
4       Data/OASIS_Extracted/OAS1_0448_MR1_mpr_n4_anon...  OAS1_0448_MR1   
...                                                   ...            ...   
153467  Data/OASIS_Extracted/OAS1_0087_MR1_mpr_n4_anon...  OAS1_0087_MR1   
153468  Data/OASIS_Extracted/OAS1_0087_MR1_mpr_n4_anon...  OAS1_0087_MR1   
153469  Data/OASIS_Extracted/OAS1_0087_MR1_mpr_n4_anon...  OAS1_0087_MR1   
153470  Data/OASIS_Extracted/OAS1_0087_MR1_mpr_n4_anon...  OAS1_0087_MR1   
153471  Data/OASIS_Extracted/OAS1_0087_MR1_mpr_n4_anon...  OAS1_0087_MR1   

       session_id  
0       PROCESSED  
1       PROCESSED  
2       PROCESSED  
3      

In [20]:
import os
import shutil

# Define the path to the directory containing the images
src_directory = 'Data/OASIS_Extracted'

# Loop over each file in the directory
for filename in os.listdir(src_directory):
    if filename.endswith(".jpeg"):  # Check if the file is a JPEG image
        # Extract the subfolder name from the first 9 characters of the filename
        subfolder_name = filename[:9]
        
        # Create the path to the new subfolder if it doesn't already exist
        subfolder_path = os.path.join(src_directory, subfolder_name)
        if not os.path.exists(subfolder_path):
            os.makedirs(subfolder_path)
        
        # Move the file to the new subfolder
        src_file_path = os.path.join(src_directory, filename)
        dst_file_path = os.path.join(subfolder_path, filename)
        shutil.move(src_file_path, dst_file_path)

print("Images have been sorted into subfolders.")


Images have been sorted into subfolders.


In [21]:
# Function to extract the folder name from the filename and prepend it to the existing path
def update_path(path):
    # Split the path to get the filename
    parts = path.split('/')
    # Get the filename which is the last part of the path
    filename = parts[-1]
    # Extract the first 9 characters of the filename to use as the folder name
    folder_name = filename[:9]
    # Insert the new folder name into the path just before the filename
    new_path = '/'.join(parts[:-1] + [folder_name, filename])
    return new_path

# Apply the function to update the file paths in the DataFrame
df_images['file_path'] = df_images['file_path'].apply(update_path)

# Print the updated DataFrame to verify changes
print(df_images.head())

                                           file_path     subject_id session_id
0  Data/OASIS_Extracted/OAS1_0448/OAS1_0448_MR1_m...  OAS1_0448_MR1  PROCESSED
1  Data/OASIS_Extracted/OAS1_0448/OAS1_0448_MR1_m...  OAS1_0448_MR1  PROCESSED
2  Data/OASIS_Extracted/OAS1_0448/OAS1_0448_MR1_m...  OAS1_0448_MR1  PROCESSED
3  Data/OASIS_Extracted/OAS1_0448/OAS1_0448_MR1_m...  OAS1_0448_MR1  PROCESSED
4  Data/OASIS_Extracted/OAS1_0448/OAS1_0448_MR1_m...  OAS1_0448_MR1  PROCESSED


In [5]:
def parse_txt_metadata(txt_file_path):
    """Parses metadata from a structured TXT file into a dictionary."""
    metadata = {}
    current_scan = None  # To keep track of which scan's details are being parsed

    with open(txt_file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if ':' in line:
                key, value = line.split(':', 1)
                key = key.strip()
                value = value.strip()
                if 'SCAN NUMBER' in key:  # Start of a new scan block
                    current_scan = value  # Update current scan to this scan number
                    metadata[current_scan] = {}
                elif current_scan:  # If currently parsing a scan block
                    metadata[current_scan][key] = value
                else:
                    metadata[key] = value
            elif line.startswith('mpr-'):  # This handles lines like 'mpr-1      MPRAGE'
                scan_number, scan_type = line.split()
                metadata[scan_number] = scan_type

    return metadata


In [15]:
import pandas as pd

def flatten_metadata(metadata):
    """Flatten a metadata dictionary into a DataFrame."""
    # Base metadata excluding mpr details
    base_metadata = {k: v for k, v in metadata.items() if not k.startswith('mpr-')}
    
    rows = []
    # Iterate over each mpr entry
    for mpr_key, mpr_data in metadata.items():
        if mpr_key.startswith('mpr-'):
            # Combine base metadata with mpr-specific data
            full_data = {**base_metadata, **mpr_data}
            full_data['mpr'] = mpr_key  # Keep track of which mpr this is
            rows.append(full_data)
    
    return pd.DataFrame(rows)

def collect_and_concatenate_metadata(base_dir):
    combined_df = pd.DataFrame()
    # Iterate through each file and flatten the metadata
    for root, dirs, files in os.walk(base_dir):
        for file in files:
            if file.endswith('.txt') and 'other_txt' not in file:
                txt_file_path = os.path.join(root, file)
                metadata = parse_txt_metadata(txt_file_path)
                flattened_df = flatten_metadata(metadata)
                # Append the flattened data to the combined DataFrame
                combined_df = pd.concat([combined_df, flattened_df], ignore_index=True)
                
    return combined_df


In [16]:
base_directory = 'Data/OASIS1'
metadata_df = collect_and_concatenate_metadata(base_directory)
print(metadata_df.head())

      SESSION ID AGE     M/F   HAND EDUC SES CDR MMSE     eTIV   ASF   nWBV  \
0  OAS1_0448_MR1  22  Female  Right                    1524.00  1.15  0.858   
1  OAS1_0448_MR1  22  Female  Right                    1524.00  1.15  0.858   
2  OAS1_0448_MR1  22  Female  Right                    1524.00  1.15  0.858   
3  OAS1_0448_MR1  22  Female  Right                    1524.00  1.15  0.858   
4  OAS1_0449_MR1  71  Female  Right    3   4   0   29  1264.00  1.39  0.818   

     TYPE      Vox res (mm) Rect. Fov Orientation TR (ms) TE (ms) TI (ms)  \
0  MPRAGE  1.0 x 1.0 x 1.25   256/256         Sag     9.7     4.0    20.0   
1  MPRAGE  1.0 x 1.0 x 1.25   256/256         Sag     9.7     4.0    20.0   
2  MPRAGE  1.0 x 1.0 x 1.25   256/256         Sag     9.7     4.0    20.0   
3  MPRAGE  1.0 x 1.0 x 1.25   256/256         Sag     9.7     4.0    20.0   
4  MPRAGE  1.0 x 1.0 x 1.25   256/256         Sag     9.7     4.0    20.0   

  Flip    mpr  
0   10  mpr-1  
1   10  mpr-2  
2   10  mpr-3 

In [17]:
print(metadata_df)

         SESSION ID AGE     M/F   HAND EDUC SES CDR MMSE     eTIV   ASF  \
0     OAS1_0448_MR1  22  Female  Right                    1524.00  1.15   
1     OAS1_0448_MR1  22  Female  Right                    1524.00  1.15   
2     OAS1_0448_MR1  22  Female  Right                    1524.00  1.15   
3     OAS1_0448_MR1  22  Female  Right                    1524.00  1.15   
4     OAS1_0449_MR1  71  Female  Right    3   4   0   29  1264.00  1.39   
...             ...  ..     ...    ...  ...  ..  ..  ...      ...   ...   
1683  OAS1_0086_MR1  47  Female  Right    4   1   0   30  1311.00  1.34   
1684  OAS1_0087_MR1  21  Female  Right                    1507.00  1.16   
1685  OAS1_0087_MR1  21  Female  Right                    1507.00  1.16   
1686  OAS1_0087_MR1  21  Female  Right                    1507.00  1.16   
1687  OAS1_0087_MR1  21  Female  Right                    1507.00  1.16   

       nWBV    TYPE      Vox res (mm) Rect. Fov Orientation TR (ms) TE (ms)  \
0     0.858  MPRAGE 

In [6]:
print(parse_txt_metadata("/Users/henrismidt/Documents/Informatik/Master/Alzheimer_Detection/Data/OASIS1/disc2/OAS1_0043_MR1/OAS1_0043_MR1.txt"))

{'SESSION ID': 'OAS1_0043_MR1', 'AGE': '21', 'M/F': 'Male', 'HAND': 'Right', 'EDUC': '', 'SES': '', 'CDR': '', 'MMSE': '', 'eTIV': '1511.00', 'ASF': '1.16', 'nWBV': '0.846', 'mpr-1': {'TYPE': 'MPRAGE', 'Vox res (mm)': '1.0 x 1.0 x 1.25', 'Rect. Fov': '256/256', 'Orientation': 'Sag', 'TR (ms)': '9.7', 'TE (ms)': '4.0', 'TI (ms)': '20.0', 'Flip': '10'}, 'mpr-2': {'TYPE': 'MPRAGE', 'Vox res (mm)': '1.0 x 1.0 x 1.25', 'Rect. Fov': '256/256', 'Orientation': 'Sag', 'TR (ms)': '9.7', 'TE (ms)': '4.0', 'TI (ms)': '20.0', 'Flip': '10'}, 'mpr-3': {'TYPE': 'MPRAGE', 'Vox res (mm)': '1.0 x 1.0 x 1.25', 'Rect. Fov': '256/256', 'Orientation': 'Sag', 'TR (ms)': '9.7', 'TE (ms)': '4.0', 'TI (ms)': '20.0', 'Flip': '10'}, 'mpr-4': {'TYPE': 'MPRAGE', 'Vox res (mm)': '1.0 x 1.0 x 1.25', 'Rect. Fov': '256/256', 'Orientation': 'Sag', 'TR (ms)': '9.7', 'TE (ms)': '4.0', 'TI (ms)': '20.0', 'Flip': '10'}}
