In [16]:
import pandas as pd
import os
import pydicom

In [17]:
def get_dicom_dimensions(dicom_filepath):
    """Return the pixel dimensions of a DICOM image as ``(width, height)``.

    Only the header is parsed: ``stop_before_pixels=True`` makes pydicom stop
    reading before the Pixel Data element, so the image payload is never
    loaded just to look up two header values.

    Parameters
    ----------
    dicom_filepath : str or path-like
        Location of the DICOM file to inspect.

    Returns
    -------
    tuple
        ``(width, height)``, taken from the dataset's ``Columns`` and ``Rows``
        attributes respectively.

    Notes
    -----
    Any error raised by ``pydicom.dcmread`` (for example when the file is
    missing) propagates to the caller unchanged.
    """
    # Header-only read: Rows/Columns live in the header, ahead of the pixel data
    dicom = pydicom.dcmread(dicom_filepath, stop_before_pixels=True)

    # DICOM stores width as Columns and height as Rows
    return dicom.Columns, dicom.Rows

In [18]:
def create_text_files(df, dicom_folder, output_folder):
    """Write one bounding-box label file per annotated image.

    Rows of ``df`` are grouped by ``image_id``. For every group, the matching
    ``<image_id>.dicom`` file in ``dicom_folder`` is inspected for its pixel
    dimensions and a ``<image_id>.txt`` file is written to ``output_folder``
    with one line per row::

        class_id x_center y_center width height

    where the four geometry values are divided by the image width/height
    (this appears to be the layout used for YOLO-style labels).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``image_id``, ``class_id``, ``x_min``,
        ``x_max``, ``y_min`` and ``y_max``.
    dicom_folder : str
        Folder containing the ``<image_id>.dicom`` files.
    output_folder : str
        Folder that receives the ``.txt`` files; created if missing.

    Notes
    -----
    * An image is skipped entirely (no file written, DICOM not read) when any
      of its rows has a missing coordinate.
    * ``class_id`` is written as an integer. ``iterrows`` upcasts each row to
      a common dtype, so without the explicit conversion an integer id can be
      emitted as ``3.0``. A missing ``class_id`` therefore raises
      ``ValueError`` instead of silently writing ``nan``.
    """
    # exist_ok makes this a no-op for an existing folder and avoids the
    # check-then-create race of a separate os.path.exists() test
    os.makedirs(output_folder, exist_ok=True)

    box_columns = ['x_min', 'x_max', 'y_min', 'y_max']

    # One output file per distinct image_id
    for name, group in df.groupby('image_id'):
        # Skip images for which any box coordinate is missing
        if group[box_columns].isnull().any().any():
            continue

        # Image size is needed to normalize the pixel coordinates
        dicom_filepath = os.path.join(dicom_folder, f"{name}.dicom")
        width, height = get_dicom_dimensions(dicom_filepath)

        output_path = os.path.join(output_folder, f"{name}.txt")

        with open(output_path, 'w') as file:
            for _, row in group.iterrows():
                # Force an integer label regardless of how the row was upcast
                class_id = int(row['class_id'])

                # Box centre and size, expressed as fractions of the image size
                x_center = (row['x_min'] + row['x_max']) / 2 / width
                y_center = (row['y_min'] + row['y_max']) / 2 / height
                norm_width = (row['x_max'] - row['x_min']) / width
                norm_height = (row['y_max'] - row['y_min']) / height

                file.write(f"{class_id} {x_center} {y_center} {norm_width} {norm_height}\n")

In [19]:
if __name__ == "__main__":
    # Root of the dataset tree. The default is the original hard-coded location;
    # set the DEEPRADIOLOGY_DATASET_DIR environment variable to run the cell on
    # another machine without editing the notebook.
    dataset_dir = os.environ.get(
        'DEEPRADIOLOGY_DATASET_DIR',
        'C:/Users/Alex/Desktop/SENECA/Semester 9/BDC800_Capstone/Capstone Project/DeepRadiology/dataset',
    )

    # Load the annotations from the CSV file (resolved against the working directory)
    df = pd.read_csv('train.csv')
    # Folder containing the DICOM images
    dicom_folder = f"{dataset_dir}/images/dicom"
    # Folder that receives the generated text files
    output_folder = f"{dataset_dir}/annotations"
    # Create text files
    create_text_files(df, dicom_folder, output_folder)
    print('Complete')

Complete
