In [None]:
import pydicom

In [None]:
def extract_dicom_metadata(file_path):
    """Read a DICOM file and return a few patient/study attributes.

    Only the header is parsed (``stop_before_pixels=True``): none of the
    requested attributes need the pixel data, so skipping it keeps the read
    cheap when this is called for many files.

    Args:
        file_path: Path of the DICOM file to read.

    Returns:
        dict: On success, the keys ``"Patient ID"``, ``"Birth Date"``,
        ``"Study Date"`` and ``"Sex"``. An attribute that is missing from
        the file, or present but blank, is reported as ``'Unknown'``.
        On any failure, a dict with the single key ``"Error"`` whose value
        describes the problem (no exception is propagated).
    """
    def _value_or_unknown(dataset, keyword):
        # A tag that exists but holds no value should read the same as a missing tag.
        value = dataset.get(keyword, 'Unknown')
        if value is None or value == '':
            return 'Unknown'
        return value

    try:
        # force=True handles files without the standard header;
        # stop_before_pixels=True avoids loading image data we never use.
        dicom_file = pydicom.dcmread(file_path, force=True, stop_before_pixels=True)

        return {
            "Patient ID": _value_or_unknown(dicom_file, 'PatientID'),
            "Birth Date": _value_or_unknown(dicom_file, 'PatientBirthDate'),
            "Study Date": _value_or_unknown(dicom_file, 'StudyDate'),
            "Sex": _value_or_unknown(dicom_file, 'PatientSex'),
        }
    except Exception as e:
        # Deliberately broad: callers expect an "Error" dict instead of an exception.
        return {"Error": f"An error occurred while reading the DICOM file: {str(e)}"}

Example usage:

In [None]:
# Try the extractor on a single known file before walking the whole data tree.
# NOTE(review): machine-specific absolute path; it will not resolve on another computer.
file_path = r"C:\Users\acer\Desktop\Data\57-2014\47-4881 2014-9 L dicom\_Z\SLZ+000.dcm"
metadata = extract_dicom_metadata(file_path)
# Prints either the four extracted fields or a dict with a single "Error" key.
print(metadata)

Walk the data directory and collect the metadata from every patient folder

In [None]:
import os
import pandas as pd

metadata_list = []
# Reads that came back as {"Error": ...}. They are kept out of metadata_list so
# they do not become all-NaN rows (plus a stray "Error" column) in PatientDF.
failed_reads = []

# Root 'Data' folder; it holds one sub-folder per year
base_directory = r"C:\Users\acer\Desktop\Data"

# Expected layout: <base>/<year>/<patient>/_Z/SLZ+000.dcm
# sorted() makes the row order reproducible; os.listdir() order is arbitrary.
for year_folder in sorted(os.listdir(base_directory)):
    year_folder_path = os.path.join(base_directory, year_folder)
    if not os.path.isdir(year_folder_path):
        continue

    for patient_folder in sorted(os.listdir(year_folder_path)):
        patient_folder_path = os.path.join(year_folder_path, patient_folder)
        dcm_file_path = os.path.join(patient_folder_path, '_Z', 'SLZ+000.dcm')

        # isfile() can only be True when the patient folder and its _Z
        # sub-folder both exist, so this single check covers all three.
        if not os.path.isfile(dcm_file_path):
            continue

        metadata = extract_dicom_metadata(dcm_file_path)
        if "Error" in metadata:
            # The extractor reports failures as a dict, which is always truthy,
            # so it has to be detected by its key rather than by truthiness.
            failed_reads.append({"File": dcm_file_path, "Error": metadata["Error"]})
            continue
        metadata_list.append(metadata)

if failed_reads:
    print(f"Skipped {len(failed_reads)} unreadable DICOM file(s); see failed_reads for details.")

PatientDF = pd.DataFrame(metadata_list)

In [None]:
import pandas as pd

# Rebuild the frame from the raw records so this cell can be re-run safely.
PatientDF = pd.DataFrame(metadata_list)

# Both date columns hold YYYYMMDD strings; anything that does not parse
# (including the 'Unknown' placeholder) becomes NaT.
for date_column in ('Birth Date', 'Study Date'):
    PatientDF[date_column] = pd.to_datetime(
        PatientDF[date_column],
        format='%Y%m%d',
        errors='coerce',
    )

PatientDF

In [None]:
# Age in completed calendar years on the study date.
# Dividing the day count by 365 ignores leap days and overstates the age close
# to a birthday (born 1980-01-01, studied 2020-12-25: 14969 days // 365 == 41,
# although the patient is still 40).
birth_dates = PatientDF['Birth Date']
study_dates = PatientDF['Study Date']

# True where the birthday has already occurred in the study year.
birthday_reached = (
    (study_dates.dt.month > birth_dates.dt.month)
    | (
        (study_dates.dt.month == birth_dates.dt.month)
        & (study_dates.dt.day >= birth_dates.dt.day)
    )
)

# Rows with a missing date stay NaN: the year difference is already NaN there.
PatientDF['Age at Study'] = (
    study_dates.dt.year - birth_dates.dt.year - (~birthday_reached).astype(int)
)

In [None]:
# Count how many rows carry each 'Sex' value (value_counts leaves out NaN by default).
sex_counts = PatientDF['Sex'].value_counts()
print(sex_counts)

In [None]:
# Display the frame again (when the cells are run in order it now has 'Age at Study').
# NOTE(review): this renders every row, patient identifiers included; consider PatientDF.head() before sharing.
PatientDF

In [None]:
# Group the rows by 'Patient ID' so rows sharing an ID can be inspected together.
Patient_grouped = PatientDF.groupby('Patient ID')

In [None]:
# On a GroupBy, head() returns up to the first 5 rows of *each* group, not 5 rows overall.
Patient_grouped.head()

In [None]:
# Summary statistics of the computed ages; missing ages are excluded by describe().
PatientDF['Age at Study'].describe()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Keep only the ages that are known and non-zero.
filtered_ages = PatientDF['Age at Study'].loc[lambda ages: ages.notna() & ages.ne(0)]

In [None]:
# Histogram of the filtered 'Age at Study' values (missing and zero ages already removed).
# An explicit figure/axes pair is used so the plot does not depend on whatever
# figure pyplot currently considers active.
fig, ax = plt.subplots()
filtered_ages.hist(bins=60, edgecolor='black', ax=ax)

# Title and axis labels so the figure can be read on its own
ax.set_title('Age at Study')
ax.set_xlabel('Age')
ax.set_ylabel('Frequency')

# Show the plot
plt.show()
