In [1]:

import os
import pydicom
import sqlite3

print("Libraries imported successfully.")

Libraries imported successfully.


In [2]:
# ... (imports from above)

# The name of our database file.
# This is a relative path, so it is resolved against the current working directory.
DATABASE_NAME = 'medical_images.db'

# The path to the folder containing our DICOM images.
# Also relative to the current working directory; process_and_store_images()
# walks it recursively with os.walk, so files in subfolders are included.
IMAGE_FOLDER = 'dicom_images'

# --- DATABASE SETUP ---
def setup_database(database_name=None):
    """
    Connects to the SQLite database and creates the 'images' table if it doesn't exist.

    Args:
        database_name: Path of the SQLite file to set up. When omitted (None),
            the module-level DATABASE_NAME is used, so a call with no
            arguments behaves exactly as before.

    Raises:
        sqlite3.Error: If the database cannot be opened or the table cannot
            be created. The connection is closed before the error propagates.
    """
    if database_name is None:
        database_name = DATABASE_NAME

    # This command connects to the database. If the file doesn't exist, it will be created.
    conn = sqlite3.connect(database_name)
    try:
        # A 'cursor' is like a remote control for the database to execute commands.
        cursor = conn.cursor()

        # This is our SQL command to create a table.
        # We use "IF NOT EXISTS" so it doesn't cause an error if we run the script again.
        cursor.execute('''
        CREATE TABLE IF NOT EXISTS images (
            id INTEGER PRIMARY KEY,
            patient_id TEXT,
            study_date TEXT,
            modality TEXT,
            file_path TEXT UNIQUE
        )
        ''')

        # We 'commit' (save) the changes.
        conn.commit()
    finally:
        # Always close the connection, even if the SQL above failed, so the
        # database file handle is not left open in the notebook kernel.
        conn.close()

    print(f"Database '{database_name}' is set up.")

# --- Call the function to set up the database ---
# Run this before the processing cell: it creates the 'images' table that
# process_and_store_images() inserts into.
setup_database()

Database 'medical_images.db' is set up.


In [3]:
# ... (imports and database setup from above)

# --- METADATA EXTRACTION AND SAVING ---
def _metadata_text(dicom_data, keyword, default='N/A'):
    """Return the value of a DICOM tag as plain text, or `default` if the tag is missing."""
    value = dicom_data.get(keyword, default)
    # pydicom can hand back its own value types (for example multi-valued
    # elements), which sqlite3 cannot bind as parameters. Converting to str
    # keeps the insert from failing; None is passed through and stored as NULL.
    return value if value is None else str(value)


def process_and_store_images(image_folder=None, database_name=None):
    """
    Scans the image folder, extracts metadata from each DICOM file,
    and stores it in the database.

    Args:
        image_folder: Folder to scan recursively for '.dcm' files. When
            omitted (None), the module-level IMAGE_FOLDER is used.
        database_name: Path of the SQLite file to write to. When omitted
            (None), the module-level DATABASE_NAME is used.

    The 'images' table must already exist (see setup_database()). Files that
    cannot be read or inserted are reported and skipped; they do not stop the
    scan. A file whose path is already stored is left untouched.
    """
    if image_folder is None:
        image_folder = IMAGE_FOLDER
    if database_name is None:
        database_name = DATABASE_NAME

    conn = sqlite3.connect(database_name)
    try:
        cursor = conn.cursor()

        print("\nStarting to scan for DICOM files...")

        # os.walk goes through a folder and all its subfolders.
        # 'root' is the current folder path, 'dirs' are subfolders, 'files' are the filenames.
        for root, dirs, files in os.walk(image_folder):
            for filename in files:
                # We only care about files ending in '.dcm'. The comparison is
                # case-insensitive so that '.DCM' files are not silently skipped.
                if not filename.lower().endswith('.dcm'):
                    continue

                file_path = os.path.join(root, filename)

                try:
                    # pydicom.dcmread() reads the DICOM file. We only need header
                    # tags, so stop_before_pixels=True avoids loading the image data.
                    dicom_data = pydicom.dcmread(file_path, stop_before_pixels=True)

                    # Extract metadata. If a tag doesn't exist, we store 'N/A'
                    # instead of crashing.
                    patient_id = _metadata_text(dicom_data, 'PatientID')
                    study_date = _metadata_text(dicom_data, 'StudyDate')
                    modality = _metadata_text(dicom_data, 'Modality')

                    # This is our SQL command to insert data into the table.
                    # The '?' are placeholders to safely insert our variables.
                    # "OR IGNORE" tells SQL to do nothing if we try to insert a file_path that already exists.
                    cursor.execute('''
                    INSERT OR IGNORE INTO images (patient_id, study_date, modality, file_path)
                    VALUES (?, ?, ?, ?)
                    ''', (patient_id, study_date, modality, file_path))

                    print(f"  - Processed: {filename}")

                except Exception as e:
                    # Deliberately broad: one invalid or corrupted file should be
                    # reported and skipped rather than abort the whole scan.
                    print(f"  - Could not process {filename}. Error: {e}")

        conn.commit()
    finally:
        # Always release the connection, even if the scan was interrupted.
        conn.close()

    print("\nFinished processing all files.")

# --- Call the main processing function ---
# Requires the 'images' table created by setup_database(); re-running is safe
# because the insert uses "INSERT OR IGNORE" on the unique file_path column.
process_and_store_images()


Starting to scan for DICOM files...
  - Processed: 1-004.dcm
  - Processed: 1-005.dcm
  - Processed: 1-006.dcm
  - Processed: 1-007.dcm
  - Processed: 1-008.dcm
  - Processed: 1-009.dcm
  - Processed: 1-010.dcm
  - Processed: 1-011.dcm
  - Processed: 1-012.dcm
  - Processed: 1-013.dcm
  - Processed: 1-014.dcm
  - Processed: 1-015.dcm
  - Processed: 1-016.dcm
  - Processed: 1-017.dcm
  - Processed: 1-018.dcm
  - Processed: 1-019.dcm
  - Processed: 1-020.dcm
  - Processed: 1-021.dcm
  - Processed: 1-022.dcm
  - Processed: 1-023.dcm
  - Processed: 1-024.dcm
  - Processed: 1-025.dcm
  - Processed: 1-026.dcm
  - Processed: 1-027.dcm
  - Processed: 1-028.dcm
  - Processed: 1-029.dcm
  - Processed: 1-033.dcm
  - Processed: 1-034.dcm
  - Processed: 1-035.dcm
  - Processed: 1-036.dcm
  - Processed: 1-037.dcm
  - Processed: 1-038.dcm
  - Processed: 1-039.dcm
  - Processed: 1-040.dcm
  - Processed: 1-041.dcm
  - Processed: 1-042.dcm
  - Processed: 1-043.dcm
  - Processed: 1-044.dcm
  - Processed