<a href="https://colab.research.google.com/github/Ravikrishnan05/PrediscanMedtech_project/blob/main/dicom_to_png.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Mount Google Drive at /content/drive so that files under MyDrive can be
# addressed with ordinary filesystem paths in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# List the top-level entries under the Drive mount point to locate the folder
# that holds the data.
!ls /content/drive/

MyDrive


In [12]:
# Mount Google Drive. When the drive is already mounted this only prints a
# notice and changes nothing.
from google.colab import drive
drive.mount('/content/drive')

# List the retinal_photography folder to see which sub-folders and manifest
# files it contains.
!ls /content/drive/MyDrive/retinal_photography

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
 cfp   ir		      manifest.gsheet   Selected_1000_Images
 faf  'manifest (1).gsheet'   manifest.tsv


In [13]:
# Check that 'manifest (1).gsheet' is reachable by path; the single quotes keep
# the shell from interpreting the space and parentheses in the file name.
!ls /content/drive/MyDrive/retinal_photography/'manifest (1).gsheet'

'/content/drive/MyDrive/retinal_photography/manifest (1).gsheet'


In [15]:
import pandas as pd
import os
from google.colab import drive
 # drive.mount('/content/drive') # Only need if not already mounted

# --- CONFIGURATION ---

# Field delimiter of the manifest: '\t' for a TSV file, ',' for a CSV file.
SEPARATOR = '\t'

# Location of the manifest on the mounted Google Drive. The analysis below
# requires this file to provide a 'filepath' column.
data_file_path = '/content/drive/MyDrive/retinal_photography/manifest.tsv'

# --- END OF CONFIGURATION ---


def print_header(title):
    """Print a section banner: a blank line, a 50-character '=' rule, the
    upper-cased title centred in '=' padding, and a closing rule."""
    width = 50
    rule = "=" * width
    banner = f" {title.upper()} ".center(width, "=")
    print("\n" + rule)
    print(banner)
    print(rule)

def _folder_type(path):
    """Classify a manifest path by the image folder it contains.

    Returns 'cfp', 'faf' or 'ir' (checked in that order) when the path contains
    the matching '/<name>/' segment, otherwise 'unknown'.
    """
    text = str(path)
    for name in ('cfp', 'faf', 'ir'):
        if f'/{name}/' in text:
            return name
    return 'unknown'


if __name__ == "__main__":
    # Load the manifest and print summary statistics for the columns it has.
    # Fatal problems stop the cell with `raise SystemExit` instead of `exit()`:
    # inside a notebook kernel `exit()` only schedules a kernel shutdown, so the
    # rest of the cell would keep running (with `df` undefined or stale) and the
    # runtime would then be torn down.
    print(f"Attempting to load data from: {data_file_path}")

    # Only the read itself is guarded, so that validation problems further down
    # are not misreported as a malformed file.
    try:
        df = pd.read_csv(data_file_path, sep=SEPARATOR)
    except FileNotFoundError:
        print(f"\nERROR: The file was not found at: {data_file_path}")
        raise SystemExit(1)
    except Exception as e:
        print(f"\nERROR loading file. Is it really a TSV? Error: {e}")
        raise SystemExit(1)

    # The 'filepath' column drives the folder-type verification below.
    if 'filepath' not in df.columns:
        print(f"\nERROR: Column 'filepath' not found in {data_file_path}. Columns are:")
        print(df.columns.tolist())
        raise SystemExit(1)

    # Sanity-check the path layout using the first row (if there is one).
    if df.empty:
        print("\nWARNING: The manifest contains no rows.")
    elif not str(df['filepath'].iloc[0]).startswith('/retinal_photography/'):
        # Informational only: nothing is rewritten here.
        print("\nWARNING: Filepaths do not start with '/retinal_photography/'. No adaptation is applied.")
        print(f"Example path: {df['filepath'].iloc[0]}")

    print(f"\nSuccessfully loaded the file. First 5 rows head:\n{df.head()}")
    print("\nColumns found:", df.columns.tolist())

    print_header("Overall Summary")
    total_files = len(df)
    # 'participant_id' is optional; report 'N/A' when the column is absent.
    unique_participants = df['participant_id'].nunique() if 'participant_id' in df.columns else 'N/A'
    print(f"Total number of DICOM file entries: {total_files}")
    print(f"Total number of unique participants: {unique_participants}")

    if 'imaging' in df.columns:
        print_header("Distribution by Imaging Type")
        print(df['imaging'].value_counts().to_string())

    # Cross-check using the folder segment of each path; this needs only the
    # 'filepath' column, which was verified above.
    print("\n--- Verification by Filepath (cfp/faf/ir) ---")
    df['folder_type'] = df['filepath'].apply(_folder_type)
    print(df['folder_type'].value_counts().to_string())

    if 'manufacturer' in df.columns:
        print_header("Distribution by Manufacturer")
        print(df['manufacturer'].value_counts().to_string())

    if 'laterality' in df.columns:
        print_header("Distribution by Laterality")
        print(df['laterality'].value_counts().to_string())

    if 'height' in df.columns and 'width' in df.columns:
        print_header("Summary of Image Dimensions (Height/Width)")
        print(df[['height', 'width']].describe().round(0))
    print("\nAnalysis Complete.")

Attempting to load data from: /content/drive/MyDrive/retinal_photography/manifest.tsv

Successfully loaded the file. First 5 rows head:
   participant_id manufacturer manufacturers_model_name laterality  \
0            1001        iCare                    Eidon          L   
1            1001        iCare                    Eidon          R   
2            1001        iCare                    Eidon          L   
3            1001        iCare                    Eidon          R   
4            1001        iCare                    Eidon          L   

  anatomic_region            imaging  height  width  color_channel_dimension  \
0          Mosaic  Color Photography    1836   3293                        3   
1          Mosaic  Color Photography    1837   3314                        3   
2          Macula  Color Photography    3288   3680                        3   
3          Macula  Color Photography    3288   3680                        3   
4           Nasal  Color Photography    328

In [None]:
import pandas as pd
import pydicom
from PIL import Image
import numpy as np
import os
from tqdm.auto import tqdm # For the progress bar
from google.colab import drive

# Mount drive if not already mounted.
# If it's already mounted, this will just confirm it.
drive.mount('/content/drive')

# Ensure all necessary packages are installed
!pip install -q pandas pydicom pillow numpy tqdm

# --- CONFIGURATION (VERIFIED AND CORRECT FOR YOUR SETUP) ---

# 1. Path to your data file on Google Drive
data_file_path = '/content/drive/MyDrive/retinal_photography/manifest.tsv'
SEPARATOR = '\t'

# 2. The PARENT directory where 'retinal_photography' folder is.
input_parent_dir = '/content/drive/MyDrive'
output_parent_dir = '/content/drive/MyDrive'

# 3. Define the source and target folder names for creating the new structure
source_folder_name = 'retinal_photography'
target_folder_name = 'retinal_photography_png' # Will be created at /content/drive/MyDrive/retinal_photography_png

# --- END OF CONFIGURATION ---

def _first_float(value):
    """Return a DICOM numeric value as a float, or None when it is missing/empty.

    Window Center / Window Width may hold several values; in that case the
    first one is used.
    """
    if value is None:
        return None
    try:
        return float(value)
    except ValueError:
        # Empty or non-numeric text: treat as "no usable value".
        return None
    except TypeError:
        # Not directly convertible: treat it as a multi-valued sequence.
        for item in value:
            return float(item)
        return None


def _scale_to_uint8(pixels):
    """Min-max scale an array to the 0-255 range as uint8 (uint8 input is returned as is)."""
    if pixels.dtype == np.uint8:
        return pixels
    # Work in float so the range computation cannot overflow integer dtypes.
    as_float = pixels.astype(np.float64)
    low = as_float.min()
    high = as_float.max()
    if high > low:
        return ((as_float - low) / (high - low) * 255).astype(np.uint8)
    # Constant image: there is no range to stretch.
    return np.zeros_like(pixels, dtype=np.uint8)


def convert_dicom_to_png(dicom_filepath, png_filepath):
    """Read a DICOM file, convert its pixel data to 8-bit and save it as PNG.

    Args:
        dicom_filepath: Path of the DICOM file to read.
        png_filepath: Destination path of the PNG; missing parent directories
            are created.

    Returns:
        (True, None) on success, or (False, reason) where reason is a string
        describing the failure. No exception is propagated to the caller.
    """
    # Reading is guarded separately so that only a missing *input* file is
    # reported as "not found" (and not, say, a problem with the output path).
    try:
        ds = pydicom.dcmread(dicom_filepath)
    except FileNotFoundError:
        return False, "DICOM File Not Found at path"
    except Exception as e:
        return False, str(e)

    try:
        pixel_array = ds.pixel_array

        # Apply windowing, when available, for better contrast. This is done
        # for monochrome images only; colour data is left untouched.
        # NOTE(review): MONOCHROME1 images are not inverted here - confirm
        # whether that is wanted for this dataset.
        photometric = str(getattr(ds, 'PhotometricInterpretation', ''))
        if photometric.startswith('MONOCHROME'):
            window_center = _first_float(getattr(ds, 'WindowCenter', None))
            window_width = _first_float(getattr(ds, 'WindowWidth', None))
            if window_center is not None and window_width is not None and window_width > 0:
                img_min = window_center - window_width / 2.0
                img_max = window_center + window_width / 2.0
                pixel_array = np.clip(pixel_array.astype(np.float64), img_min, img_max)

        # Normalize pixel data to 8-bit range (0-255) for standard PNG saving
        pixel_array = _scale_to_uint8(pixel_array)

        image = Image.fromarray(pixel_array)
        # A bare file name has no directory part; os.makedirs('') would raise.
        out_dir = os.path.dirname(png_filepath)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        image.save(png_filepath, 'PNG')
        return True, None
    except Exception as e:
        return False, str(e)


# --- Main Execution ---
def _resolve_paths(manifest_path):
    """Map one manifest 'filepath' entry to (source DICOM path, destination PNG path).

    The entry is treated as relative to input_parent_dir. The PNG is placed
    under output_parent_dir/target_folder_name, mirroring the sub-path that
    follows the leading source_folder_name component (or the whole relative
    path when the entry does not start with that component), so every PNG ends
    up inside the output folder.
    """
    relative = str(manifest_path).lstrip('/')
    dicom_path = os.path.join(input_parent_dir, relative)

    stem, _ = os.path.splitext(relative)
    prefix = source_folder_name + '/'
    if stem.startswith(prefix):
        stem = stem[len(prefix):]
    png_path = os.path.join(output_parent_dir, target_folder_name, stem + '.png')
    return dicom_path, png_path


if __name__ == "__main__":
    # Fatal problems stop the cell with `raise SystemExit` instead of `exit()`:
    # inside a notebook kernel `exit()` only schedules a kernel shutdown, so the
    # rest of the cell would keep running before the runtime is torn down.
    print("Starting DICOM to PNG conversion process...")
    try:
        df = pd.read_csv(data_file_path, sep=SEPARATOR)
    except Exception as e:
        print(f"FATAL ERROR: Could not load the manifest file: {e}")
        raise SystemExit(1)

    # Fail early with a clear message instead of a KeyError inside the loop.
    if 'filepath' not in df.columns:
        print(f"FATAL ERROR: Column 'filepath' not found in the manifest. Columns are: {df.columns.tolist()}")
        raise SystemExit(1)

    print(f"Found {len(df)} files to process from the manifest.")

    # Create the main output directory to check for write permissions early
    output_base_dir = os.path.join(output_parent_dir, target_folder_name)
    try:
        os.makedirs(output_base_dir, exist_ok=True)
        print(f"Output will be saved in: {output_base_dir}")
    except Exception as e:
        print(f"PERMISSION ERROR: Cannot create output directory '{output_base_dir}'.")
        print(f"Reason: {e}")
        print("Please ensure you have 'Editor' or 'Contributor' access to the Google Drive location.")
        raise SystemExit(1)

    success_count = 0
    error_count = 0
    failed_files = []

    # Use tqdm for a live progress bar; only the 'filepath' column is needed.
    for manifest_path in tqdm(df['filepath'], total=len(df), desc="Converting Files"):
        # A blank cell is read as NaN; report it rather than looking for a file named "nan".
        if pd.isna(manifest_path):
            error_count += 1
            failed_files.append((manifest_path, "Missing filepath in manifest"))
            continue

        dicom_path, png_path = _resolve_paths(manifest_path)

        # Perform the conversion
        success, error_message = convert_dicom_to_png(dicom_path, png_path)

        if success:
            success_count += 1
        else:
            error_count += 1
            failed_files.append((dicom_path, error_message))

    print("\n\n--- CONVERSION COMPLETE ---")
    print(f"Successfully converted: {success_count} files.")
    print(f"Failed to convert: {error_count} files.")
    print(f"All new PNG files are saved in the folder: '{output_base_dir}'")

    if error_count > 0:
        print("\n--- List of Failed Files ---")
        for f_path, reason in failed_files:
            print(f"- Path: {f_path}\n  Reason: {reason}\n")