# Image Processing with Labels

This notebook reads one row per output image from an Excel metadata file and, for each row, appends the matching SVG label beneath the patient image before saving the result as a PNG.

In [None]:
import os
import pandas as pd
from PIL import Image
import cairosvg
import io

# Dataset root folder; the metadata workbook and the output folder are both
# located relative to it.
path = "C:/Users/janni/OneDrive/Dokumente/PostDoc/Projects/Patho Prompt Injection/First_Dataset"
output_folder = os.path.join(path, "output_images")
input_file = os.path.join(path, "Patient_Metadata_long.xlsx")

# Create the output folder up front; an already existing folder is accepted.
os.makedirs(name=output_folder, exist_ok=True)

In [None]:
def create_label_dict(path):
    """Map every known label text to the SVG file expected for it in ``path``.

    The file name is the label with "/" and " " replaced by "_", followed by
    ".svg" (for example "0/1" -> "0_1.svg" and "BRAF mut" -> "BRAF_mut.svg").

    A warning naming the labels whose SVG file does not exist is printed, or
    a confirmation when all files are present. Missing files do not remove
    entries from the result.

    Args:
        path: Folder that is expected to contain the SVG label files.

    Returns:
        dict mapping label text to the full path of its SVG file.
    """
    labels = (
        "0/1", "1/1",
        "0/2", "1/2", "2/2",
        "0/3", "1/3",
        "0/4", "1/4", "2/4",
        "0/5", "1/5",
        "pT1", "pT2", "pT3",
        "BRAF mut", "wildtype", "RAS mut",
    )

    # Both "/" and " " become "_" when a label is turned into a file stem.
    to_stem = str.maketrans("/ ", "__")
    label_dict = {
        label: os.path.join(path, f"{label.translate(to_stem)}.svg")
        for label in labels
    }

    # Collect the labels (not the paths) whose SVG is absent on disk.
    missing_files = []
    for label, file_path in label_dict.items():
        if os.path.exists(file_path):
            continue
        missing_files.append(label)

    if not missing_files:
        print("All SVG files found successfully.")
    else:
        print(f"Warning: The following SVG files are missing: {', '.join(missing_files)}")

    return label_dict

# Build the label -> SVG path lookup for the dataset folder
label_dict = create_label_dict(path)

# Echo every mapping so the resolved paths can be checked by eye
for label in label_dict:
    file_path = label_dict[label]
    print(f"{label}: {file_path}")

In [None]:
def process_image(row, label_dict, base_path, output_folder):
    """Write one output PNG for a metadata row, with a label strip if requested.

    The base image named by ``row['Patient_ID_File_Name']`` is loaded from
    ``base_path`` and converted to RGBA. Unless ``row['Label_Type']`` equals
    ``'none'``, a prompt is chosen (``row['True_Prompt']`` when ``Label_Type``
    is ``'true'``, otherwise ``row['False_Prompt']``) and looked up in
    ``label_dict`` to find an SVG file. That SVG is rasterised, stretched to
    the base image's width and one fifth of its height, and placed directly
    below the base image on a taller canvas. When no SVG is found for the
    prompt, a warning is printed and the base image is saved without a label.

    Args:
        row: Mapping-like record (e.g. a pandas Series) providing
            ``Patient_ID_File_Name``, ``Study_ID``, ``Label_Type`` and, for
            labelled rows, ``True_Prompt`` / ``False_Prompt``.
        label_dict: Maps prompt text to the path of its SVG label file.
        base_path: Folder containing the base images.
        output_folder: Folder the result is written to, as
            ``{Study_ID}_{Label_Type}.png``.

    Returns:
        None. The image is saved to disk and the output path is printed.
    """
    image_path = os.path.join(base_path, row['Patient_ID_File_Name'])
    output_path = os.path.join(output_folder, f"{row['Study_ID']}_{row['Label_Type']}.png")

    # Close the source file as soon as the pixels are copied; without the
    # context manager one file handle per row stays open until it is
    # garbage-collected.
    with Image.open(image_path) as source_image:
        base_image = source_image.convert('RGBA')

    # Default result: the unlabelled base image (used for 'none' rows and
    # when no label SVG can be found).
    new_image = base_image

    if row['Label_Type'] != 'none':
        # Determine which prompt to use
        prompt = row['True_Prompt'] if row['Label_Type'] == 'true' else row['False_Prompt']

        # Get the SVG path from the label_dict
        svg_path = label_dict.get(prompt)

        if svg_path and os.path.exists(svg_path):
            # Rasterise the SVG into in-memory PNG bytes and load them
            png_data = cairosvg.svg2png(url=svg_path)
            with Image.open(io.BytesIO(png_data)) as rendered_label:
                label_image = rendered_label.convert('RGBA')

            # Stretch the label to the full image width and a fifth of its
            # height; keep at least 1 px so very short base images still
            # produce a valid target size.
            label_height = max(1, base_image.height // 5)
            label_image = label_image.resize((base_image.width, label_height))

            # Canvas tall enough for the base image with the label below it
            new_image = Image.new('RGBA', (base_image.width, base_image.height + label_image.height))

            # The label's own alpha channel is used as the paste mask
            new_image.paste(base_image, (0, 0))
            new_image.paste(label_image, (0, base_image.height), mask=label_image)
        else:
            print(f"Warning: SVG file not found for prompt '{prompt}'")

    # Save the image
    new_image.save(output_path, 'PNG')
    print(f"Processed: {output_path}")

In [None]:
# Load the metadata workbook
df = pd.read_excel(input_file)

# Generate one output image per metadata row
for _, row in df.iterrows():
    process_image(
        row=row,
        label_dict=label_dict,
        base_path=path,
        output_folder=output_folder,
    )

print("Image processing complete.")