In [None]:
# Mount Google Drive inside the Colab VM; the CSV paths configured later in this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q --upgrade google-generativeai

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/155.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m153.6/155.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m155.4/155.4 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import pandas as pd
import google.generativeai as genai
import time
import os

Step 2: Configuration
Sets Gemini API key, input/output file paths, and periodic save intervals.

The input CSV contains patient records with disease, tumor, and treatment details.

The output file will include newly generated clinical notes and ICD-10 validations.

In [None]:
# Read the Gemini API key from the GEMINI_API_KEY environment variable so the
# secret never has to be typed into (and saved with) the notebook. Falls back to
# an empty string, which is what this cell assigned before.
API_KEY = os.environ.get("GEMINI_API_KEY", "")
# Input: patient records; output: the same records plus the generated notes.
INPUT_CSV_PATH = "/content/drive/MyDrive/brain_mri/brain_tumor_info_metrics.csv"
OUTPUT_CSV_PATH = "/content/drive/MyDrive/brain_mri/brain_tumor_info_clicicalnote.csv"
SAVE_INTERVAL = 5  # write the output CSV every time this many records have been reached
WAIT_TIME = 40  # seconds to pause between consecutive API requests

Step 3: Load the Model & Resume Progress
Initializes the Gemini 2.5 Pro model (`gemini-2.5-pro`).

Checks whether an output CSV already exists — if so, processing resumes from the next unprocessed record.

Avoids repeating processed entries.

In [None]:
# Hand the configured API key to the google-generativeai client library.
genai.configure(api_key=API_KEY)
# Model handle used by generate_clinical_data(), which calls
# model.generate_content(prompt) on it.
model = genai.GenerativeModel("gemini-2.5-pro")

Step 4: Define the Clinical Note Generation Function
Builds a structured prompt combining patient information with a clinical note format (O-A-P).

Includes ICD-10 code validation using Gemini's medical knowledge.

Retries up to 3 times in case of API errors or quota issues.

Step 5: Generate Clinical Notes for Each Record
Loops through each patient record, generates notes, and saves to the output DataFrame.

Saves progress every 5 patients.

Waits 40 seconds between requests to avoid hitting rate limits.

In [None]:
# Load the source patient records.
df = pd.read_csv(INPUT_CSV_PATH)

# Reuse a previous output file when one exists so already generated notes are
# kept; otherwise start from a copy of the input with an empty notes column.
if os.path.exists(OUTPUT_CSV_PATH):
    df_out = pd.read_csv(OUTPUT_CSV_PATH)
    if "GenAI_Clinical_Notes" not in df_out.columns:
        df_out["GenAI_Clinical_Notes"] = None
    print(f"üìÑ Found existing output file with {df_out['GenAI_Clinical_Notes'].notna().sum()} completed records.")
else:
    df_out = df.copy()
    df_out["GenAI_Clinical_Notes"] = None
    print("üÜï Starting fresh ‚Äî no previous output file found.")

# A notes column that is entirely empty is parsed from CSV as a numeric (NaN)
# column; force object dtype so note strings can be assigned into it later.
df_out["GenAI_Clinical_Notes"] = df_out["GenAI_Clinical_Notes"].astype(object)

# A record counts as processed when its note is neither missing nor empty.
processed_mask = df_out["GenAI_Clinical_Notes"].notna() & (df_out["GenAI_Clinical_Notes"] != "")

# Resume at the POSITION of the first unprocessed record. Using the number of
# processed records instead is only correct when they form an unbroken run from
# the top; with a gap it would skip the missing record permanently.
pending_positions = (~processed_mask).to_numpy().nonzero()[0]
start_index = int(pending_positions[0]) if len(pending_positions) else len(df_out)
print(f"üîÅ Resuming from index {start_index}/{len(df_out)}\n")

def generate_clinical_data(row, retries=3):
    """Generate a concise clinical note and ICD-10 validation for one patient.

    Builds a prompt from the patient's structured fields and sends it to the
    module-level ``model``. Failed calls are retried; no pause is taken after
    the final failed attempt because nothing would follow it.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            ``patient_id``, ``disease_name``, ``Tumor_SubType``, ``Tumor_Grade``,
            ``Treatment``, ``Outcome`` and ``ICD_10_Code``.
        retries: Maximum number of API attempts.

    Returns:
        The stripped response text, or the string
        ``"Error: API call failed after multiple retries."`` when every
        attempt raised.
    """
    patient_data = f"""
    - Patient_ID: {row['patient_id']}
    - Disease Type: {row['disease_name']}
    - Diagnosis (Tumor SubType): {row['Tumor_SubType']}
    - Tumor Grade: {row['Tumor_Grade']}
    - Prescribed Treatment: {row['Treatment']}
    - Expected Outcome: {row['Outcome']}
    - Reference ICD-10 Code: {row['ICD_10_Code']}
    """

    prompt = f"""
    You are an expert oncologist and ICD-10 medical coder.
    Based on the structured data below, generate a concise **clinical note**
    and validate the ICD-10 code. Exclude subjective data.

    **Patient Data:**
    {patient_data}

    **Output Format:**
    **Clinical Note:**
    * **O (Objective):** [Brief diagnostic findings]
    * **A (Assessment):** [Diagnosis including tumor type and grade]
    * **P (Plan):** [Treatment plan and prognosis]

    **ICD-10-CM Code Validation:**
    * **Generated Code:** [Appropriate ICD-10 code]
    * **Code Description:** [Official description]
    * **Reference Code:** {row['ICD_10_Code']}
    * **Validation:** [Match, Mismatch, or Equivalent ‚Äî short reasoning]
    """

    for attempt in range(retries):
        try:
            response = model.generate_content(prompt)
            return response.text.strip()
        except Exception as e:
            # Pausing only makes sense when another attempt will follow.
            final_attempt = attempt == retries - 1
            if "429" in str(e) and not final_attempt:
                # "429" in the error text is treated as a quota/rate-limit hit:
                # back off for a full minute, printing a dot every 5 seconds.
                wait_time = 60
                print(f"\n‚ö†Ô∏è Quota reached ‚Äî waiting {wait_time}s...")
                for i in range(wait_time):
                    if i % 5 == 0:
                        print(".", end="", flush=True)
                    time.sleep(1)
                print()
            else:
                print(f"\n‚ö†Ô∏è API Error (Attempt {attempt+1}/{retries}): {e}")
                if not final_attempt:
                    time.sleep(5)
    return "Error: API call failed after multiple retries."

# Generate a note for every record from start_index onward. The whole loop is
# wrapped so that a manual interrupt at ANY point (including the long pause
# between requests) still ends with the progress written to disk.
try:
    for index in range(start_index, len(df_out)):
        # Never spend an API call on a record that already has a note.
        existing_note = df_out.at[index, "GenAI_Clinical_Notes"]
        if pd.notna(existing_note) and existing_note != "":
            continue

        row = df_out.iloc[index]
        print(f"\nüß† Processing patient {index + 1}/{len(df_out)} (ID: {row['patient_id']})")

        try:
            generated_text = generate_clinical_data(row)
            df_out.at[index, "GenAI_Clinical_Notes"] = generated_text
            # generate_clinical_data reports exhausted retries by returning an
            # "Error: ..." string; log that as a failure, not as a completion.
            if generated_text.startswith("Error:"):
                print(f"‚ö†Ô∏è Error at record {index + 1}: {generated_text}")
            else:
                print(f"‚úÖ Completed record {index + 1}")
        except Exception as e:
            print(f"‚ö†Ô∏è Error at record {index + 1}: {e}")
            df_out.at[index, "GenAI_Clinical_Notes"] = f"Error: {e}"

        is_last_record = index == len(df_out) - 1
        if (index + 1) % SAVE_INTERVAL == 0 or is_last_record:
            df_out.to_csv(OUTPUT_CSV_PATH, index=False)
            print(f"üíæ Progress saved ({index + 1}/{len(df_out)} records).")

        # No request follows the last record, so there is nothing to wait for.
        if is_last_record:
            break

        print(f"‚è≥ Waiting {WAIT_TIME}s before next call...")
        for i in range(WAIT_TIME):
            if i % 5 == 0:
                print(".", end="", flush=True)
            time.sleep(1)
        print()
except KeyboardInterrupt:
    print("\nüõë Interrupted manually. Saving current progress...")
finally:
    # Persist whatever has been generated, whether the loop finished or not.
    df_out.to_csv(OUTPUT_CSV_PATH, index=False)


Step 6: Save Final Results
Saves the DataFrame containing all patients with their GenAI_Clinical_Notes.

Ensures no data is lost even after manual interruption.

In [None]:

# Write the full DataFrame (all records plus the GenAI_Clinical_Notes column)
# to the output CSV, without the DataFrame index, and report where it went.
df_out.to_csv(OUTPUT_CSV_PATH, index=False)
print(f"\n‚úÖ All results saved successfully to:\nüìÅ {OUTPUT_CSV_PATH}")


üìÑ Found existing output file with 373 completed records.
üîÅ Resuming from index 373/396


üß† Processing patient 374/396 (ID: Patient_374)
‚úÖ Completed record 374
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 375/396 (ID: Patient_375)
‚úÖ Completed record 375
üíæ Progress saved (375/396 records).
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 376/396 (ID: Patient_376)
‚úÖ Completed record 376
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 377/396 (ID: Patient_377)
‚úÖ Completed record 377
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 378/396 (ID: Patient_378)
‚úÖ Completed record 378
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 379/396 (ID: Patient_379)
‚úÖ Completed record 379
‚è≥ Waiting 40s before next call...
........

üß† Processing patient 380/396 (ID: Patient_380)
‚úÖ Completed record 380
üíæ Progress saved (380/396 records).
‚è≥ Waiting 40s before nex

Step 7: Display Sample Output
Loads the final CSV and prints three sample AI-generated notes.

Shows structured O-A-P format with ICD-10 code validation.

In [15]:
import pandas as pd

# Path repeated here so this cell can be run without the configuration cell.
OUTPUT_CSV_PATH = "/content/drive/MyDrive/brain_mri/brain_tumor_info_clicicalnote.csv"
df = pd.read_csv(OUTPUT_CSV_PATH)

print("ü©∫ Sample Generated Clinical Notes:\n")

# Failed generations are stored as text that begins with "Error". Filter on that
# prefix only: a substring search would also discard a genuine note that merely
# mentions the word "Error". astype(str) keeps the .str accessor usable even when
# the column holds no strings at all; missing notes are excluded by notna().
notes = df["GenAI_Clinical_Notes"]
valid_notes = df[notes.notna() & ~notes.astype(str).str.startswith("Error")]

# Show the first three successfully generated notes.
for i, row in valid_notes.head(3).iterrows():
    print(f"--- Patient {i+1} | ID: {row['patient_id']} ---")
    print(row["GenAI_Clinical_Notes"])
    print("\n" + "-"*80 + "\n")


ü©∫ Sample Generated Clinical Notes:

--- Patient 1 | ID: Patient_01 ---
**Clinical Note:**
*   **O (Objective):** Diagnostic findings confirm a pituitary mass.
*   **A (Assessment):** Benign pituitary adenoma (WHO Grade I).
*   **P (Plan):** Treatment with medication and/or transsphenoidal surgery is indicated. Prognosis is excellent for hormonal recovery.

**ICD-10-CM Code Validation:**
*   **Generated Code:** D35.2
*   **Code Description:** Benign neoplasm of pituitary gland
*   **Reference Code:** D35.2
*   **Validation:** Match ‚Äî This code is the correct and specific representation for a benign pituitary adenoma.

--------------------------------------------------------------------------------

--- Patient 2 | ID: Patient_02 ---
**Clinical Note:**
*   **O (Objective):** Histopathology confirms a glial brain tumor.
*   **A (Assessment):** Diffuse Astrocytoma, WHO Grade II (Low-Grade Glioma).
*   **P (Plan):** Proceed with surgical resection followed by adjuvant radiation therapy

In [14]:
import pandas as pd
import re

# Input: the CSV holding the generated notes; output: a slimmer CSV with cleaned notes.
INPUT_CSV = "/content/drive/MyDrive/brain_mri/brain_tumor_info_clicicalnote.csv"
OUTPUT_CSV = "/content/drive/MyDrive/brain_mri/brain_tumor_notes.csv"

df = pd.read_csv(INPUT_CSV)

# Keep only the patient id, the image path and the raw note; .copy() gives an
# independent frame so adding columns below does not touch `df`.
df_notes = df[["patient_id", "enhanced_image_path", "GenAI_Clinical_Notes"]].copy()

def clean_text(text):
    """Turn a markdown-formatted generated note into plain, line-structured text.

    Steps: normalise a few special characters, collapse all whitespace to single
    spaces, drop the markdown markers around the known section labels, remove any
    remaining asterisks and double quotes, then put every section label on its
    own line.

    Args:
        text: The raw note, or a missing value (NaN/None).

    Returns:
        The cleaned note, or "" when ``text`` is missing.
    """
    if pd.isna(text):
        return ""

    # NOTE(review): this pattern is a character CLASS, so every listed character
    # is replaced individually with an apostrophe rather than whole multi-character
    # sequences — confirm that this is the intended behavior.
    text = re.sub(r"[√¢‚Ç¨‚Ñ¢√¢‚Ç¨‚Äú√¢‚Ç¨‚Äù]", "'", text)
    text = re.sub(r"[‚Äú‚Äù]", '"', text)
    text = re.sub(r"\s+", " ", text).strip()

    # Markdown-decorated label -> plain label (applied in this order).
    markdown_labels = (
        ("**Clinical Note:**", "Clinical Note:"),
        ("* **O (Objective):**", "O (Objective):"),
        ("* **A (Assessment):**", "A (Assessment):"),
        ("* **P (Plan):**", "P (Plan):"),
        ("**ICD-10-CM Code Validation:**", "ICD-10-CM Code Validation:"),
        ("* **Generated Code:**", "Generated Code:"),
        ("* **Code Description:**", "Code Description:"),
        ("* **Reference Code:**", "Reference Code:"),
        ("* **Validation:**", "Validation:"),
    )
    for decorated, plain in markdown_labels:
        text = text.replace(decorated, plain)

    # Drop any markdown asterisks and double quotes that are still left.
    text = re.sub(r"\*+", "", text)
    text = re.sub(r"\"", "", text)

    # Plain label -> label placed on its own line (applied in this order).
    line_layout = (
        ("Clinical Note:", "\nClinical Note:\n"),
        ("O (Objective):", "\nO (Objective): "),
        ("A (Assessment):", "\nA (Assessment): "),
        ("P (Plan):", "\nP (Plan): "),
        ("ICD-10-CM Code Validation:", "\nICD-10-CM Code Validation:\n"),
        ("Generated Code:", "\nGenerated Code: "),
        ("Code Description:", "\nCode Description: "),
        ("Reference Code:", "\nReference Code: "),
    )
    for label, laid_out in line_layout:
        text = text.replace(label, laid_out)

    # "Validation:" is also the tail of the "ICD-10-CM Code Validation:" header.
    # A plain replace would break that header into "ICD-10-CM Code \nValidation:",
    # so skip any occurrence that directly follows "Code ".
    text = re.sub(r"(?<!Code )Validation:", "\nValidation: ", text)

    return text.strip()

# Derive the cleaned note for every record and swap it in for the raw column in
# a single chained expression.
df_notes = df_notes.assign(
    Cleaned_Clinical_Note=df_notes["GenAI_Clinical_Notes"].apply(clean_text)
).drop(columns=["GenAI_Clinical_Notes"])

# Persist the slimmed-down table without the DataFrame index.
df_notes.to_csv(OUTPUT_CSV, index=False)

print(f"‚úÖ Cleaned and structured clinical notes saved to:\nüìÅ {OUTPUT_CSV}")
print("\nüìÑ Preview of cleaned data:\n")
print(df_notes.head(3))


‚úÖ Cleaned and structured clinical notes saved to:
üìÅ /content/drive/MyDrive/brain_mri/brain_tumor_notes.csv

üìÑ Preview of cleaned data:

   patient_id                                enhanced_image_path  \
0  Patient_01  /content/drive/MyDrive/brain_mri/enhanced_imag...   
1  Patient_02  /content/drive/MyDrive/brain_mri/enhanced_imag...   
2  Patient_03  /content/drive/MyDrive/brain_mri/enhanced_imag...   

                               Cleaned_Clinical_Note  
0  Clinical Note:\n \nO (Objective):  Diagnostic ...  
1  Clinical Note:\n \nO (Objective):  Histopathol...  
2  Clinical Note:\n \nO (Objective):  Pathologica...  
