In [1]:
pip install google-genai


Collecting google-genai
  Downloading google_genai-1.57.0-py3-none-any.whl (713 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m713.3/713.3 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting google-auth[requests]<3.0.0,>=2.46.0
  Downloading google_auth-2.47.0-py3-none-any.whl (234 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m234.9/234.9 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
Collecting tenacity<9.2.0,>=8.2.3
  Downloading tenacity-9.1.2-py3-none-any.whl (28 kB)
Collecting websockets<15.1.0,>=13.0.0
  Downloading websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl (173 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.3/173.3 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
Collecting pyasn1-modules>=0.2.1
  Downloading pyasn1_modules-0.4.2-py3-none-any.whl (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.3/181.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollec

**Gemini API Framework**

In [6]:
import os
from google import genai

# One shared client for the whole notebook. No credentials are passed here, so
# the SDK has to locate them on its own (presumably from the environment — confirm).
client = genai.Client()

# Model identifier reused by the later generate_content calls.
model_name = "gemini-2.5-flash"

# Smoke test: send a single one-sentence prompt and print the reply text.
prompt = "In one sentence, explain algorithmic biases in LLMs."
response = client.models.generate_content(model=model_name, contents=prompt)
print(response.text)


Algorithmic biases in LLMs occur when the models learn and perpetuate harmful stereotypes and inequities embedded within their vast training datasets, leading to discriminatory or unfair outputs.


**Patient Risk Simulation**

Develop a Gemini analogue of the OpenAI call

In [4]:
def patient_risk_simulation_gemini(prompt, *, model=None, api_client=None):
    """Ask Gemini for a 0-10 hospitalization risk assessment of one patient.

    The patient description is embedded in a fixed instruction template that
    requests three things: a risk score, a per-parameter breakdown that adds
    up to the score, and a rationale.

    Args:
        prompt: Text describing the patient's characteristics.
        model: Optional model identifier. When omitted, the notebook-level
            ``model_name`` is used.
        api_client: Optional object exposing
            ``models.generate_content(model=..., contents=...)``. When omitted,
            the notebook-level ``client`` is used.

    Returns:
        The response text as a ``str``. An empty string is returned when the
        response carries no text, so callers can write or parse the result
        without a ``None`` check.
    """
    full_prompt = (
        "Assign a numerical risk score from 0–10 (10 being the highest risk) "
        "for hospitalization for a person with the following characteristics:\n\n"
        f"{prompt}\n\n"
        "Respond only with:\n"
        "1. Risk Assessment Score\n"
        "2. A list of every single parameter and their associated numerical "
        "positive or negative risk values that add up to the Risk Assessment Score\n"
        "3. Rationale"
    )

    # The notebook globals are only looked up when no override is supplied,
    # which keeps the function usable without that hidden state.
    active_client = client if api_client is None else api_client
    active_model = model_name if model is None else model

    response = active_client.models.generate_content(
        model=active_model,
        contents=full_prompt,
    )

    # `response.text` may be None (e.g. a reply without text parts); normalise
    # to "" so file writes and string parsing downstream do not crash.
    return response.text or ""


**Gemini Pipeline**

In [5]:
import os
import re

import pandas as pd

# Pipeline: for each patient row, query Gemini `sim_reps` times, keep the raw
# reply on disk, parse it into a flat record, and export all records as CSV.
df = pd.read_csv("synthetic_data1.csv")

num_patients = 50
sim_reps = 5
folder_name = "full_sim_run_1"

os.makedirs(folder_name, exist_ok=True)

results = []

SCORE_LABEL = "Risk Assessment Score"
# Characters models commonly wrap around values (bold, headings, colons).
_MARKDOWN_NOISE = "*_`#: \t"


def _parse_score(lines):
    """Return the risk score found at/after a score label line, else None.

    Looks first on the label line itself (e.g. "Risk Assessment Score: 7/10"),
    then on the next three lines (e.g. a bare "7" or "**7**").
    """
    for idx, line in enumerate(lines):
        if SCORE_LABEL not in line:
            continue

        # Text after the label; drop parenthesised hints such as "(0-10)" so
        # the scale is not mistaken for the score.
        tail = re.sub(r"\([^)]*\)", "", line.split(SCORE_LABEL, 1)[1])
        same_line = re.search(r"\d+(?:\.\d+)?", tail)
        if same_line:
            return float(same_line.group())

        for candidate in lines[idx + 1 : idx + 4]:
            cleaned = candidate.strip(_MARKDOWN_NOISE)
            try:
                return float(cleaned)
            except ValueError:
                pass
            # Accept "7/10"-style answers; the numerator is the score.
            fraction = re.fullmatch(r"(\d+(?:\.\d+)?)\s*/\s*\d+(?:\.\d+)?", cleaned)
            if fraction:
                return float(fraction.group(1))
    return None


def _parse_parameters(lines):
    """Return {parameter name: numeric value} from a two-column pipe table.

    The table is the run of lines starting with "|" that follows the first
    line mentioning both "Parameter" and "Value".
    """
    params = {}
    start = None
    for idx, line in enumerate(lines):
        if "Parameter" in line and "Value" in line:
            start = idx + 1
            break
    if start is None:
        return params

    for line in lines[start:]:
        if not line.startswith("|"):
            break
        cells = [cell.strip() for cell in line.split("|") if cell.strip()]
        if len(cells) != 2:
            continue
        name, raw_value = cells
        try:
            params[name] = float(raw_value.strip("*"))
        except ValueError:
            # Separator rows (|---|---|) and non-numeric cells are skipped on purpose.
            continue
    return params


def _parse_rationale(lines):
    """Return everything from the first "Rationale" mention onward as one string.

    Text that follows the word on the same line is kept, and later lines are
    kept even if they mention "Rationale" again.
    """
    for idx, line in enumerate(lines):
        if "Rationale" not in line:
            continue
        same_line = line.split("Rationale", 1)[1].strip(_MARKDOWN_NOISE + "|")
        body = ([same_line] if same_line else []) + lines[idx + 1 :]
        return " ".join(body)
    return ""


# Never index past the end of the data if the CSV has fewer rows than requested.
patient_count = min(num_patients, len(df))

try:
    for patient_idx in range(patient_count):
        row = df.iloc[patient_idx]
        patient_prompt = ", ".join([f"{col}: {row[col]}" for col in df.columns])

        for sim_idx in range(sim_reps):
            out = {
                "Patient_ID": patient_idx,
                "Simulation_Number": sim_idx
            }

            file_path = os.path.join(
                folder_name, f"output_patient{patient_idx}_sim{sim_idx}.txt"
            )

            # Resume support: reuse a non-empty raw output from an earlier
            # (possibly interrupted) run instead of paying for the call again.
            # Change `folder_name` or delete the folder to force fresh calls.
            if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
                with open(file_path, encoding="utf-8") as f:
                    result = f.read()
            else:
                # ---- Gemini call ----
                # `or ""` guards against a reply that carries no text.
                result = patient_risk_simulation_gemini(patient_prompt) or ""

                # Save raw output; explicit UTF-8 because replies contain
                # non-ASCII punctuation such as en dashes.
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(result)

            # ---- Parsing ----
            text = result.replace("–", "-").strip()
            lines = [line.strip() for line in text.splitlines()]

            # 1. Risk score
            out["Risk_Assessment_Score"] = _parse_score(lines)

            # 2. Parameter table
            out.update(_parse_parameters(lines))

            # 3. Rationale
            out["Rationale"] = _parse_rationale(lines)

            results.append(out)

            print(f"Completed patient {patient_idx}, sim {sim_idx}")
finally:
    # Export whatever was parsed even when the loop is interrupted or an API
    # call fails, so a partial run is not lost.
    if results:
        pd.DataFrame(results).to_csv("parsed_output_fill_in_nones.csv", index=False)


Completed patient 0, sim 0
Completed patient 0, sim 1
Completed patient 0, sim 2
Completed patient 0, sim 3
Completed patient 0, sim 4


KeyboardInterrupt: 