In [1]:
pip install google-genai



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


**Gemini API Framework**

In [2]:
import os
from google import genai

# Model identifier and a short demo prompt used as a connectivity check.
model_name = "gemini-2.5-flash"
prompt = "In one sentence, explain algorithmic biases in LLMs."

# Shared client reused by the later cells.
# NOTE(review): no credentials are passed here, so the SDK presumably reads
# the API key from the environment -- confirm before sharing the notebook.
client = genai.Client()

# Single request; the reply text is printed below the cell.
response = client.models.generate_content(model=model_name, contents=prompt)
print(response.text)


Algorithmic biases in LLMs occur because the models learn and perpetuate human prejudices, stereotypes, and inequalities embedded in their vast training data, leading to unfair or discriminatory outputs.


**Patient Risk Simulation**

Develop a Gemini analogue of the OpenAI call.

In [3]:
def patient_risk_simulation_gemini(prompt, *, gemini_client=None, model=None):
    """Ask Gemini for a 0-10 hospitalization risk assessment of one patient.

    Args:
        prompt: Text describing the patient's characteristics. It is embedded
            verbatim in the instruction template built below.
        gemini_client: Optional object exposing
            ``models.generate_content(model=..., contents=...)``. When omitted,
            the notebook-level ``client`` is used.
        model: Optional model identifier. When omitted, the notebook-level
            ``model_name`` is used.

    Returns:
        str: The text of the model response, or ``""`` when the response
        carries no text, so callers can always write and parse the result.

    Raises:
        Errors raised by the client call are not caught here; the notebook
        output shows a ``ClientError`` (429 RESOURCE_EXHAUSTED) propagating
        from this call when the quota is exhausted.
    """
    full_prompt = (
        "Assign a numerical risk score from 0–10 (10 being the highest risk) "
        "for hospitalization for a person with the following characteristics:\n\n"
        f"{prompt}\n\n"
        "Respond only with:\n"
        "1. Risk Assessment Score\n"
        "2. A list of every single parameter and their associated numerical "
        "positive or negative risk values that add up to the Risk Assessment Score\n"
        "3. A one to two sentence Rationale"
    )

    # Fall back to the notebook-level globals only when no override is given,
    # so existing single-argument calls behave exactly as before.
    active_client = client if gemini_client is None else gemini_client
    active_model = model_name if model is None else model

    response = active_client.models.generate_content(
        model=active_model,
        contents=full_prompt,
    )

    # The SDK types `text` as optional; normalise a missing text to "" because
    # the callers pass the result straight to f.write() and str.replace().
    return response.text or ""


**Gemini Pipeline**

In [4]:
import os
import pandas as pd

import re  # used by the parser below; imported here so this cell is self-contained

# ---- Run configuration ----
df = pd.read_csv("../synthetic_data1.csv")

num_patients = 50
sim_reps = 5
folder_name = "full_sim_run_test"
parsed_csv_path = "parsed_output_fill_in_nones.csv"
# When True, a non-empty raw output already saved for a (patient, simulation)
# pair is parsed again instead of spending another API request. The captured
# output of this notebook shows the run aborting on a 20-request quota, so
# this lets a re-run continue where the previous one stopped. Set to False to
# regenerate every output (e.g. after changing the prompt template).
reuse_saved_outputs = True

os.makedirs(folder_name, exist_ok=True)


def _parse_risk_output(raw_text):
    """Extract the score, parameter values and rationale from one response.

    Defined in this cell so the cell runs on its own.

    Args:
        raw_text: The raw model response as a string.

    Returns:
        dict: ``"Risk_Assessment_Score"`` (float or None), one float entry per
        parsed two-column table row, and ``"Rationale"`` (str), in that
        insertion order.
    """
    parsed = {}
    text = raw_text.replace("–", "-").strip()
    lines = [line.strip() for line in text.splitlines()]

    # 1. Risk score: first look for a purely numeric line within the three
    #    lines following the heading (the original rule).
    score_val = None
    for i, line in enumerate(lines):
        if "Risk Assessment Score" not in line:
            continue
        for candidate in lines[i + 1:i + 4]:
            try:
                score_val = float(candidate)
                break
            except ValueError:
                continue
        if score_val is None:
            # Fallback: the score written on the heading line itself, e.g.
            # "Risk Assessment Score: 7/10". Only text after the heading and
            # after a ':' or '=' is inspected, so list numbering such as
            # "1." in front of the heading is never mistaken for the score.
            tail = line.split("Risk Assessment Score", 1)[1]
            match = re.search(r"[:=]\s*\**\s*(-?\d+(?:\.\d+)?)", tail)
            if match:
                score_val = float(match.group(1))
        break  # only the first heading is considered

    parsed["Risk_Assessment_Score"] = score_val

    # 2. Parameter table: rows directly after a header line that mentions
    #    both "Parameter" and "Value"; stops at the first non-table line.
    start = None
    for i, line in enumerate(lines):
        if "Parameter" in line and "Value" in line:
            start = i + 1
            break

    if start is not None:
        for line in lines[start:]:
            if not line.startswith("|"):
                break
            parts = [p.strip() for p in line.split("|") if p.strip()]
            if len(parts) != 2:
                continue
            name, val = parts
            try:
                parsed[name] = float(val)
            except ValueError:
                # Separator rows ("---") and non-numeric cells are skipped on
                # purpose; parsing is best-effort.
                continue

    # 3. Rationale: everything after a line containing "Rationale", plus any
    #    text that follows the word on that same line ("Rationale: ...").
    rationale = []
    capture = False
    for line in lines:
        if "Rationale" in line:
            capture = True
            tail = line.split("Rationale", 1)[1].lstrip(" :*-|#").strip()
            if tail:
                rationale.append(tail)
            continue
        if capture:
            rationale.append(line)

    parsed["Rationale"] = " ".join(rationale)
    return parsed


results = []
run_finished = False

try:
    # Never index past the end of the data if it has fewer rows than requested.
    for patient_idx in range(min(num_patients, len(df))):
        row = df.iloc[patient_idx]
        patient_prompt = ", ".join([f"{col}: {row[col]}" for col in df.columns])

        for sim_idx in range(sim_reps):
            out = {
                "Patient_ID": patient_idx,
                "Simulation_Number": sim_idx
            }

            file_path = os.path.join(
                folder_name, f"output_patient{patient_idx}_sim{sim_idx}.txt"
            )

            reused = (
                reuse_saved_outputs
                and os.path.exists(file_path)
                and os.path.getsize(file_path) > 0
            )
            if reused:
                # errors="replace" tolerates files written earlier with a
                # different platform default encoding.
                with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                    result = f.read()
            else:
                # ---- Gemini call ----
                # "or ''" guards against a response without text.
                result = patient_risk_simulation_gemini(patient_prompt) or ""

                # Save raw output; explicit UTF-8 because responses contain
                # non-ASCII characters (the parser normalises an en dash).
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(result)

            # ---- Parsing ----
            out.update(_parse_risk_output(result))
            results.append(out)

            suffix = " (reused saved output)" if reused else ""
            print(f"Completed patient {patient_idx}, sim {sim_idx}{suffix}")

    run_finished = True
finally:
    # Persist whatever was parsed even when an API error interrupts the run;
    # the exception itself still propagates. An interrupted run with no rows
    # does not overwrite an existing CSV.
    if run_finished or results:
        pd.DataFrame(results).to_csv(parsed_csv_path, index=False)


Completed patient 0, sim 0
Completed patient 0, sim 1
Completed patient 0, sim 2
Completed patient 0, sim 3
Completed patient 0, sim 4
Completed patient 1, sim 0
Completed patient 1, sim 1
Completed patient 1, sim 2
Completed patient 1, sim 3
Completed patient 1, sim 4
Completed patient 2, sim 0
Completed patient 2, sim 1
Completed patient 2, sim 2
Completed patient 2, sim 3
Completed patient 2, sim 4
Completed patient 3, sim 0
Completed patient 3, sim 1
Completed patient 3, sim 2
Completed patient 3, sim 3
Completed patient 3, sim 4


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 27.206229976s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.5-flash'}, 'quotaValue': '20'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '27s'}]}}

In [None]:
# Folder structure:
#
# results/
# ├── gemini-2.5-flash/
# │   ├── baseline/
# │   │   ├── patient0_sim0.txt
# │   │   ├── patient0_sim1.txt
# │   │   └── ...
# │   ├── baseline_plus_5/
# │   ├── baseline_plus_10/
# │   └── ...
# └── parsed_outputs.csv


Experiment Framework

In [6]:
import pickle
import os

# The pickled dictionary of feature-set DataFrames lives one directory up.
dfs_path = os.path.join(os.pardir, "dfs.pkl")

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load a dfs.pkl that was produced by a trusted source.
with open(dfs_path, mode="rb") as pickle_file:
    dfs = pickle.load(pickle_file)

# Show which feature sets are available.
print(dfs.keys())


dict_keys(['baseline', 'baseline_plus_5', 'baseline_plus_10', 'baseline_plus_15', 'baseline_plus_20', 'baseline_plus_25', 'baseline_plus_30', 'baseline_plus_35', 'baseline_plus_40', 'baseline_plus_45', 'baseline_plus_50'])


In [7]:
import os
import pandas as pd

import re  # used by the parser below; imported here so this cell is self-contained

# ---- Run configuration ----
num_patients = 50
sim_reps = 1
# Derived from model_name so the folder and the "Model" column always match
# the model that patient_risk_simulation_gemini actually calls.
base_output_dir = f"results/{model_name}"
# Location documented in the folder-structure cell of this notebook.
# NOTE(review): a run with a different model_name writes to the same file.
parsed_csv_path = os.path.join("results", "parsed_outputs.csv")
# When True, a non-empty raw output already saved for a (feature set, patient,
# simulation) triple is parsed again instead of spending another API request.
# The captured output of this notebook shows the run aborting on a 20-request
# quota, so this lets a re-run continue where the previous one stopped. Set
# to False to regenerate every output (e.g. after changing the prompt).
reuse_saved_outputs = True

os.makedirs(base_output_dir, exist_ok=True)


def _parse_risk_output(raw_text):
    """Extract the score, parameter values and rationale from one response.

    Defined in this cell so the cell runs on its own.

    Args:
        raw_text: The raw model response as a string.

    Returns:
        dict: ``"Risk_Assessment_Score"`` (float or None), one float entry per
        parsed two-column table row, and ``"Rationale"`` (str), in that
        insertion order.
    """
    parsed = {}
    text = raw_text.replace("–", "-").strip()
    lines = [line.strip() for line in text.splitlines()]

    # Risk score: first look for a purely numeric line within the three lines
    # following the heading (the original rule).
    score_val = None
    for i, line in enumerate(lines):
        if "Risk Assessment Score" not in line:
            continue
        for candidate in lines[i + 1:i + 4]:
            try:
                score_val = float(candidate)
                break
            except ValueError:
                continue
        if score_val is None:
            # Fallback: the score written on the heading line itself, e.g.
            # "Risk Assessment Score: 7/10". Only text after the heading and
            # after a ':' or '=' is inspected, so list numbering such as
            # "1." in front of the heading is never mistaken for the score.
            tail = line.split("Risk Assessment Score", 1)[1]
            match = re.search(r"[:=]\s*\**\s*(-?\d+(?:\.\d+)?)", tail)
            if match:
                score_val = float(match.group(1))
        break  # only the first heading is considered

    parsed["Risk_Assessment_Score"] = score_val

    # Parameter table: rows directly after a header line that mentions both
    # "Parameter" and "Value"; stops at the first non-table line.
    start = None
    for i, line in enumerate(lines):
        if "Parameter" in line and "Value" in line:
            start = i + 1
            break

    if start is not None:
        for line in lines[start:]:
            if not line.startswith("|"):
                break
            parts = [p.strip() for p in line.split("|") if p.strip()]
            if len(parts) != 2:
                continue
            name, val = parts
            try:
                parsed[name] = float(val)
            except ValueError:
                # Separator rows ("---") and non-numeric cells are skipped on
                # purpose; parsing is best-effort.
                continue

    # Rationale: everything after a line containing "Rationale", plus any
    # text that follows the word on that same line ("Rationale: ...").
    rationale = []
    capture = False
    for line in lines:
        if "Rationale" in line:
            capture = True
            tail = line.split("Rationale", 1)[1].lstrip(" :*-|#").strip()
            if tail:
                rationale.append(tail)
            continue
        if capture:
            rationale.append(line)

    parsed["Rationale"] = " ".join(rationale)
    return parsed


all_results = []
run_finished = False

try:
    for feature_set_name, df_feat in dfs.items():

        print(f"\nRunning feature set: {feature_set_name}")

        feature_dir = os.path.join(base_output_dir, feature_set_name)
        os.makedirs(feature_dir, exist_ok=True)

        # Never index past the end of a feature set with fewer rows than requested.
        for patient_idx in range(min(num_patients, len(df_feat))):
            row = df_feat.iloc[patient_idx]
            patient_prompt = ", ".join(
                [f"{col}: {row[col]}" for col in df_feat.columns]
            )

            for sim_idx in range(sim_reps):
                out = {
                    "Feature_Set": feature_set_name,
                    "Patient_ID": patient_idx,
                    "Simulation_Number": sim_idx,
                    "Model": model_name
                }

                raw_path = os.path.join(
                    feature_dir,
                    f"patient{patient_idx}_sim{sim_idx}.txt"
                )

                reused = (
                    reuse_saved_outputs
                    and os.path.exists(raw_path)
                    and os.path.getsize(raw_path) > 0
                )
                if reused:
                    # errors="replace" tolerates files written earlier with a
                    # different platform default encoding.
                    with open(raw_path, "r", encoding="utf-8", errors="replace") as f:
                        result = f.read()
                else:
                    # ---- Gemini call ----
                    # "or ''" guards against a response without text.
                    result = patient_risk_simulation_gemini(patient_prompt) or ""

                    # ---- Save raw output ----
                    # Explicit UTF-8 because responses contain non-ASCII
                    # characters (the parser normalises an en dash).
                    with open(raw_path, "w", encoding="utf-8") as f:
                        f.write(result)

                # ---- Parse output ----
                out.update(_parse_risk_output(result))
                all_results.append(out)

                suffix = " (reused saved output)" if reused else ""
                print(
                    f"Completed {feature_set_name} | "
                    f"patient {patient_idx} | sim {sim_idx}{suffix}"
                )

    run_finished = True
finally:
    # The original cell never wrote all_results to disk. Persist whatever was
    # parsed, even when an API error interrupts the run; the exception itself
    # still propagates. An interrupted run with no rows does not overwrite an
    # existing CSV.
    if run_finished or all_results:
        pd.DataFrame(all_results).to_csv(parsed_csv_path, index=False)



Running feature set: baseline
Completed baseline | patient 0 | sim 0


ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 42.496394315s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'model': 'gemini-2.5-flash', 'location': 'global'}, 'quotaValue': '20'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '42s'}]}}