# Imports and patient data loading

In [1]:
import requests
import json
import pandas
import datetime
import time
import warnings
warnings.filterwarnings('ignore')

# Text-generation endpoint that call_llm() POSTs to (host/port/path look like a
# local Ollama server -- confirm against the deployment).
_url = "http://localhost:11434/api/generate"
# Headers sent with every request: the body is a JSON document.
_headers = {"Content-Type": "application/json"}
# Default model tag, used as the default `model` argument of the run/stat helpers.
_model = "gemma3:12b"
# Load the whole patient table. low_memory=False makes pandas parse the file in
# one piece instead of chunk-by-chunk, which avoids mixed-dtype columns.
patients_original = pandas.read_csv("data/allergy_patients.csv", low_memory=False)
print(f"Patients: {len(patients_original):,}")

# Shuffle data: sample(frac=1) returns every row in random order and
# reset_index(drop=True) renumbers them 0..N-1, so the index equals the row position.
# NOTE(review): no random_state is given, so the order changes on every execution.
# Result files are keyed by position in this frame, so they cannot be matched to
# patients after a kernel restart -- consider a fixed seed.
patients_shuffle = patients_original.sample(frac=1).reset_index(drop=True)

# Number of rows per batch (passed as `groups` to the run/stat helpers).
group_count = 1000
# Number of consecutive batches to process (passed as `runs`).
run_count = 3

# Two positive (is_allergy == 1) rows used as worked examples in the multi-shot prompt.
# random_state=1 fixes which positions are drawn, but the frame being sampled is
# itself shuffled without a seed, so the actual patients can differ between sessions.
allergy_patients = patients_shuffle.loc[patients_shuffle["is_allergy"] == 1].sample(n=2, random_state=1)

# Chain-of-thought prompts: two "first thought" stages followed by a final
# True/False decision stage. Plain literals -- none of them interpolates anything.
chain_prompt_1 = "You will be provided with information about a patient with the end goal of determining if they have an allergy. You will look at the data and then respond with your first thought about what the data is saying. You are not making the final decision. You will give your initial thoughts that will be given to someone else to analyze the data alongside your thoughts."
chain_prompt_2 = "You will be provided with information about a patient with the end goal of determining if they have an allergy. You are given the patient data and you are also given a first thought from another source of what they believe the data is saying. You will look at the data and their thought and then respond with your first thought about what the data and source are claiming to generate your own idea. You are not making the final decision. You will give your initial thoughts that will be given to someone else to analyze the data alongside your thoughts."
chain_prompt_3 = "In a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy? You will be provided with information about a patient with the end goal of determining if they have an allergy. You are given the patient data and you are also given 2 thoughts from other sources of what they believe the data is saying. You will look at the data and their thoughts and then respond with your first thought about what the data and source are claiming to generate your own idea. You are making the final decision."
chain_prompts = [chain_prompt_1, chain_prompt_2, chain_prompt_3]

# Single-stage prompts. The patient description is appended by the run helpers.
zero_shot_prompt = "In a one word answer either 'True' or 'False' (no punctuation) and based on the following information, does the patient have an allergy?"
# NOTE(review): despite the name this prompt adds a role description but no example.
one_shot_prompt = "You are a medical professional who specializes in diagnosing patients who have allergies based on a small description of their medical records\n. In a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy?"
# Embeds the printed form of the two example rows (every column of each row,
# including is_allergy) directly in the prompt text.
multi_shot_prompt = f"You are a medical professional who specializes in diagnosing patients who have allergies based on a small description of their medical records\n. A patient such as: ```{allergy_patients.iloc[0]}``` and a patient such as: ```{allergy_patients.iloc[1]}``` are correctly diagnosed with an allergy. \nIn a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy?"

Patients: 2,007,217


In [2]:
def patient_entry_to_message(entry):
    """Render one patient record as the English text that is appended to a prompt.

    Args:
        entry: Mapping-like row (for example a pandas Series from ``iterrows``)
            providing the keys read below: ``gender``, ``marital``,
            ``birthyear``, ``race``, ``ethnicity``, ``income`` and the
            ``immunization_*`` / ``observation_*`` fields.

    Returns:
        A single string made of the personal, immunization and observation
        sentences separated by single spaces, followed by one trailing space
        (the slot of the currently empty allergy sentence).
    """
    this_year = datetime.datetime.now().year

    # Any gender value other than "M" is rendered with female pronouns.
    is_male = entry["gender"] == "M"
    subject = "He" if is_male else "She"
    possessive = "His" if is_male else "Her"

    personal = (
        f"The patient married status is : {entry['marital']}. "
        f"{subject} is {this_year - entry['birthyear']} years old, "
        f"is {entry['race']} and {entry['ethnicity']}. "
        f"{possessive} income is {entry['income']}."
    )
    immunization = (
        f"{subject} has {possessive} {entry['immunization_description']} "
        f"({entry['immunization_code']}) immunization on {entry['immunization_date']}"
    )
    observation = (
        f"{subject} has {entry['observation_description']} ({entry['observation_code']}) "
        f"on {entry['observation_date']} with a value of "
        f"{entry['observation_value']} {entry['observation_units']}"
    )
    # The allergy sentence is left empty, so no allergy information is part of
    # the message; its empty slot is what produces the trailing space.
    allergies = ""

    return " ".join((personal, immunization, observation, allergies))

def call_llm(prompt, model, timeout=None):
    """Send one non-streaming generation request and return the decoded reply.

    Args:
        prompt: Full prompt text to send.
        model: Model tag to run, e.g. ``"gemma3:12b"``.
        timeout: Optional timeout in seconds handed to ``requests.post``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        The JSON body of the reply as a dict. Callers in this file read the
        generated text from its ``"response"`` key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,  # Set to True if you want to handle streamed response
    }
    reply = requests.post(_url, headers=_headers, data=json.dumps(payload), timeout=timeout)
    # Fail here with the real HTTP error instead of letting callers hit a
    # confusing KeyError on "response" when the server returned an error body.
    reply.raise_for_status()
    return reply.json()

def llm_multimessage(prompt):
    """Ask the default model a True/False allergy question via the chat API and print the answer.

    The request uses a system + user ``messages`` payload. That payload shape and
    the ``["message"]["content"]`` reply shape belong to the chat endpoint, not to
    the text-generation endpoint stored in ``_url``, so the request is sent to the
    sibling ``.../chat`` path.

    Args:
        prompt: Patient description appended to the fixed True/False question.

    Returns:
        None. The model's answer is printed.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    prompt = f"In a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy? {prompt}"
    data = {
        "model": _model,
        "messages": [
            {
                "role": "system",
                "content": "You are a helpful assistant who explains things clearly and concisely.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ],
        "stream": False,
    }
    # Swap the last path segment of the generate URL for "chat"
    # (assumes _url ends in ".../generate" -- true for the value set in this file).
    chat_url = _url.rsplit("/", 1)[0] + "/chat"
    # The payload asks for a single non-streamed reply, so the HTTP response is
    # read normally rather than with stream=True.
    multi_response = requests.post(chat_url, headers=_headers, data=json.dumps(data))
    multi_response.raise_for_status()
    print(multi_response.json()["message"]["content"])

def run_llm_predictions2(group, data, path, prompt, model=_model, start=0, end=0, runs=0):
    """Run single-prompt predictions over consecutive batches of rows.

    Batch ``i`` covers index labels ``start + i * group`` through
    ``start + (i + 1) * group - 1``. After every batch all predictions collected
    so far are rewritten to ``{path}/{model name}/{start}-{end}.txt``, which acts
    as a checkpoint.

    Args:
        group: Number of rows per batch.
        data: DataFrame of patients; rows are matched by index label, which is
            expected to be ascending (iteration stops at the first label past
            the batch).
        path: Base output directory.
        prompt: Prompt text; the patient description is appended to it.
        model: Model tag; the part before ``:`` names the output sub-folder.
        start: First index label to process.
        end: Used with ``start`` to derive the number of batches when ``runs``
            is 0, and always used in the output file name.
        runs: Number of batches. When 0 it is computed as
            ``(end - start) // group``.

    Returns:
        List of ``(prediction_text, label_bool)`` tuples for every processed row.
    """
    import os  # function-scope import: only needed to create the output folder

    results = []

    # If runs is zero start & end are expected to be set to valid numbers
    if runs == 0:
        count = end - start
        runs = count // group
    else:
        # Previously `count` was undefined on this path and the progress
        # print below raised NameError.
        count = runs * group

    model_str = model.split(":")[0]
    out_dir = f"{path}/{model_str}"
    # Create the folder up front so a missing directory fails before any
    # model calls are made instead of after a whole batch of them.
    os.makedirs(out_dir, exist_ok=True)

    for i in range(runs):
        s = time.time()
        # Offset by `start`; the window used to begin at row 0 regardless of it.
        ss = start + i * group
        ee = ss + group - 1

        # Iterate patient entries
        for index, entry in data.iterrows():
            if index < ss:
                # Skip entry if prior to start
                continue
            if index > ee:
                # Skip prediction if reached 'end'
                break

            # Run the prediction
            message = patient_entry_to_message(entry)
            response = call_llm(prompt + message, model)
            results.append((response["response"].strip(), bool(entry["is_allergy"])))
            print(f"Completed entry {len(results)}/{count}", flush=True)

        # Save results to a file
        with open(f"{out_dir}/{start}-{end}.txt", "w") as file:
            for prediction, label in results:
                file.write(f"Prediction: {prediction}, Label: {label}\n")

        elapse = time.time() - s
        print(f"Current Run: {i+ 1}/{runs} | Elapsed Time: {elapse:.2f} seconds")

    return results

def run_llm_predictions(groups, runs, data, path, prompts, model=_model):
    """Run (optionally chained) prompts over consecutive batches and save one file per batch.

    Batch ``i`` covers index labels ``i * groups`` through ``(i + 1) * groups - 1``
    and is written to ``{path}/{model name}/{first}-{last}.txt`` with one
    ``Prediction: ..., Label: ...`` line per patient. Each file holds only its
    own batch, so reading all batch files back counts every patient once.

    With more than one prompt, the prompts are run in order for each patient and
    every stage receives the answers of all earlier stages; the answer of the
    last stage is the prediction.

    Args:
        groups: Number of rows per batch.
        runs: Number of batches to process.
        data: DataFrame of patients; rows are matched by index label, which is
            expected to be ascending (iteration stops at the first label past
            the batch).
        path: Base output directory.
        prompts: Sequence of prompt strings. A single string is accepted and
            treated as a one-element sequence.
        model: Model tag; the part before ``:`` names the output sub-folder.

    Returns:
        None.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    import os  # function-scope import: only needed to create the output folder

    # A bare string would otherwise be iterated character by character.
    if isinstance(prompts, str):
        prompts = [prompts]
    if not prompts:
        raise ValueError("prompts must contain at least one prompt")

    model_str = model.split(":")[0]
    out_dir = f"{path}/{model_str}"
    # Create the folder up front so a missing directory fails before any
    # model calls are made instead of after a whole batch of them.
    os.makedirs(out_dir, exist_ok=True)

    # Characters removed from the final answer, so "True." still reads "True"
    # and a comma cannot break the "prediction, label" line format.
    punctuation = str.maketrans("", "", ".,;:")

    for i in range(runs):
        s = time.time()

        start = i * groups
        end = start + groups - 1

        # Reset per batch: the list used to keep growing across batches, so
        # later files repeated every earlier batch and the stats double-counted.
        results = []

        # Iterate patient entries
        for index, entry in data.iterrows():
            if index < start:
                # Skip entry if prior to start
                continue
            if index > end:
                # Skip prediction if reached 'end'
                break

            # Run the prediction
            message = patient_entry_to_message(entry)
            response = None
            thoughts = ""

            for prompt in prompts:
                custom_prompt = f"{prompt}  \n```{message}```"
                if len(prompts) > 1:
                    custom_prompt += f" ```{thoughts}```"

                response = call_llm(custom_prompt, model)
                # Accumulate rather than overwrite, so the final stage sees
                # every earlier thought as its prompt text promises.
                thoughts += response["response"].strip() + "\n"

            # remove any punctuation
            prediction = response["response"].strip().translate(punctuation)
            # Collapse line breaks/extra whitespace so each record stays on one line.
            prediction = " ".join(prediction.split())
            results.append((prediction, bool(entry["is_allergy"])))

        # Save results to a file
        with open(f"{out_dir}/{start}-{end}.txt", "w") as file:
            for prediction, label in results:
                file.write(f"Prediction: {prediction}, Label: {label}\n")

        elapse = time.time() - s
        print(f"Current Run: {i+ 1}/{runs} | Elapsed Time: {elapse:.2f} seconds")

def _read_prediction_records(file_path):
    """Parse one results file into ``(prediction, label)`` string pairs.

    A record ends on the line that contains ``, Label:``. Lines without that
    marker are treated as earlier lines of a multi-line prediction, and commas
    or colons inside the prediction text are kept as part of it.
    """
    records = []
    pending = []  # earlier lines of a prediction that spans several lines
    with open(file_path, "r") as file:
        for raw_line in file:
            text = raw_line.strip()
            body, separator, label = text.rpartition(", Label:")
            if not separator:
                if text:
                    pending.append(text)
                continue

            pending.append(body)
            prediction = "\n".join(pending)
            pending = []
            if prediction.startswith("Prediction:"):
                prediction = prediction[len("Prediction:"):]
            records.append((prediction.strip(), label.strip()))
    return records


def calculate_result_stats(groups, runs, path, model=_model, show_bonus=False):
    """Read the per-batch result files and print accuracy statistics.

    For each batch ``i`` the file ``{path}/{model name}/{first}-{last}.txt`` is
    read, where ``first = i * groups`` and ``last = first + groups - 1``.

    Only predictions that are exactly ``"True"`` or ``"False"`` enter the
    confusion counts, so accuracy is computed over those entries only; any
    other answer is counted in "Amount of entries" but not in accuracy.

    Args:
        groups: Number of rows per batch (determines the file names).
        runs: Number of batch files to read.
        path: Base results directory.
        model: Model tag; the part before ``:`` names the sub-folder.
        show_bonus: When true, also print the confusion counts, precision
            and recall.

    Returns:
        None. All statistics are printed.
    """
    model_str = model.split(":")[0]

    results = []
    for i in range(runs):
        start = i * groups
        end = start + groups - 1
        # Read results from file
        results.extend(_read_prediction_records(f"{path}/{model_str}/{start}-{end}.txt"))

    # Calculate & print stats
    TP, TN, FN, FP, CC = 0, 0, 0, 0, 0
    for prediction, label in results:
        if label == "True" and prediction == "True":
            TP += 1
        elif label == "True" and prediction == "False":
            FN += 1
        elif label == "False" and prediction == "True":
            FP += 1
        elif label == "False" and prediction == "False":
            TN += 1

        if label == prediction:
            CC += 1

    classified = TP + TN + FP + FN
    if classified:
        accuracy = (TP + TN) / classified
    else:
        # Nothing usable to score: report it and fall back to zero.
        accuracy = 0
        print("ZeroDivisionError: Division by zero occurred in accuracy, precision, or recall calculation.")
    precision = TP / (TP + FP) if (TP + FP) != 0 else 0
    recall = TP / (TP + FN) if (TP + FN) != 0 else 0

    # Print out stats
    print(f"Amount of entries: {len(results)}")
    print(f"Amount Correct: {CC}")
    print(f"Accuracy: {accuracy:.4f}")
    if show_bonus:
        print(f"True Positives: {TP}")
        print(f"True Negatives: {TN}")
        print(f"False Positives: {FP}")
        print(f"False Negatives: {FN}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")

# Run LLM predictions using gemma (Zero, One, Multi Shot)

In [None]:
# Run the zero-, one- and multi-shot experiments with the default model.
# Each strategy writes into its own folder under results/.
for shot_label, shot_prompt in (
    ("Zero Shot", zero_shot_prompt),
    ("One Shot", one_shot_prompt),
    ("Multi Shot", multi_shot_prompt),
):
    run_llm_predictions(group_count, run_count, patients_shuffle, f"results/{shot_label}", [shot_prompt])


## Calculate statistics for the results

In [None]:
# Print the statistics of each strategy under its own heading; every heading
# after the first is preceded by two blank lines.
for position, shot_label in enumerate(("Zero Shot", "One Shot", "Multi Shot")):
    print(shot_label if position == 0 else f"\n\n{shot_label}")
    calculate_result_stats(group_count, run_count, f"results/{shot_label}")

# Chain of thought

In [None]:
# Batch size and number of batches for the chain-of-thought experiment.
group_count, run_count = 1000, 3

# Three-stage chain: two "thought" prompts followed by the final True/False prompt.
run_llm_predictions(
    group_count,
    run_count,
    patients_shuffle,
    "results/Chain",
    chain_prompts,
    model="llama3.2:latest",
)

# Optional zero-shot comparisons with other models. `prompts` is a sequence,
# so a single prompt is passed as a one-element list.
# run_llm_predictions(group_count, run_count, patients_shuffle, "results/Zero Shot", [zero_shot_prompt], "gemma3:12b")
# run_llm_predictions(group_count, run_count, patients_shuffle, "results/Zero Shot", [zero_shot_prompt], "deepseek-r1:1.5b")

# Show stats for each model
# calculate_result_stats(group_count, run_count, "results/Zero Shot", "gemma3:12b")
# calculate_result_stats(group_count, run_count, "results/Zero Shot", "deepseek-r1:1.5b")
calculate_result_stats(group_count, run_count, "results/Chain", model="llama3.2:latest")