# Load patient data and imports

In [49]:
import requests
import json
import pandas
import datetime
import time
import os
import warnings
warnings.filterwarnings('ignore')

# Endpoint and headers used by call_llm() for every generation request.
# NOTE(review): localhost:11434 with /api/generate looks like a local Ollama server - confirm.
_url = "http://localhost:11434/api/generate"
_headers = {"Content-Type": "application/json"}

# Full, unshuffled patient table as exported to CSV.
patients_original = pandas.read_csv("data/allergy_patients.csv", low_memory=False)

print(f"Patients: {len(patients_original):,}")

# Flip to True to create (and overwrite) a fresh shuffled copy of the dataset.
new_dataset = False
if new_dataset:
    # Shuffle all rows and renumber the index from 0.
    # No random_state is passed, so each reshuffle produces a different order;
    # the saved CSV below is what keeps later runs comparable.
    patients_shuffle = patients_original.sample(frac=1).reset_index(drop=True)

    # Save dataset to csv
    patients_shuffle.to_csv('data/shuffled_data.csv', index=False, header=True)
else:
    # Load the previously saved shuffle so that different models are tested on the same rows
    # in the same order.
    patients_shuffle = pandas.read_csv("data/shuffled_data.csv", low_memory=False)


# Chain Prompts
# Two positive (is_allergy == 1) rows, sampled with a fixed seed; they are used as the
# worked examples inside multi_shot_prompt below.
allergy_patients = patients_shuffle[patients_shuffle["is_allergy"] == 1].sample(n=2, random_state=1)
# Step 1: first impression of the patient data only.
chain_prompt_1 = f"You will be provided with information about a patient with the end goal of determining if they have an allergy. You will look at the data and then respond with your first thought about what the data is saying. You are not making the final decision. You will give your initial thoughts that will be given to someone else to analyze the data alongside your thoughts."
# Step 2: second impression, given the data plus the step-1 thought.
chain_prompt_2 = f"You will be provided with information about a patient with the end goal of determining if they have an allergy. You are given the patient data and you are also given a first thought from another source of what they believe the data is saying. You will look at the data and their thought and then respond with your first thought about what the data and source are claiming to generate your own idea. You are not making the final decision. You will give your initial thoughts that will be given to someone else to analyze the data alongside your thoughts."
# Step 3: final one-word 'True'/'False' decision; the text tells the model it receives 2 prior thoughts.
chain_prompt_3 = f"In a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy? You will be provided with information about a patient with the end goal of determining if they have an allergy. You are given the patient data and you are also given 2 thoughts from other sources of what they believe the data is saying. You will look at the data and their thoughts and then respond with your thoughts about what the data and source are claiming to generate your own idea. You are making the final decision."
# Ordered list of chain steps; each step's prompt is sent in sequence for one patient.
chain_prompts = [chain_prompt_1, chain_prompt_2, chain_prompt_3]

# Shot prompts
# Bare question with no role description and no examples.
zero_shot_prompt = f"In a one word answer either 'True' or 'False' (no punctuation) and based on the following information, does the patient have an allergy?"
# Adds a role description; note that despite the name it does not embed an example patient.
one_shot_prompt = f"You are a medical professional who specializes in diagnosing patients who have allergies based on a small description of their medical records\n. In a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy?"
# Role description plus the two sampled positive rows, interpolated using the string form
# of each pandas row (not the sentence format built by patient_entry_to_message).
multi_shot_prompt = f"You are a medical professional who specializes in diagnosing patients who have allergies based on a small description of their medical records\n. A patient such as: ```{allergy_patients.iloc[0]}``` and a patient such as: ```{allergy_patients.iloc[1]}``` are correctly diagnosed with an allergy. \nIn a one word answer either 'True' or 'False' and based on the following information, does the patient have an allergy?"

Patients: 2,007,217


In [None]:
def patient_entry_to_message(entry):
    """Render one patient record as the plain-English description sent to the LLM.

    Args:
        entry: Mapping-like row (e.g. a pandas Series or dict) providing the
            keys read below: gender, marital, birthyear, race, ethnicity,
            income, immunization_* and observation_*.

    Returns:
        A single string made of the personal, immunization and observation
        sentences separated by single spaces, followed by one trailing space
        (the slot of the currently empty allergy sentence).
    """
    # Any gender value other than "M" is rendered with female pronouns.
    is_male = entry["gender"] == "M"
    subject, possessive = ("He", "His") if is_male else ("She", "Her")

    # Age is derived from the birth year only, relative to the current calendar year.
    age = datetime.datetime.now().year - entry['birthyear']

    sentences = [
        f"The patient married status is : {entry['marital']}. \n{subject} is {age} years old, is {entry['race']} and {entry['ethnicity']}. \n{possessive} income is ${entry['income']:,}.",
        f"{subject} had {possessive} {entry['immunization_description']} ({entry['immunization_code']}) immunization on {entry['immunization_date']}",
        f"{subject} has {entry['observation_description']} ({entry['observation_code']}) on {entry['observation_date']} with a value of {entry['observation_value']} {entry['observation_units']}",
        # Allergy sentence is deliberately left blank; the empty slot keeps the
        # trailing space the message has always ended with.
        "",
    ]
    return " ".join(sentences)

def call_llm(prompt, model, timeout=None):
    """Send one non-streaming generation request to the LLM server.

    Args:
        prompt: Full prompt text to send.
        model: Model identifier understood by the server (e.g. "gemma3:12b").
        timeout: Optional timeout in seconds passed to ``requests.post``.
            The default ``None`` waits indefinitely, matching the previous
            behavior; pass a number to stop a hung server from blocking a run.

    Returns:
        The decoded JSON body of the reply as a dict. Callers read the
        generated text from its "response" key.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (for example an unknown model), instead of failing later with a
            KeyError on the missing "response" key.
        requests.RequestException: For connection problems or timeouts.
    """
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False  # Set to True if you want to handle streamed response
    }
    single_response = requests.post(
        _url, headers=_headers, data=json.dumps(data), timeout=timeout
    )
    # Fail loudly at the request site rather than on a malformed payload downstream.
    single_response.raise_for_status()
    return single_response.json()

def clean_prediction(response):
    """Reduce a raw model reply to its first word with punctuation removed.

    All ASCII punctuation is dropped (so "True.", "'True'" and "**True**"
    all become "True"), then the text is split on any whitespace - spaces,
    tabs and newlines - and the first word is returned. Splitting on every
    kind of whitespace matters because predictions are stored one per line;
    a newline left inside the prediction would corrupt the results file.

    Args:
        response: Raw text returned by the model.

    Returns:
        The first cleaned word, or "" when nothing is left after cleaning.
    """
    import string  # local import: only this helper needs it

    without_punctuation = response.translate(
        str.maketrans("", "", string.punctuation)
    )
    words = without_punctuation.split()
    return words[0] if words else ""

def _predict_entry(entry, prompts, model):
    """Run the prompt sequence for one patient row and return the cleaned final answer.

    With a single prompt the patient message is sent once. With several
    prompts (a chain) every step also receives, in a second fenced block,
    all answers produced by the earlier steps, so the last step really sees
    every prior thought. The first step of a chain receives an empty block.
    """
    if not prompts:
        raise ValueError("prompts must contain at least one prompt")

    message = patient_entry_to_message(entry)
    is_chain = len(prompts) > 1
    thoughts = ""
    answer = ""

    for prompt in prompts:
        custom_prompt = f"{prompt}  \n```{message}```"
        if is_chain:
            custom_prompt += f" ```{thoughts}```"

        answer = call_llm(custom_prompt, model)["response"].strip()
        # Accumulate rather than overwrite: later steps are told they get
        # the thoughts of every earlier step.
        thoughts += answer + "\n"

    return clean_prediction(answer)


def _run_prediction_batch(first, last, data, path, prompts, model):
    """Predict rows whose index label is in [first, last] and write them to '{path}/{first}-{last}.txt'."""
    # Create the output folder up front so a missing directory cannot throw
    # away a finished batch of (slow) predictions at write time.
    if path:
        os.makedirs(path, exist_ok=True)

    results = []
    for index, entry in data.iterrows():
        if index < first:
            # Not yet inside the batch window
            continue
        if index > last:
            # Past the window: stop scanning (assumes an ascending index)
            break
        results.append((_predict_entry(entry, prompts, model), bool(entry["is_allergy"])))

    with open(f"{path}/{first}-{last}.txt", "w") as file:
        file.writelines(
            f"Prediction: {prediction} | Label: {label}\n"
            for prediction, label in results
        )


def run_llm_predictions(groups, runs, data, path, prompts, model):
    """Predict the first ``groups * runs`` rows of ``data`` in batches of ``groups`` rows.

    Args:
        groups: Number of rows per batch (and per output file).
        runs: Number of batches; batch ``i`` covers index labels
            ``i * groups`` through ``i * groups + groups - 1``.
        data: DataFrame of patients; rows must provide "is_allergy" and the
            columns read by patient_entry_to_message.
        path: Directory for the per-batch result files (created if missing).
        prompts: List of prompt strings; more than one means chain prompting.
        model: Model identifier passed to call_llm.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    for i in range(runs):
        s = time.time()

        start = i * groups
        end = start + groups - 1
        _run_prediction_batch(start, end, data, path, prompts, model)

        elapse = time.time() - s
        print(f"Current Run: {i+ 1}/{runs} | Elapsed Time: {elapse:.2f} seconds")


def run_llm_predictions_on_set(start, end, groups, data, path, prompts, model):
    """Predict rows from index ``start`` up to (not including) ``end`` in batches of ``groups``.

    The number of batches is ``(end - start) // groups``; a remainder smaller
    than one full batch is not processed.

    Args:
        start: Index label of the first row to predict.
        end: Index label bounding the range; used only to compute the number
            of full batches.
        groups: Number of rows per batch (and per output file).
        data: DataFrame of patients; rows must provide "is_allergy" and the
            columns read by patient_entry_to_message.
        path: Directory for the per-batch result files (created if missing).
        prompts: List of prompt strings; more than one means chain prompting.
        model: Model identifier passed to call_llm.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    count = end - start
    runs = count // groups

    for i in range(runs):
        ss = i * groups + start
        ee = ss + groups - 1

        s = time.time()
        _run_prediction_batch(ss, ee, data, path, prompts, model)

        elapse = time.time() - s
        print(f"Current Run: {i+1}/{runs} Elapsed Time: {elapse:.2f} seconds")

def _parse_result_line(line):
    """Split a 'Prediction: X | Label: Y' line into (prediction, label); return None if malformed."""
    fields = line.strip().split("|")
    if len(fields) < 2:
        return None

    prediction_parts = fields[0].split(":")
    label_parts = fields[1].split(":")
    if len(prediction_parts) < 2 or len(label_parts) < 2:
        return None

    return prediction_parts[1].strip(), label_parts[1].strip()


def _report_result_stats(lines, show_bonus, report_unknown):
    """Parse result lines, tally the confusion matrix and print the summary."""
    results = []
    skipped = 0
    for line in lines:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no result
            continue
        parsed = _parse_result_line(line)
        if parsed is None:
            skipped += 1
            continue
        results.append(parsed)

    tp = tn = fn = fp = correct = unknown = 0
    for prediction, label in results:
        if label == "True" and prediction == "True":
            tp += 1
        elif label == "True" and prediction == "False":
            fn += 1
        elif label == "False" and prediction == "True":
            fp += 1
        elif label == "False" and prediction == "False":
            tn += 1
        else:
            if report_unknown:
                print(f"Unknown label: {label} | Unknown prediction: {prediction}")
            unknown += 1
        if label == prediction:
            correct += 1

    # Accuracy is computed over classifiable results only; unknown answers
    # are reported separately and are not part of the denominator.
    classified = tp + tn + fp + fn
    accuracy = 0
    if classified:
        accuracy = (tp + tn) / classified
    else:
        print("ZeroDivisionError: Division by zero occurred in accuracy, precision, or recall calculation.")
    precision = tp / (tp + fp) if (tp + fp) != 0 else 0
    recall = tp / (tp + fn) if (tp + fn) != 0 else 0

    if skipped:
        print(f"Skipped malformed lines: {skipped}")
    print(f"Amount of entries: {len(results)}")
    print(f"Amount Correct: {correct}")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Unknown: {unknown}")
    if show_bonus:
        print(f"True Positives: {tp}")
        print(f"True Negatives: {tn}")
        print(f"False Positives: {fp}")
        print(f"False Negatives: {fn}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")


def calculate_result_stats(path, show_bonus=False):
    """Print accuracy statistics for every result in '{path}/Results.txt'.

    Each line is expected to look like 'Prediction: <word> | Label: <word>'.
    Blank lines are ignored and malformed lines are skipped (and counted in
    the output) instead of aborting the whole report.

    Args:
        path: Directory containing Results.txt.
        show_bonus: Also print the confusion-matrix counts, precision and recall.
    """
    with open(f"{path}/Results.txt", "r") as file:
        lines = file.readlines()

    _report_result_stats(lines, show_bonus, report_unknown=False)


def calculate_result_stats_on_set(path, start, end, show_bonus=False):
    """Print accuracy statistics for lines ``start``..``end`` (inclusive) of '{path}/Results.txt'.

    Same report as calculate_result_stats, restricted to a 0-based line range,
    and additionally printing every result that could not be classified.

    Args:
        path: Directory containing Results.txt.
        start: 0-based index of the first line to include.
        end: 0-based index of the last line to include.
        show_bonus: Also print the confusion-matrix counts, precision and recall.
    """
    with open(f"{path}/Results.txt", "r") as file:
        lines = file.readlines()

    selected = [line for index, line in enumerate(lines) if start <= index <= end]
    _report_result_stats(selected, show_bonus, report_unknown=True)

def _batch_sort_key(filename):
    """Order '<start>-<end>.txt' batch files by numeric start; other names sort last, by name."""
    start_text = filename.partition("-")[0]
    try:
        return (0, int(start_text), filename)
    except ValueError:
        return (1, 0, filename)


def combine_txt_results(path):
    """Merge every '.txt' batch file in ``path`` into a fresh '{path}/Results.txt'.

    Any existing Results.txt is deleted first so it is never merged into
    itself. Batch files are concatenated in ascending order of their numeric
    start index (e.g. 0-999, 1000-1999, ..., 10000-10999) so the line order
    of Results.txt is deterministic and follows the dataset order;
    os.listdir alone returns names in arbitrary order.

    Args:
        path: Directory holding the per-batch result files.
    """
    results_path = os.path.join(path, 'Results.txt')

    # Delete file if it exists
    if os.path.exists(results_path):
        os.remove(results_path)

    # Get all the batch files in the directory, in dataset order
    files = sorted(
        (f for f in os.listdir(path) if f.endswith('.txt')),
        key=_batch_sort_key,
    )

    # Combine the contents of the files
    combined_results = []
    for name in files:
        with open(os.path.join(path, name), 'r') as f:
            lines = f.readlines()
        # Keep one result per line even if a file lacks its final newline
        if lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"
        combined_results.extend(lines)

    # Write the combined results to a new file
    with open(results_path, 'w') as f:
        f.writelines(combined_results)

def print_model_results(c_model):
    """Rebuild Results.txt for each prompt style of a model and print its stats.

    Args:
        c_model: Folder name of the model under 'results/' (e.g. "gemma3").
    """
    print(f"Model: {c_model} Results")
    path = f"results/{c_model}"

    # (heading printed before the stats, sub-folder holding the batch files)
    sections = (
        ("\nZero Shot", "Zero Shot"),
        ("\n\nOne Shot", "One Shot"),
        ("\n\nMulti Shot", "Multi Shot"),
        ("\n\nChain Shot", "Chain"),
    )

    # First pass: merge the batch files of every prompt style.
    for _, folder in sections:
        combine_txt_results(f"{path}/{folder}")

    # Second pass: report each prompt style under its heading.
    for heading, folder in sections:
        print(heading)
        calculate_result_stats(f"{path}/{folder}")

# Run LLM predictions (Zero, One, Multi Shot, Chain Prompt)

In [50]:
def run_all_prediction_types(group_count, run_count, group_count_2, run_count_2, result_path, model):
    """Run zero-shot, one-shot, multi-shot and chain predictions for one model.

    Args:
        group_count: Rows per batch for the three single-prompt styles.
        run_count: Number of batches for the three single-prompt styles.
        group_count_2: Rows per batch for chain prompting.
        run_count_2: Number of batches for chain prompting (kept separate
            because the chain makes several LLM calls per row and is slower).
        result_path: Base directory; each style writes to its own sub-folder
            ("Zero Shot", "One Shot", "Multi Shot", "Chain").
        model: Model identifier passed through to the LLM call.
    """
    single_prompt_styles = (
        ("Zero Shot", zero_shot_prompt),
        ("One Shot", one_shot_prompt),
        ("Multi Shot", multi_shot_prompt),
    )
    for folder, prompt in single_prompt_styles:
        run_llm_predictions(group_count, run_count, patients_shuffle, f"{result_path}/{folder}", [prompt], model)

    # Chain prompts run on their own (smaller) group/run sizes.
    # The dataset must be passed explicitly, and chain_prompts is already the
    # list of steps - wrapping it in another list would send all three
    # prompts as one stringified list.
    chain_path = f"{result_path}/Chain"
    run_llm_predictions(group_count_2, run_count_2, patients_shuffle, chain_path, chain_prompts, model)


# Full-size configuration, kept for reference: 25,000 entries as 25 runs of 1,000 for the
# single-prompt styles, then chain prompts in groups of 1,000.
# NOTE(review): the calls below pass run_count_2=1, i.e. 1,000 chain entries, not the
# 3,000 entries / 3 runs originally described here - confirm which is intended.
# run_all_prediction_types(1000, 25, 1000, 1, "results/gemma3", "gemma3:12b")
# run_all_prediction_types(1000, 25, 1000, 1, "results/llama3.2", "llama3.2:latest")

# Smoke-test configuration: one batch of a single entry per prompt style, for each model.
run_all_prediction_types(1, 1, 1, 1, "results/gemma3", "gemma3:12b")
run_all_prediction_types(1, 1, 1, 1, "results/llama3.2", "llama3.2:latest")

Current Run: 1/1 | Elapsed Time: 33.42 seconds
Current Run: 1/1 | Elapsed Time: 10.60 seconds


KeyboardInterrupt: 

## Calculate Stats of the results

In [None]:
# Merge the per-batch files and print the statistics for each model.
# The names are the folder names under 'results/' that the prediction runs wrote to.
print_model_results("gemma3")
print_model_results("llama3.2")