<a href="https://colab.research.google.com/github/ManagementBC/Organ/blob/main/Performancetests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time
import openai

# ✅ OpenAI API Key
openai.api_key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

# ✅ Fine-tuned model ID
MODEL_ID = "ft:gpt-4o-mini-2024-07-18:personal:organdonations:AmbvC8Vu"

# ✅ Define test donor cases
donor_test_cases = [1, 2, 3, 4, 5]  # Example donor IDs

# ✅ Function to get response time
def measure_response_time(donor_id):
    prompt = {
        "donor_id": donor_id,
        "recipients": [{"recipient_id": 10}, {"recipient_id": 20}],  # Example recipient data
    }

    start_time = time.time()  # Start timing

    response = openai.ChatCompletion.create(
        model=MODEL_ID,
        messages=[
            {"role": "system", "content": "You are an AI designed to match organ donors to recipients. Return only JSON output."},
            {"role": "user", "content": str(prompt)},
        ],
    )

    end_time = time.time()  # End timing
    response_time = round(end_time - start_time, 3)  # Calculate elapsed time (in seconds)

    return response_time, response  # Return response time and response

# ✅ Measure response time for 5 different donor cases
response_times = []
for donor in donor_test_cases:
    response_time, response = measure_response_time(donor)
    response_times.append(response_time)
    print(f"⏳ Response Time for Donor {donor}: {response_time} seconds")

# ✅ Calculate and display the average response time
average_response_time = round(sum(response_times) / len(response_times), 3)
print(f"\n✅ Average Response Time over {len(donor_test_cases)} test cases: {average_response_time} seconds")


⏳ Response Time for Donor 1: 1.168 seconds
⏳ Response Time for Donor 2: 0.918 seconds
⏳ Response Time for Donor 3: 0.654 seconds
⏳ Response Time for Donor 4: 3.94 seconds
⏳ Response Time for Donor 5: 8.565 seconds

✅ Average Response Time over 5 test cases: 3.049 seconds


In [None]:
from rouge_score import rouge_scorer

# Convert JSON responses to strings for evaluation
reference = json.dumps(expected_output)
candidate = json.dumps(json.loads(model_output))

# Compute BLEU Score
bleu_score = sentence_bleu([reference.split()], candidate.split())

# Compute ROUGE Score
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
rouge_scores = scorer.score(reference, candidate)

# Print the evaluation results
print(f"🔹 BLEU Score: {bleu_score:.4f}")
print(f"🔹 ROUGE-1 Score: {rouge_scores['rouge1'].fmeasure:.4f}")
print(f"🔹 ROUGE-L Score: {rouge_scores['rougeL'].fmeasure:.4f}")


🔹 BLEU Score: 0.8091
🔹 ROUGE-1 Score: 0.9091
🔹 ROUGE-L Score: 0.9091


In [None]:
# Install required dependencies (if not installed)
!pip install openai==0.28 gradio --quiet

import openai
import json
import gradio as gr

# Set up OpenAI API key (Replace 'your-api-key' with your actual key)
openai.api_key = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"  # Make sure to set this properly in Colab

# Define fine-tuned model ID
MODEL_ID = "ft:gpt-4o-mini-2024-07-18:personal:organdonations:AmbvC8Vu"

# Hardcoded recipient data (since IPFS is not set up yet)
recipient_data = [
    {
        "id": "R67890",
        "age": 50,
        "gender": "Female",
        "blood_type": "O",
        "hla_typing": ["A1", "A3", "B7", "B27", "DR15", "DR11"],
        "geographical_location": "San Francisco, CA",
        "waiting_time_days": 365,
        "medical_urgency_status": "High"
    },
    {
        "id": "R23456",
        "age": 35,
        "gender": "Male",
        "blood_type": "O",
        "hla_typing": ["A1", "A2", "B7", "B8", "DR15", "DR4"],
        "geographical_location": "Boston, MA",
        "waiting_time_days": 200,
        "medical_urgency_status": "Moderate"
    },
    {
        "id": "R81239",
        "age": 45,
        "gender": "Female",
        "blood_type": "A",
        "hla_typing": ["A2", "A3", "B7", "B44", "DR4", "DR13"],
        "geographical_location": "Seattle, WA",
        "waiting_time_days": 500,
        "medical_urgency_status": "High"
    },
    {
        "id": "R99887",
        "age": 60,
        "gender": "Male",
        "blood_type": "B",
        "hla_typing": ["A1", "A11", "B15", "B44", "DR11", "DR7"],
        "geographical_location": "Miami, FL",
        "waiting_time_days": 150,
        "medical_urgency_status": "Low"
    },
    {
        "id": "R55661",
        "age": 28,
        "gender": "Female",
        "blood_type": "AB",
        "hla_typing": ["A3", "A24", "B8", "B35", "DR1", "DR13"],
        "geographical_location": "Chicago, IL",
        "waiting_time_days": 310,
        "medical_urgency_status": "Moderate"
    }
]

# Function to process donor input and generate matches
def match_donor(donor_data):
    """
    Sends donor and recipient data to the fine-tuned GPT-4 model for matching.
    """
    prompt = f"""
    Given the donor's medical data and the recipient dataset below, identify the best two matches.

    Donor Data:
    {json.dumps(donor_data, indent=2)}

    Recipients Data:
    {json.dumps(recipient_data, indent=2)}

    Provide the best matches with scores and justification.
    """

    try:
        response = openai.ChatCompletion.create(
            model=MODEL_ID,
            messages=[{"role": "system", "content": "You are an AI specializing in organ donor-recipient matching."},
                      {"role": "user", "content": prompt}]
        )

        result = response["choices"][0]["message"]["content"]
        return result  # Returns the AI-generated response

    except Exception as e:
        return f"Error: {str(e)}"

# Gradio UI for interactive testing
def gradio_interface(donor_id, age, gender, blood_type, hla_typing, location, organ_type, hepatitis_b, hiv):
    """
    Captures user input, formats it into donor JSON, and processes matching.
    """
    donor_data = {
        "id": donor_id,
        "age": int(age),
        "gender": gender,
        "blood_type": blood_type,
        "hla_typing": hla_typing.split(","),  # Convert comma-separated HLA into a list
        "geographical_location": location,
        "organ_type": organ_type,
        "serology": {
            "hepatitis_b": hepatitis_b,
            "hiv": hiv
        }
    }

    # Run the AI model with the donor data
    output = match_donor(donor_data)
    return output

# Create the Gradio UI
gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Donor ID"),
        gr.Number(label="Age"),
        gr.Radio(["Male", "Female"], label="Gender"),
        gr.Textbox(label="Blood Type"),
        gr.Textbox(label="HLA Typing (comma-separated)"),
        gr.Textbox(label="Geographical Location"),
        gr.Textbox(label="Organ Type"),
        gr.Radio(["Positive", "Negative"], label="Hepatitis B"),
        gr.Radio(["Positive", "Negative"], label="HIV"),
    ],
    outputs="text",
    title="Organ Donation Matching System",
    description="Enter donor data and get the best matching recipients."
).launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1c453e0df0eec771f7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [10]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Ground truth matches (first 2 recipients for each donor)
ground_truth = [
    ["R67890", "R23456"],   # D001
     ["R67890", "R23456"],   # D001
    ["R81239", "R55661"],   # D002
    ["R99887", "R23456"],   # D003
    ["R55661", "R81239"],   # D004
    ["R23456", "R67890"],    # D005
    ["R23456", "R67658"],   # D006
    ["R67890", "R23455"],        # D001 ✅✅
    ["R55661", "R81239"]   # D004
]

# LLM output matches (first 2 recipients for each donor)
llm_predictions = [
    ["R67890", "R23456"],
     ["R67890", "R67890"],
    ["R81239", "R81239"],
    ["R99887", "R67890"],
    ["R55661", "R67890"],
    ["R00195_1", "R00195_2"],
    ["R67658", "R67658"],
    ["R67890", "R67890"],
    ["R55661", "R67890"]

# Flatten and convert to binary classification: 1 if correct match, 0 otherwise
y_true = []
y_pred = []

for truth, pred in zip(ground_truth, llm_predictions):
    for p in pred:
        if p in truth:
            y_true.append(1)
            y_pred.append(1)
        else:
            y_true.append(1)  # a correct match expected
            y_pred.append(0)  # but LLM failed to match it

# Fill remaining ground truth items if LLM missed them
for truth, pred in zip(ground_truth, llm_predictions):
    missed = len(set(truth) - set(pred))
    for _ in range(missed):
        y_true.append(1)
        y_pred.append(0)

# Metrics calculation
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall:    {recall:.2f}")
print(f"F1-score:  {f1:.2f}")


Precision: 1.00
Recall:    0.48
F1-score:  0.65
