In [1]:
import base64
import os
from google import genai
from google.genai import types
from dotenv import load_dotenv
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load variables from a local .env file into the process environment; the
# GEMINI_API_KEY read via os.getenv() further down is expected to come from it.
load_dotenv()

True

In [198]:
def _parse_compliance_response(model_response):
    """Extract ``(classification, explanation)`` from the model's raw reply.

    The label is the remainder of the line that follows the first
    ``Classification:`` marker. The explanation is the text that follows the
    first ``Explanation:`` marker, or ``""`` when the marker is absent (the
    prompt only asks for an explanation on non-compliant claims).

    Raises:
        ValueError: if the reply contains no ``Classification:`` marker.
    """
    classification_parts = model_response.split("Classification:")
    if len(classification_parts) < 2:
        raise ValueError(
            "Model response has no 'Classification:' field: "
            f"{model_response!r}"
        )
    # Trim whitespace plus markdown/bracket decoration (e.g. "**Non-Compliant**"
    # or "[Compliant]") so the label compares cleanly against ground truth.
    classification = classification_parts[1].split("\n")[0].strip(" \t\r*[]`'\"")

    explanation_parts = model_response.split("Explanation:")
    explanation = explanation_parts[1].strip() if len(explanation_parts) > 1 else ""
    # A bolded marker ("**Explanation:** ...") leaves its closing asterisks at
    # the front of the text; drop them.
    explanation = explanation.lstrip("*").strip()

    return classification, explanation


def medical_compliance_checker(region, user_input, client=None):
    """Classify a medical claim as compliant or not under a region's rules.

    Builds a one-shot prompt, streams the answer from ``gemini-2.0-flash`` and
    parses the ``Classification:`` / ``Explanation:`` fields out of it.

    Args:
        region: Name of the regulator whose rules apply; interpolated into the
            prompt verbatim (e.g. "FDA", "EMA", "HSA").
        user_input: The medical claim to evaluate; interpolated into the prompt
            verbatim.
        client: Optional, already constructed ``genai.Client``. When omitted a
            new client is created from the ``GEMINI_API_KEY`` environment
            variable. Pass one in to reuse it across many calls.

    Returns:
        A ``(classification, explanation)`` tuple of strings. ``explanation``
        is ``""`` when the model gave none.

    Raises:
        ValueError: if the model's reply has no ``Classification:`` field.
    """
    prompt = f"""
    You are a compliance expert specializing in medical regulatory standards. Evaluate the following medical claim for compliance based on {region} regulations. Return either 'Compliant' or 'Non-Compliant' along with a brief reason only if non-compliant.

    Example Medical Claim: This drug guarantees 100% effectiveness in curing diabetes.
    
    Answer Format:

    Classification: [Compliant / Non-Compliant]

    Explanation: [Brief reason for classification only if Non-Compliant]

    Example Classification: Non-Compliant

    Example Explanation: Absolute claims are not allowed.

    Now, Evaluate the following medical claim:

    Medical Claim: {user_input}

    Classification: 

    Explanation: 
    """

    if client is None:
        client = genai.Client(
            api_key=os.getenv("GEMINI_API_KEY")
        )

    model = "gemini-2.0-flash"
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=prompt),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=0.5,
        top_p=0.95,
        top_k=40,
        max_output_tokens=4096,
        response_mime_type="text/plain",
    )

    # A streamed chunk may carry no text (its `.text` is then None), so
    # substitute "" instead of concatenating None onto the response.
    model_response = "".join(
        chunk.text or ""
        for chunk in client.models.generate_content_stream(
            model=model,
            contents=contents,
            config=generate_content_config,
        )
    )

    return _parse_compliance_response(model_response)

In [None]:
if __name__ == "__main__":
    # Enter Region FDA (US), EMA (Europe), or HSA (Singapore).
    region = input("Enter Region: ")
    user_input = input("Type: ")
    classification, explanation = medical_compliance_checker(region, user_input)
    # Show the verdict so the interactive run produces visible output.
    print(f"Classification: {classification}")
    if explanation:
        print(f"Explanation: {explanation}")

The synthetic dataset was created from PubMed, a search engine maintained by the US National Library of Medicine that provides access to biomedical and life-science research articles.

In [141]:
# Load the synthetic medical-claims dataset into a DataFrame.
df = pd.read_csv(filepath_or_buffer='synthetic_medical_claims_1.csv')

Create two new columns, Model_Classified_As and Model_Response, by iterating over the dataset and generating a model response for each row; these are then compared against the Compliance labels to check accuracy.

In [None]:
# Classify every claim against FDA rules and store the verdict next to it.
# Iterating the Series' own (label, value) pairs keeps reads and writes tied to
# the actual row labels instead of assuming the index is 0..n-1, and avoids
# chained indexing (df['Claim'][i]).
for row_label, claim in df['Claim'].items():
    classification, explanation = medical_compliance_checker(region='FDA', user_input=claim)
    df.loc[row_label, 'Model_Classified_As'] = classification
    df.loc[row_label, 'Model_Response'] = explanation

In [143]:
# Show the claims together with the model's classification and explanation columns.
df

Unnamed: 0,Claim,Compliance,Model_Classified_As,Model_Response
0,Unicompartmental knee arthroplasty (UKA) is a ...,Compliant,Compliant,
1,Our weight loss supplement melts fat instantly...,Non-Compliant,Non-Compliant,This claim makes unsubstantiated claims of ins...
2,This product reverses aging and guarantees a y...,Non-Compliant,Non-Compliant,Claims of reversing aging and guaranteeing a y...
3,Rehabilitation following total knee arthroplas...,Compliant,Compliant,
4,This drug guarantees 100% effectiveness in cur...,Non-Compliant,Non-Compliant,Absolute claims are not allowed.
5,Knee pain is a highly prevalent condition in t...,Compliant,Compliant,
6,Clinical studies show this knee surgery has a ...,Compliant,Compliant,
7,Coronal plane alignment of the knee (CPAK) has...,Compliant,Compliant,
8,No other drug on the market works as fast as o...,Non-Compliant,Non-Compliant,Makes unsubstantiated superiority and efficacy...
9,Take this pill and never worry about heart dis...,Non-Compliant,Non-Compliant,This claim makes an absolute and unsubstantiat...


In [195]:
# Class distribution of the ground-truth labels next to that of the model's predictions
tuple(df[column].value_counts() for column in ('Compliance', 'Model_Classified_As'))

(Compliance
 Non-Compliant    11
 Compliant         8
 Name: count, dtype: int64,
 Model_Classified_As
 Non-Compliant    12
 Compliant         7
 Name: count, dtype: int64)

In [None]:
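# Save the evaluated DataFrame to disk. Uncomment and run this once so that
# 'Model_Evaluation.csv' exists before the next cell tries to read it.
# df.to_csv('Model_Evaluation.csv', index=False)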

In [None]:
# Reload the evaluation results previously written to 'Model_Evaluation.csv'.
# NOTE(review): the cells visible after this one keep reading from `df`, not
# `model_eval` — confirm which frame the metrics are meant to use.
model_eval = pd.read_csv(filepath_or_buffer='Model_Evaluation.csv')

ACCURACY

In [None]:
# Pull the ground-truth and predicted labels out of the DataFrame as plain lists,
# lower-casing and trimming every value so both sides are compared uniformly.
labels = [truth.lower().strip() for truth in df['Compliance'].tolist()]
model_classifications = [predicted.lower().strip() for predicted in df['Model_Classified_As'].tolist()]

In [184]:
# Accuracy treats both classes alike; precision, recall and F1 are computed
# with "non-compliant" as the positive class.
accuracy = accuracy_score(labels, model_classifications)
precision, recall, f1 = (
    score(labels, model_classifications, pos_label="non-compliant")
    for score in (precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Accuracy: 0.95
Precision: 0.92
Recall: 1.00
F1-Score: 0.96
