In [None]:
import sys
import pandas as pd
sys.path.append("../../")
from llm_api import ClientLLM
from utils import load_samples_text, load_promts
import time

In [None]:
# Settings for the model endpoint served on localhost.
model = "BioMistral/BioMistral-7B"
api_base = "http://localhost:8888/v1"
api_key = "FALSE"  # presumably a dummy key for the local server — TODO confirm
temperature = 0.0

# Single client instance reused by every request in this notebook.
chat_instance = ClientLLM(model, api_key, api_base, temperature)

In [None]:
# Read the TUI description table; keep the frame for later use and
# extract the "Name" column as a plain Python list.
PATH_TUIS = '../../promts/tuis_desc.csv'
tuis_df = pd.read_csv(PATH_TUIS)
TUIS = tuis_df["Name"].tolist()

In [None]:
# Single hand-written example (disease name + description) used to try the
# zero-shot prompt once before running over the gold set.
disease = "Mycosis"
text = "Most common mild mycoses often present with a rash. Infections within the skin or under the skin may present with a lump and skin changes. Less common deeper fungal infections may present with pneumonia like symptoms or meningitis."

In [None]:
# Load the prompt templates from the spreadsheet via the project helper and display them.
prompts = load_promts("../../promts/prompts.xlsx")
prompts

In [None]:
# The entry at index 2 is the template used for all zero-shot runs in this notebook.
prompt_zero = prompts[2]

In [None]:
# Fill the zero-shot template with the example disease and wrap the result
# as a one-message chat (a single "user" turn), then display it.
prompt_content = prompt_zero.msg.format(
    disease=disease,
    tuis=", ".join(TUIS),
    text=text,
)
template = [dict(role="user", content=prompt_content)]
template

In [None]:
# Send the example prompt. 100 is the second positional argument of ask_complete
# (presumably the completion token limit — TODO confirm in llm_api.ClientLLM).
response = chat_instance.ask_complete(template, 100)

In [None]:
# Text of the first returned choice.
response.choices[0].message.content

# BioMistral model

In [None]:
from tqdm import tqdm
import json
from time import sleep

In [None]:
# Location of the gold-standard annotations (JSON).
PATH = "../../standard/golds_v4.json"

In [None]:
# Load the gold-standard annotations. The experiment loops iterate over
# `golds.items()` and read the "text" and "sings" keys of every value, so the
# file's top-level JSON value is expected to be an object (dict), not a list.
# JSON is UTF-8 by specification, so the encoding is pinned rather than left
# to the platform default.
with open(PATH, encoding="utf-8") as f:
    golds = json.load(f)

# No External Knowledge

## Zero-Shot

In [None]:
# Token accounting, keyed by experiment name. Each run stores the prompt
# template's `n_tokens` under "base" and accumulates the responses'
# `usage.total_tokens` under "total".
tokens = {}

In [None]:
# Zero-shot run without external knowledge: one request per gold entry,
# with the TUI names passed as a comma-separated list.
results_zero = []
tokens["zero"] = {"base": prompt_zero.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    messages = [
        {
            "role": "user",
            "content": prompt_zero.msg.format(
                disease=disease_name,
                tuis=", ".join(TUIS),
                text=gold["text"],
            ),
        }
    ]
    response = chat_instance.ask_complete(messages, 100)
    prediction = response.choices[0].message.content
    sleep(2)  # fixed pause after every request
    # "sings" is the key exactly as it is spelled in the gold data.
    results_zero.append({"y_pred": prediction, "y_true": gold["sings"]})
    tokens["zero"]["total"] += response.usage.total_tokens

In [None]:
# Quick look at the last zero-shot record.
results_zero[-1]

In [None]:
# Persist the zero-shot records as indented JSON.
zero_results_path = "../../results/BioMistral_zero_results.json"
with open(zero_results_path, "w") as out_file:
    json.dump(results_zero, fp=out_file, indent=2)

# Few Shot

In [None]:
# The entry at index 3 is the template used for all few-shot runs; display it for inspection.
prompt_few = prompts[3]
prompt_few

In [None]:
# Load the few-shot examples from the samples file via the project helper.
# The runs below slice this sequence ([:2], [:6], [:10], all) — presumably
# two entries per example; TODO confirm against utils.load_samples_text.
PATH_SAMPLES = '../../promts/samples.csv'
samples = load_samples_text(PATH_SAMPLES)

In [None]:
# Display the loaded few-shot examples.
samples

## 1-shot

In [None]:
# 1-shot run without external knowledge: the prompt carries `samples[:2]`
# and the TUI names as a comma-separated list.
results_1_shot = []
tokens["1-shot"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=", ".join(TUIS),
        samples=samples[:2],
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_1_shot.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["1-shot"]["total"] += response.usage.total_tokens

In [None]:
# First two 1-shot records.
results_1_shot[:2]

In [None]:
# Persist the 1-shot records as indented JSON.
one_shot_results_path = "../../results/Bio_Mistral_1_shot_results.json"
with open(one_shot_results_path, "w") as out_file:
    json.dump(results_1_shot, fp=out_file, indent=2)

## 3-shot

In [None]:
# 3-shot run without external knowledge: the prompt carries `samples[:6]`
# and the TUI names as a comma-separated list.
results_3_shot = []
tokens["3-shot"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=", ".join(TUIS),
        samples=samples[:6],
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_3_shot.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["3-shot"]["total"] += response.usage.total_tokens

In [None]:
# First two 3-shot records.
results_3_shot[:2]

In [None]:
# Persist the 3-shot records as indented JSON.
three_shot_results_path = "../../results/Bio_Mistral_3_shot_results.json"
with open(three_shot_results_path, "w") as out_file:
    json.dump(results_3_shot, fp=out_file, indent=2)

## 5-shot

In [None]:
# 5-shot run without external knowledge: the prompt carries `samples[:10]`
# and the TUI names as a comma-separated list.
results_5_shot = []
tokens["5-shot"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=", ".join(TUIS),
        samples=samples[:10],
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_5_shot.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["5-shot"]["total"] += response.usage.total_tokens

In [None]:
# First two 5-shot records.
results_5_shot[:2]

In [None]:
# Persist the 5-shot records as indented JSON.
five_shot_results_path = "../../results/Bio_Mistral_5_shot_results.json"
with open(five_shot_results_path, "w") as out_file:
    json.dump(results_5_shot, fp=out_file, indent=2)

## 10-shot

In [None]:
# 10-shot run without external knowledge: the prompt carries the complete
# `samples` sequence and the TUI names as a comma-separated list.
results_10_shot = []
tokens["10-shot"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=", ".join(TUIS),
        samples=samples,
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_10_shot.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["10-shot"]["total"] += response.usage.total_tokens

In [None]:
# First two 10-shot records.
results_10_shot[:2]

In [None]:
# Persist the 10-shot records as indented JSON.
ten_shot_results_path = "../../results/Bio_Mistral_10_shot_results.json"
with open(ten_shot_results_path, "w") as out_file:
    json.dump(results_10_shot, fp=out_file, indent=2)

# External Knowledge

In [None]:
# Long-form TUI description: one "- Name: Definition" line per row of the
# TUI table, each terminated by a newline.
tuis_long = "".join(
    "- " + tui_name + ": " + tui_definition + "\n"
    for tui_name, tui_definition in zip(tuis_df["Name"], tuis_df["Definition"])
)

## Zero-Shot

In [None]:
# Zero-shot run with external knowledge: same template as the plain
# zero-shot run, but the `tuis` slot receives `tuis_long` (names plus
# definitions) instead of the comma-separated names.
results_zero_e = []
tokens["zero_e"] = {"base": prompt_zero.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    messages = [
        {
            "role": "user",
            "content": prompt_zero.msg.format(
                disease=disease_name,
                tuis=tuis_long,
                text=gold["text"],
            ),
        }
    ]
    response = chat_instance.ask_complete(messages, 100)
    prediction = response.choices[0].message.content
    sleep(2)  # fixed pause after every request
    results_zero_e.append({"y_pred": prediction, "y_true": gold["sings"]})
    tokens["zero_e"]["total"] += response.usage.total_tokens

In [None]:
# Persist the zero-shot (external knowledge) records as indented JSON.
zero_e_results_path = "../../results/Bio_Mistral_zero_e_results.json"
with open(zero_e_results_path, "w") as out_file:
    json.dump(results_zero_e, fp=out_file, indent=2)

## Few-shot

## 1-shot

In [None]:
# 1-shot run with external knowledge: the prompt carries `samples[:2]`
# and `tuis_long` (names plus definitions) in the `tuis` slot.
results_1_shot_e = []
tokens["1_shot_e"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=tuis_long,
        samples=samples[:2],
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_1_shot_e.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["1_shot_e"]["total"] += response.usage.total_tokens

In [None]:
# Second-to-last 1-shot (external knowledge) record.
results_1_shot_e[-2]

In [None]:
# Persist the 1-shot (external knowledge) records as indented JSON.
one_shot_e_results_path = "../../results/Bio_Mistral_1_shot_e_results_p.json"
with open(one_shot_e_results_path, "w") as out_file:
    json.dump(results_1_shot_e, fp=out_file, indent=2)

## 3-shot

In [None]:
# 3-shot run with external knowledge: the prompt carries `tuis_long`
# (names plus definitions) in the `tuis` slot.
results_3_shot_e = []
tokens["3_shot_e"] = {"base": prompt_few.n_tokens, "total": 0}

for ds, val in tqdm(golds.items()):
    # Use samples[:6], matching the 3-shot run without external knowledge.
    # The other runs slice [:2] for 1-shot and [:10] for 5-shot, i.e. two
    # entries per shot; the previous [:5] cut the third example in half.
    prompt_content = prompt_few.msg.format(disease=ds, tuis=tuis_long, samples=samples[:6], text=val["text"])
    promt = [{"role": "user", "content": prompt_content}]
    response = chat_instance.ask_complete(promt, 100)
    time.sleep(2)  # fixed pause after every request
    answ = response.choices[0].message.content
    results_3_shot_e.append({"y_pred": answ, "y_true": val["sings"]})
    tokens["3_shot_e"]["total"] += response.usage.total_tokens

In [None]:
# First two 3-shot (external knowledge) records.
results_3_shot_e[:2]

In [None]:
# Persist the 3-shot (external knowledge) records as indented JSON.
three_shot_e_results_path = "../../results/Bio_Mistral_3_shot_e_results_p.json"
with open(three_shot_e_results_path, "w") as out_file:
    json.dump(results_3_shot_e, fp=out_file, indent=2)

## 5-shot

In [None]:
# 5-shot run with external knowledge: the prompt carries `samples[:10]`
# and `tuis_long` (names plus definitions) in the `tuis` slot.
results_5_shot_e = []
tokens["5_shot_e"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=tuis_long,
        samples=samples[:10],
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_5_shot_e.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["5_shot_e"]["total"] += response.usage.total_tokens

In [None]:
# First two 5-shot (external knowledge) records.
results_5_shot_e[:2]

In [None]:
# Persist the 5-shot (external knowledge) records as indented JSON.
five_shot_e_results_path = "../../results/Bio_Mistral_5_shot_e_results.json"
with open(five_shot_e_results_path, "w") as out_file:
    json.dump(results_5_shot_e, fp=out_file, indent=2)

## 10-shot

In [None]:
# 10-shot run with external knowledge: the prompt carries the complete
# `samples` sequence and `tuis_long` (names plus definitions) in the `tuis` slot.
results_10_shot_e = []
tokens["10_shot_e"] = {"base": prompt_few.n_tokens, "total": 0}

for disease_name, gold in tqdm(golds.items()):
    user_text = prompt_few.msg.format(
        disease=disease_name,
        tuis=tuis_long,
        samples=samples,
        text=gold["text"],
    )
    response = chat_instance.ask_complete([{"role": "user", "content": user_text}], 100)
    time.sleep(2)  # fixed pause after every request
    results_10_shot_e.append(
        {"y_pred": response.choices[0].message.content, "y_true": gold["sings"]}
    )
    tokens["10_shot_e"]["total"] += response.usage.total_tokens

In [None]:
# First two 10-shot (external knowledge) records.
results_10_shot_e[:2]

In [None]:
# Persist the 10-shot (external knowledge) records as indented JSON.
ten_shot_e_results_path = "../../results/Bio_Mistral_10_shot_e_results.json"
with open(ten_shot_e_results_path, "w") as out_file:
    json.dump(results_10_shot_e, fp=out_file, indent=2)

In [None]:
# Per-experiment token totals accumulated by the runs above.
tokens