In [25]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from evaluate import load
import wget
import os
import csv
import weaviate
import weaviate.classes as wvc

In [12]:
# model_name = "mistralai/Mistral-7B-v0.1"
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
device = "cuda"
bertscore = load("bertscore")

In [3]:
bnb_config = BitsAndBytesConfig(
 load_in_4bit=True,
 bnb_4bit_quant_type="nf4",
 bnb_4bit_use_double_quant=True,
 bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(
 model_name,
 quantization_config=bnb_config,
 device_map="auto",
 trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

bin c:\Users\A2MIMO\Documents\conv2note\venv\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda118.dll


Loading checkpoint shards: 100%|██████████| 3/3 [00:13<00:00,  4.63s/it]


#### Regular prompting

In [104]:
conversation = open("test.txt", "r").readlines()
conversation = "".join(conversation)

prompt = f"""
<s>[INST] Write a resume of the following conversation between a doctor and a patient: {conversation}[/INST]
"""

print(prompt)


<s>[INST] Write a resume of the following conversation between a doctor and a patient: Doctor: Good morning, Mr. Smith. How are you feeling today?
Patient: Good morning, Doctor. I've been better, to be honest. I've been having some chest pain and shortness of breath lately.
Doctor: I'm sorry to hear that. How long have you been experiencing these symptoms?
Patient: It's been about a week now. At first, I thought it might just be stress, but it's been getting worse.
Doctor: I see. Have you noticed if anything triggers these symptoms or if they occur at certain times of the day?
Patient: Well, the chest pain seems to come on after I've been active or when I'm feeling particularly stressed.
Doctor: Okay. Have you had any previous heart-related issues or any family history of heart disease?
Patient: Not that I know of, Doctor.
Doctor: Alright. I'd like to conduct a physical examination and run some tests to better understand what might be causing your symptoms. We may need to perform an E

  conversation = open("test.txt", "r").readlines()


In [105]:
model_inputs = tokenizer(prompt, return_tensors="pt").to(device)

generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

output = decoded[0]
resume = output.split("[/INST]")[1]
print(resume)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 title: Consultation between Doctor and Patient (Mr. Smith)

 Doctor and Patient exchanged pleasantries at the beginning of the consultation. The Patient expressed experiencing chest pain and shortness of breath for about a week, which initially seemed like stress-induced symptoms but have since worsened. The Doctor asked if there were any triggers or specific times when the symptoms occur, to which the Patient mentioned that the chest pain seems to appear after physical activity or increased stress levels.

 The Doctor inquired about any previous heart-related issues or family history of heart disease, but the Patient reported no such issues. The Doctor decided to perform an examination and requested specific tests, including an EKG, chest X-ray, and blood work. The Patient expressed a desire to understand the cause of the symptoms and agreed to the proposed tests and follow-up appointment.

 The Doctor recommended avoiding strenuous activities and managing stress to help alleviate s

In [66]:
results = bertscore.compute(predictions=[resume], references=[conversation], lang="en")
print(results)

{'precision': [0.8960577845573425], 'recall': [0.8609486818313599], 'f1': [0.8781524896621704], 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.37.2)'}


#### Dynamic k-shot

In [79]:
client = weaviate.connect_to_local()

In [80]:
def get_dataset():

    result = []

    # MTS-Dialog: 1.2k section-convo pairs
    # train_dataset_path = "https://raw.githubusercontent.com/abachaa/MTS-Dialog/main/Main-Dataset/MTS-Dialog-TrainingSet.csv" 
    
    # ACI Bench: 67 convo-note pairs
    train_dataset_path = "https://raw.githubusercontent.com/wyim/aci-bench/main/data/challenge_data/train.csv" 
    
    if not os.path.exists("./train.csv"):
        wget.download(train_dataset_path, "./train.csv")

    with open("./train.csv", encoding="utf-8") as file:
        csv_reader = csv.reader(file)

        for row in csv_reader:
            data = {}
            data["summary"] = row[3]
            data["conversation"] = row[2]
            
            result.append(data)

    result.pop(0)
    return result

Hey


In [89]:
if client.collections.exists("reports"):
    reports = client.collections.get("reports")
else:
    reports = client.collections.create(
        name="reports",
        vectorizer_config=wvc.config.Configure.Vectorizer.text2vec_transformers()
    )

if reports.__len__() == 0:
        dataset = get_dataset()
        for i in range(len(dataset)):
            reports.data.insert(dataset[i])

print("Collection length: ", reports.__len__())

Collection length:  67


In [90]:
conversation = open("test.txt", "r").readlines()
conversation = "".join(conversation)

  conversation = open("test.txt", "r").readlines()


In [99]:
k_examples = reports.query.near_text(query=conversation, limit=2).objects
print(k_examples)

[Object(uuid=_WeaviateUUIDInt('5447d34c-f020-4c79-b2db-9fbbd8182972'), metadata=MetadataReturn(creation_time=None, last_update_time=None, distance=None, certainty=None, score=None, explain_score=None, is_consistent=None, rerank_score=None), properties={'conversation': "[doctor] hi , vincent . how are you ?\n[patient] i'm good . how about you ?\n[doctor] i'm good . so le- are you ready to get started ?\n[patient] i am .\n[doctor] okay . vincent is a 56-year-old male here with abnormal lab findings . so , i've heard you were in the er , vincent , and they found that you had a low hemoglobin .\n[patient] yup .\n[doctor] were you having some dizziness and some lightheadedness ?\n[patient] i was very lightheaded . i- i do n't know . very lightheaded .\n[doctor] okay . and have you noticed bleeding from anywhere ?\n[patient] i have not . i have n't hurt myself in quite a while . maybe a slight nick from a knife while chopping some onions , but nothing more than that .\n[doctor] but no blood 

In [112]:
k_shot_examples = ""
for i, example in enumerate(k_examples):
    example_summary = example.properties["summary"]
    example_conversation = example.properties["conversation"]
    k_shot_examples += f"Example #1: \nConversation:\n{example_conversation}\n\nSummary:\n{example_summary}\n"

# You may use the following examples to help you write the summary
k_shot_prompt = f"""
<s>[INST] Write a summary of the following conversation between a doctor and a patient:\n{conversation}

Here are some examples of conversations and their summaries to help you write the summary, please follow the same format as the examples:
{k_shot_examples}
[/INST]
"""

print(k_shot_prompt)


<s>[INST] Write a summary of the following conversation between a doctor and a patient:
Doctor: Good morning, Mr. Smith. How are you feeling today?
Patient: Good morning, Doctor. I've been better, to be honest. I've been having some chest pain and shortness of breath lately.
Doctor: I'm sorry to hear that. How long have you been experiencing these symptoms?
Patient: It's been about a week now. At first, I thought it might just be stress, but it's been getting worse.
Doctor: I see. Have you noticed if anything triggers these symptoms or if they occur at certain times of the day?
Patient: Well, the chest pain seems to come on after I've been active or when I'm feeling particularly stressed.
Doctor: Okay. Have you had any previous heart-related issues or any family history of heart disease?
Patient: Not that I know of, Doctor.
Doctor: Alright. I'd like to conduct a physical examination and run some tests to better understand what might be causing your symptoms. We may need to perform an 

In [111]:
model_inputs = tokenizer(k_shot_prompt, return_tensors="pt").to(device)

generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)

k_shot_output = decoded[0]
k_shot_resume = k_shot_output.split("[/INST]")[1]

print(k_shot_resume)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



Summary:

Mr. Smith presented to the doctor's office complaining of chest pain and shortness of breath that had been ongoing for a week. He was unsure if these symptoms were related to stress or an underlying condition. The doctor performed a physical examination and ordered tests including an EKG, chest X-ray, and blood work to better understand the cause of his symptoms. The patient was advised to avoid strenuous activities and manage stress levels until the test results were available. No previous heart-related issues or family history of heart disease was reported. The doctor will follow up once the test results are obtained.</s>


In [96]:
results = bertscore.compute(predictions=[k_shot_resume], references=[conversation], lang="en")
print(results)

{'precision': [0.8896547555923462], 'recall': [0.8447261452674866], 'f1': [0.8666084408760071], 'hashcode': 'roberta-large_L17_no-idf_version=0.3.12(hug_trans=4.37.2)'}
