# Imports

In [None]:
# %pip install -q evaluate
# %pip install -q opendatasets
# %pip install -q --upgrade accelerate
# %pip install -q --upgrade transformers
# %pip install -q peft
# %pip install -q --upgrade bitsandbytes
# %pip install -q accelerate
# %pip install -q trl

In [1]:
import pandas as pd 
import torch
import torch.nn as nn
torch.cuda.set_per_process_memory_fraction(0.9)
torch.backends.cuda.matmul.allow_tf32 = True
import torchtext
from torch.utils.data import Dataset, random_split
from typing import List, Dict, Union
from typing import Any, TypeVar
import pandas as pd
import os
import copy
import gc
import evaluate
import opendatasets as od
from huggingface_hub import login
from typing import Optional, Tuple, Union

from datasets import load_dataset, Features, Value
from datasets import Dataset
import accelerate

from peft import LoftQConfig, LoraConfig, get_peft_model, PeftModel

import transformers
from transformers.modeling_outputs import QuestionAnsweringModelOutput
from transformers import BertLMHeadModel, AutoConfig, BitsAndBytesConfig,Conv1D
from transformers import AutoTokenizer, Seq2SeqTrainingArguments 
from transformers import Seq2SeqTrainer, AutoModelForCausalLM, IntervalStrategy, AutoModelForQuestionAnswering
from transformers import TrainingArguments
from trl import SFTTrainer

from sklearn.model_selection import train_test_split

set a seed and confirm CUDA support

In [2]:
torch.manual_seed(2137)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.backends.cudnn.deterministic = True

print("PyTorch Version: ", torch.__version__)
print("torchtext Version: ", torchtext.__version__)
print(f"Using {'GPU' if str(DEVICE) == 'cuda' else 'CPU'}.")

PyTorch Version:  2.2.1+cu121
torchtext Version:  0.17.1+cpu
Using GPU.


# Dataset Download

## Downloading MedDialog Dataset

NOTE: you will need a kaggle API key for the following to work

In [3]:
import json

# Path to JSON file
json_file_path = "kaggle.json"

# Open the file and read the content
try:
  with open(json_file_path, "r") as f:
    json_data = json.load(f)
except FileNotFoundError:
  print(f"Error: JSON file not found at {json_file_path}")
  exit(1)

# Access username and key from the JSON data
try:
  username = json_data["username"]
  key = json_data["key"]
except KeyError:
  print("Error: 'username' or 'key' key not found in JSON data")
  exit(1)

In [None]:
os.environ['KAGGLE_USERNAME'] = username
os.environ['KAGGLE_KEY'] = key

# Assign the Kaggle data set URL into variable
dataset = 'https://www.kaggle.com/datasets/dsxavier/diagnoise-me'
# Using opendatasets let's download the data sets
od.download(dataset, "dataset")

## Downloading USMLE Dataset

In [4]:
USMLE_dataset = load_dataset("GBaker/MedQA-USMLE-4-options", split="test")

In [5]:
print(USMLE_dataset[0])
print(len(USMLE_dataset))

{'question': 'A junior orthopaedic surgery resident is completing a carpal tunnel repair with the department chairman as the attending physician. During the case, the resident inadvertently cuts a flexor tendon. The tendon is repaired without complication. The attending tells the resident that the patient will do fine, and there is no need to report this minor complication that will not harm the patient, as he does not want to make the patient worry unnecessarily. He tells the resident to leave this complication out of the operative report. Which of the following is the correct next action for the resident to take?', 'answer': 'Tell the attending that he cannot fail to disclose this mistake', 'options': {'A': 'Disclose the error to the patient and put it in the operative report', 'B': 'Tell the attending that he cannot fail to disclose this mistake', 'C': 'Report the physician to the ethics committee', 'D': 'Refuse to dictate the operative report'}, 'meta_info': 'step1', 'answer_idx': 

# Load Datasets

## Loading MedDialog Dataset

In [6]:
is_kaggle = (
    "KAGGLE_CLOUD" in os.environ or "KAGGLE_KERNEL_RUN_TYPE" in os.environ
)
if is_kaggle:
    DATA_PATH = "/kaggle/input/diagnoise-me/diagnose_en_dataset.feather"
else:
    DATA_PATH = "dataset\\diagnoise-me\\diagnose_en_dataset.feather"

SEQ_LEN: int = 1024
data = pd.read_feather(DATA_PATH)
SAMPLE_SIZE: int =  int(data.shape[0] * 0.015) #get 1% of the data
data = data[:SAMPLE_SIZE]
print(data.keys())
print(len(data))

Index(['id', 'Description', 'Doctor', 'Patient'], dtype='object')
3862


In [7]:
# Split data into train and eval sets with 70% for training
train_data, eval_data = train_test_split(data, test_size=0.3, random_state=42)

train_data = train_data.reset_index(drop=True)
eval_data = eval_data.reset_index(drop=True)

# Print the shapes of the train and eval sets
print("Train data shape:", train_data.shape)
print("Eval data shape:", eval_data.shape)

Train data shape: (2703, 4)
Eval data shape: (1159, 4)


## Loading USMLE Dataset

In [8]:
USMLE_dataset = pd.DataFrame({'Doctor': USMLE_dataset["answer"], 'Patient': USMLE_dataset["question"], 'Options':USMLE_dataset["options"]})
# Print the shapes of the set
print("USMLELiveEQA data shape:", USMLE_dataset.shape)

USMLELiveEQA data shape: (1273, 3)


## Create an output directory

In [9]:
os.makedirs('./results', exist_ok = True)
OUTPUT_DIR: str = './results'

# Model

In [10]:
# tokens for the datset
MODEL_NAME: str = 'UnfilteredAI/Mia-1B'

In [11]:
# Load tokenizer 
MAX_TOKEN_LENGTH = 1024

# for evaluation
ltokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
ltokenizer.padding_side = 'left'
ltokenizer.truncation_side = 'left'

# for training
rtokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
rtokenizer.padding_side = 'right'
rtokenizer.truncation_side = 'right'

In [16]:
base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
#base_model.resize_token_embeddings(len(rtokenizer))

config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/145 [00:00<?, ?B/s]

In [None]:
print(base_model)

In [145]:
lora_config = LoraConfig(
    lora_alpha=16, # lora alpha for scaling
    r=16, # rank
    lora_dropout=0.05, #dropout
    use_rslora=True, #  sets the adapter scaling factor to lora_alpha/math.sqrt(r)
    bias="none", # dont train biases
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
    #layers_to_transform=[20]
)
# model = get_peft_model(base_model, lora_config)
# model.gradient_checkpointing_enable()
# model.enable_input_require_grads()

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
    return {"trainable": trainable_params, "all": all_param, "trainable%": 100 * trainable_params / all_param}

print_trainable_parameters(model)

# Preparing Data for Training

## Custom Dataset

In [12]:
# class DoctorPatientDataset(Dataset):
    
#     def __init__(self, data, split):
        
#         self.input_x: List = data["Patient"]
#         self.input_x = self.input_x.reset_index(drop=True)
#         self.target: List = data["Doctor"]
#         self.target = self.target.reset_index(drop=True)
#         self.split = split

#         try:
#             self.options: List = data["Options"]
#         except:
#             pass
            
#     def __len__(self):
#         return len(self.input_x)
    
#     def __getitem__(self, idx):
#         try:
#             data = {
#                 'input': self.input_x[idx],
#                 'target': self.target[idx],
#                 'options': self.options[idx],
#                 'split': self.split
#             }
#         except:
#             data = {
#                 'input': self.input_x[idx],
#                 'target': self.target[idx],
#                 'split': self.split
#             }
#         return data

# class DoctorPatientDataset(Dataset):
    
#     def __init__(self, data, split):
        
#         self.input_x: List = data["Patient"]
#         self.input_x = self.input_x.reset_index(drop=True)
#         self.target: List = data["Doctor"]
#         self.target = self.target.reset_index(drop=True)
#         self.split = split

#         try:
#             self.options: List = data["Options"]
#         except:
#             pass
            
#     def __len__(self):
#         return len(self.input_x)
    
#     def __getitem__(self, idx):
#         try:
#             data = {
#                 'messages': [
#                     {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
#                     {"role": "user", "content": f"{self.input_x[idx]}, choose from A) {self.options[idx]['A']}, B) {self.options[idx]['B']}, C) {self.options[idx]['C']}, D)  {self.options[idx]['D']}"},
#                     {"role": "assistant", "content": self.target[idx]}
#                 ]
#             }
#         except:
#             data = {
#                 'messages': [
#                     {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
#                     {"role": "user", "content":{self.input_x[idx]}},
#                     {"role": "assistant", "content": self.target[idx]}
#                 ]
#             }
#         return data

def build_dataset(data, split):
    listed_data = []
    try:
                listed_data = [[
                        {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
                        {"role": "user", "content": f"{patient}, choose from A) {options['A']}, B) {options['B']}, C) {options['C']}, D)  {options['D']}"},
                        {"role": "assistant", "content": doctor}
                    ]for patient, doctor, options in zip(data["Patient"], data["Doctor"], data["Options"])]
    except:
                listed_data =  [[
                        {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
                        {"role": "user", "content":patient},
                        {"role": "assistant", "content": doctor}
                    ]for patient, doctor in zip(data["Patient"], data["Doctor"])]
    dataset = {"messages": listed_data}
    dataset = Dataset.from_dict(dataset)
    return dataset
                

In [125]:
# train_dataset = DoctorPatientDataset(data = train_data, split = "train")
# eval_dataset_1 = DoctorPatientDataset(data = eval_data, split = "eval")
# eval_dataset_2 = DoctorPatientDataset(data = USMLE_dataset, split = "eval")

# test_dataset = DoctorPatientDataset(data = eval_data[1:2], split = "eval")

# test_data = [["what's the answer to life, the universe, and everything", "42"]]
# test_data = pd.DataFrame(test_data, columns=["Patient", "Doctor"])
# test_train_dataset = DoctorPatientDataset(data = test_data, split = "train")

train_dataset = build_dataset(train_data)
eval_dataset_1 = build_dataset(eval_data)
eval_dataset_2 = build_dataset(USMLE_dataset)

test_dataset = build_dataset(eval_data[1:3])

test_data = [["what's the answer to life, the universe, and everything", "42"]]
test_data = pd.DataFrame(test_data, columns=["Patient", "Doctor"])
test_train_dataset = build_dataset(test_data)

In [14]:
print(test_train_dataset[0])

{'messages': [{'content': 'You are a medical professional providing consultation and medical diagnostics.', 'role': 'system'}, {'content': "what's the answer to life, the universe, and everything", 'role': 'user'}, {'content': '42', 'role': 'assistant'}]}


## Custom Data Collator

In [93]:
def format_text(message, tokenizer, add_generation_prompt):
    text = tokenizer.apply_chat_template(
        message,
        tokenize=False,
        add_generation_prompt=add_generation_prompt
    )
    return text

def custom_data_collator(features, return_tensors="pt"):
    batch = {}

    tokenizer = ltokenizer

    messages = [feature['messages'][0:2] for feature in features]

    text = list(map(lambda x: format_text(x, tokenizer, True), messages))

    print(text)
    
    encoding = tokenizer(text, padding=True, max_length=MAX_TOKEN_LENGTH, return_tensors=return_tensors, add_special_tokens=True)
    # encoding = tokenizer(text, truncation=True, padding='max_length', max_length=512, return_tensors=return_tensors, add_special_tokens=False)

    # Prepare final batch dictionary
    batch["input_ids"] = encoding["input_ids"]
    batch["attention_mask"] = encoding["attention_mask"]

    # if return_tensors in ["pt", "tf"]:
    #     if split == "train":
    #         labels_text = list(map(lambda x: format_text(x, tokenizer, False), labels_messages))
    #         # print("=============================")
    #         # print(labels_text)
    #         labels_encoding = tokenizer(labels_text, padding=True, max_length=MAX_TOKEN_LENGTH, return_tensors=return_tensors, add_special_tokens=True)
    #         batch["labels"] = labels_encoding["input_ids"]
    #     else:
    #         batch["labels"] = copy.deepcopy(encoding["input_ids"])
    return batch

# Training

In [15]:
training_args = TrainingArguments(
    output_dir = OUTPUT_DIR, 
    num_train_epochs = 1, 
    evaluation_strategy="steps",
    #eval_steps = 50,
    #logging_steps = 50,
    save_total_limit = 1,
    per_device_train_batch_size=8, 
    per_device_eval_batch_size=1,
    bf16=False,
    fp16=True,
    warmup_steps=0, 
    weight_decay=0.01, 
    logging_dir='./logs',
    save_steps = 0,
    load_best_model_at_end=True,
    eval_accumulation_steps=10,
    report_to=['tensorboard']
    )

In [None]:
# def formatting_prompts_func(inputs):
#     for input in inputs:
#         try:
#                 data = {
#                     'messages': [
#                         {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
#                         {"role": "user", "content": f"{input['Patient']}, choose from A) {input['Options']['A']}, B) {input['Options']['B']}, C) {input['Options']['C']}, D)  {input['Options']['D']}"},
#                         {"role": "assistant", "content": input["Doctor"]}
#                     ]
#                 }
#         except:
#                 data = {
#                     'messages': [
#                         {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
#                         {"role": "user", "content":input["Patient"]},
#                         {"role": "assistant", "content": input["Doctor"]}
#                     ]
#                 }
#         return data

In [83]:
# trainer = Seq2SeqTrainer(
#     model=model, 
#     args=training_args, 
#     train_dataset=train_dataset,
#     eval_dataset=eval_dataset_1, 
#     data_collator=custom_data_collator
# )
trainer = SFTTrainer(
    model=base_model, 
    args=training_args, 
    peft_config=lora_config,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset_1, 
    max_seq_length=1024,
    #data_collator=custom_data_collator,
    #dataset_text_field="messages",
    packing=False
)
trainer.model.gradient_checkpointing_enable()
trainer.model.enable_input_require_grads()

Map:   0%|          | 0/2703 [00:00<?, ? examples/s]

Map:   0%|          | 0/1159 [00:00<?, ? examples/s]

In [None]:
model.config.pad_token_id = ltokenizer.pad_token_id

In [86]:
# trainer = None
# model = None
# base_model = None
torch.cuda.empty_cache()
gc.collect()

0

In [87]:
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss


TrainOutput(global_step=338, training_loss=2.0002365563748152, metrics={'train_runtime': 1670.5847, 'train_samples_per_second': 1.618, 'train_steps_per_second': 0.202, 'total_flos': 1.0065805950799872e+16, 'train_loss': 2.0002365563748152, 'epoch': 1.0})

In [None]:
trainer.model.save_pretrained(f"{OUTPUT_DIR}/model_save")

In [88]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
tok.push_to_hub("SurtMcGert/advanced-AI-CW-Med-Chat-Bot")

In [89]:
trainer.model.push_to_hub("SurtMcGert/advanced-AI-CW-Med-Chat-Bot")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/9.02M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SurtMcGert/advanced-AI-CW-Med-Chat-Bot/commit/217f27b7a5aa155d8f8acc8570f8af3af5301763', commit_message='Upload model', commit_description='', oid='217f27b7a5aa155d8f8acc8570f8af3af5301763', pr_url=None, pr_revision=None, pr_num=None)

# Load the Model

In [48]:
eval_args = Seq2SeqTrainingArguments(
    output_dir = OUTPUT_DIR, 
    num_train_epochs = 1, 
    evaluation_strategy="steps",
    save_total_limit = 1,
    per_device_train_batch_size=8, 
    per_device_eval_batch_size=1,
    bf16=False,
    fp16=True,
    warmup_steps=0, 
    weight_decay=0.01, 
    logging_dir='./logs',
    save_steps = 0,
    load_best_model_at_end=True,
    remove_unused_columns=False,
    generation_config=transformers.GenerationConfig(
            max_length=MAX_TOKEN_LENGTH,
            num_beams=5,
    ),
    predict_with_generate=True,
    generation_max_length=MAX_TOKEN_LENGTH,
    eval_accumulation_steps=10,
    report_to=['none']
    )

In [147]:
# base_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
# model = PeftModel.from_pretrained(base_model, f"{OUTPUT_DIR}/model_save")
model = AutoModelForCausalLM.from_pretrained("SurtMcGert/advanced-AI-CW-Med-Chat-Bot").to(DEVICE)
model.config.pad_token_id = ltokenizer.pad_token_id
model.config.max_length = MAX_TOKEN_LENGTH
#model.gradient_checkpointing_enable()
#model.enable_input_require_grads()
evaluator = Seq2SeqTrainer(
    model=model, 
    args=eval_args, 
    train_dataset=train_dataset,
    eval_dataset=eval_dataset_1, 
    data_collator=custom_data_collator
)

config.json:   0%|          | 0.00/648 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/145 [00:00<?, ?B/s]

In [148]:
# model = None
# evaluator = None
torch.cuda.empty_cache()
gc.collect()

2809

# Evaluation

In [158]:
eval_result_1 = evaluator.predict(eval_dataset_1, max_new_tokens=10)

['<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time\xa0and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.</s><|assistant|>']
['<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, I would like to know about urinary incontinence in women. Why is urinary incontinence more prevalent in women? Why do pregnancy and menopause contribute to incontinence?</s><|assistant|>']




  0%|          | 0/2 [00:00<?, ?it/s]

In [159]:
eval_result_2 = evaluator.predict(eval_dataset_2, max_new_tokens=10)

['<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time\xa0and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.</s><|assistant|>']
['<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, I would like to know about urinary incontinence in women. Why is urinary incontinence more prevalent in women? Why do pregnancy and menopause contribute to incontinence?</s><|assistant|>']


  0%|          | 0/2 [00:00<?, ?it/s]

In [160]:
logits_1 = eval_result_1.predictions
logits_1[logits_1 == -100] = ltokenizer.eos_token_id
logits_2 = eval_result_2.predictions
logits_2[logits_2 == -100] = ltokenizer.eos_token_id

In [161]:
# get the raw evaluation output
raw_text_result_1 = ltokenizer.batch_decode(logits_1, skip_special_tokens=True)
raw_text_result_2 = ltokenizer.batch_decode(logits_2, skip_special_tokens=True)

In [170]:
for item in test_dataset['messages'][1]:
    print(item['content'])

You are a medical professional providing consultation and medical diagnostics.
Hello doctor, I would like to know about urinary incontinence in women. Why is urinary incontinence more prevalent in women? Why do pregnancy and menopause contribute to incontinence?
Hello. Female, unlike men, have short urethra and less effective urinary continence systems than men as a part of normal anatomy. Female normal continence is dependent on normal sphincter anatomy, normal vaginal anatomy, and pelvis muscle strength. Thus any factor causing changes in the above-mentioned area can lead to incontinence. After vaginal birth delivery, it is a normal process that the pelvic floor muscle becomes lax thus support system of bladder is reduced. This leads to hypermobile urethra and defected continence system. So problem like stress incontinence, urine leak during increase abdominal strainings like cough and sneezing, are increased and are typically common in elderly women with a history of vaginal deliver

In [173]:
# get the questions and ground truths from both evaluation datasets
questions_1 = []
ground_truth_1 = []
try:
    for item in eval_dataset_1['messages']:
        questions_1.append(item[1]["content"])
        ground_truth_1.append(item[2]["content"])
except:
    pass

questions_2 = []
ground_truth_2 = []
try:
    for item in eval_dataset_2['messages']:
        questions_2.append(item[1]["content"])
        ground_truth_2.append(item[2]["content"])
except:
    pass

# create lists for the text outputs
text_result_1 = list()
text_result_2 = list()

# get the answers for the MedDialog dataset
for item in raw_text_result_1:
    index = item.find("<|assistant|>")
    output = item[index+13:]
    index = output.find(ltokenizer.eos_token)
    if(index > -1):
        output = output[:index]
    text_result_1.append(output)


# get the answers for the USMLE dataset
for item in raw_text_result_2:
    index = item.find("<|assistant|>")
    output = item[index+13:]
    index = output.find(ltokenizer.eos_token)
    if(index > -1):
        output = output[:index]
    text_result_2.append(output)



# print the first 2 results from each dataset evaluation
print("============================MedDialog Evaluation============================")
for question, gt, answer in list(zip(questions_1, ground_truth_1, text_result_1))[:2]:
    print(f"""
    Question: {question}
    Ground Truth: {gt}
    Answer: {answer}
    """)

print("============================USMLE Evaluation============================")
for question, gt, answer in list(zip(questions_2, ground_truth_2, text_result_2))[:2]:
    print(f"""
    Question: {question}
    Ground Truth: {gt}
    Answer: {answer}
    """)


    Question: Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.
    Ground Truth: Hi. Blood test to check for hemoglobin level, total count and differential count. Cough in pregnancy. Revert back after the investigations to an obstetrician and gynaecologist online.---> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist  
    Answer: Hi. For further information consult an obstetric
    

    Question: Hello doctor, I would like to know about urinary incontinence in women. Why is urinary incontinence more prevalent in women? Why do pregnancy and menopause contribute to incontinence?
    Ground Truth: Hello. Female, unlike men, have short urethra and less effective urinary continence systems than 

# Results

## Load the Required Evaluation Metrics

In [None]:
# perplexity - measures certainty of the model.
# METEOR - extension of BLEU (measure similarity between the output and the ground truth) but accounts for word semantics.
# ROUGE - considers n-gram overlap (recall) but also precision.
# SQuAD v2 - a metric for measuring a models correctness in answering the multiple choice questions
# Accuracy - use this for the multiple choice dataset

perplexity_scorer = evaluate.load('perplexity')
meteor_scorer = evaluate.load('meteor')
rouge_scorer = evaluate.load('rouge')
squad_scorer = evaluate.load('squad_v2')
accuracy_scorer = evaluate.load('accuracy')


In [None]:
# compute the bleu and rouge scores for the MedDialog evaluation
perplexity_score_1 = perplexity_scorer.compute(predictions=text_result_1, references=ground_truth_1)
meteor_score_1 = meteor_scorer.compute(predictions=text_result_1, references=ground_truth_1)
rouge_score_1 = rouge_scorer.compute(predictions=text_result_1, references=ground_truth_1)
squad_score_1 = squad_scorer.compute(predictions=text_result_1, references=ground_truth_1)


# compute the bleu and rouge scores for the USMLE evaluation
accuracy_score_2 = accuracy_scorer.compute(predictions=text_result_1, references=ground_truth_2)

In [None]:
# print scores for MedDialog evaluation


In [None]:
# print scores for USMLE evaluation


# TESTING JUST IGNORE ALL THIS

## TEST 1

In [124]:
print(test_dataset[0])

{'messages': [{'content': 'You are a medical professional providing consultation and medical diagnostics.', 'role': 'system'}, {'content': 'Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time\xa0and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.', 'role': 'user'}, {'content': 'Hi. Blood test to check for hemoglobin level, total count and differential count. Cough in pregnancy. Revert back after the investigations to an obstetrician and gynaecologist online.---> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist  ', 'role': 'assistant'}]}


In [98]:
test_1_result = evaluator.predict(test_dataset, max_new_tokens=100)

['<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time\xa0and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.</s><|assistant|>']




  0%|          | 0/1 [00:00<?, ?it/s]

In [99]:
print(test_1_result.predictions)

[[    1   529 29989 ...  2045   597  1636]]


In [100]:
logits_test_1 = test_1_result.predictions
logits_test_1[logits_test_1 == -100] = ltokenizer.eos_token_id

In [101]:
raw_text_result_test_1 = ltokenizer.batch_decode(logits_test_1, skip_special_tokens=True)

In [102]:
print(raw_text_result_test_1)

['<|system|>You are a medical professional providing consultation and medical diagnostics. <|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time\xa0and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take. <|assistant|>Hi. For further information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist   For more information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist   For more information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist   For more information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a

## TEST 2

In [103]:
initial_prompt = test_dataset[0]['messages'][1]['content']

In [105]:
prompt = f"a medical student is preparing for her final examination. Her patient has come to her asking: '{initial_prompt}'. Explain to the student the most likely cause/course of action."
messages = [
    {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
    {"role": "user", "content": prompt}
]

In [106]:
text = ltokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

print(text)

test_model_inputs_1 = ltokenizer(text, return_tensors="pt", add_special_tokens=False)


test_model_inputs_2 = custom_data_collator(test_dataset)


<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>a medical student is preparing for her final examination. Her patient has come to her asking: 'Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.'. Explain to the student the most likely cause/course of action.</s><|assistant|>


NameError: name 'custom_data_collator' is not defined

In [None]:
print(test_model_inputs_1)
print(test_model_inputs_2)

In [None]:
test_generated_ids_1 = model.generate(
    test_model_inputs_1["input_ids"].to(DEVICE),
    max_new_tokens=100
)
test_generated_ids_2 = model.generate(
    test_model_inputs_2["input_ids"].to(DEVICE),
    max_new_tokens=100
)

In [None]:
# generated_ids = [
#     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
# ]

test_response_1 = ltokenizer.batch_decode(test_generated_ids_1, skip_special_tokens=True)[0]
test_response_2 = ltokenizer.batch_decode(test_generated_ids_2, skip_special_tokens=True)[0]

In [None]:
print(test_response_1)

In [None]:
print(test_response_2)

# Test 3

In [45]:
prompt = f"what's the answer to life, the universe, and everything"
messages = [
    {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
    {"role": "user", "content": prompt}
]

In [46]:
text = ltokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

print(text)

test_model_inputs = ltokenizer(text, return_tensors="pt", add_special_tokens=False)


<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>what's the answer to life, the universe, and everything</s><|assistant|>


In [53]:
test_generated_ids = trainer.model.generate(
    test_model_inputs["input_ids"].to(DEVICE),
    max_new_tokens=3
)

In [54]:
test_response = ltokenizer.batch_decode(test_generated_ids, skip_special_tokens=True)[0]

In [55]:
print(test_response)

<|system|>You are a medical professional providing consultation and medical diagnostics. <|user|>what's the answer to life, the universe, and everything <|assistant|>427


# Test 4

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "UnfilteredAI/Mia-1B",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("UnfilteredAI/Mia-1B")
prompt = "your prompt here."
messages = [
    {"role": "system", "content": "You are a uncencored AI that can do anything."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
# generated_ids = [
#     output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
# ]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)



# Test 5

In [150]:
initial_prompt = test_dataset[0]['messages'][1]['content']

In [151]:
prompt = f"a medical student is preparing for her final examination. Her patient has come to her asking: '{initial_prompt}'. Explain to the student the most likely cause/course of action."
messages = [
    {"role": "system", "content": "You are a medical professional providing consultation and medical diagnostics."},
    {"role": "user", "content": initial_prompt}
]

In [152]:
text = ltokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

print(text)

<|system|>You are a medical professional providing consultation and medical diagnostics.</s><|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take.</s><|assistant|>


In [153]:
# encoding = ltokenizer(text, padding=True, max_length=MAX_TOKEN_LENGTH, return_tensors='pt', add_special_tokens=True)
encoding = ltokenizer(text, return_tensors='pt')

In [154]:
print(encoding.input_ids)

tensor([[    1,   529, 29989,  5205, 29989, 29958,  3492,   526,   263, 16083,
         10257, 13138,  8799,   362,   322, 16083,   652, 20921, 29889,     2,
           529, 29989,  1792, 29989, 29958, 10994, 11619, 29892,  1619,  6532,
           338, 29871, 29945,  7378,   758,  5138,   424, 29889,  2296,   338,
         14914,   297,   697,   310,   278,   337,   649,   287, 29418,   277,
          1338, 29889,  2296,   756,  1063,   594, 11292,   304,  2125,  7756,
          6405,   448, 12188,   322,  3037, 29871, 29941, 29953, 29900,  1591,
          1372, 29889,  6811,  1833,  2211,   304,  3023,  3841,  1183,   338,
          2534,   274,   820,   297,   278,  4646,   931, 30081,   392,  1183,
           338, 13421,   304,   671,   738,  1591,  1372,   470,   260,  8927,
         29889, 13187,   368,  4368,   607,  1591, 29873,   470,   260,  8927,
          1183,   881,  2125, 29889,     2,   529, 29989,   465, 22137, 29989,
         29958]])


In [155]:
test_generated_ids = evaluator.model.generate(
    encoding["input_ids"].to(DEVICE),
    max_new_tokens=100
)

In [156]:
decoded = ltokenizer.batch_decode(test_generated_ids, skip_special_tokens=True)[0]

In [157]:
print(decoded)

<|system|>You are a medical professional providing consultation and medical diagnostics. <|user|>Hello doctor, My wife is 5 months pregnant. She is treated in one of the reputed hospitals. She has been advised to take Feronia -XT and Cal 360 tablets. Over last three to four days she is having cough in the night time and she is afraid to use any tablets or tonic. Kindly suggest which tablet or tonic she should take. <|assistant|>Hi. For further information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist   For more information consult an obstetrician and gynaecologist online --> https://www.icliniq.com/ask-a-doctor-online/obstetrician-and-gynaecologist  


# Random stuff

In [None]:
test_train_input = custom_data_collator(test_train_dataset)