In [1]:
# Install and import the necessary libraries
!pip install -q torch
!pip install -q -U accelerate peft bitsandbytes transformers trl einops

In [2]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
from huggingface_hub import login
login(token="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")

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
import os
import torch
from datasets import load_dataset
from datasets import load_from_disk
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    pipeline,
    logging,
)

from trl import SFTTrainer

In [5]:
# Model
base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
new_model = "llama-medbot"

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
tokenizer.pad_token=tokenizer.eos_token
tokenizer.padding_side="right"

In [6]:
from datasets import load_dataset, DatasetDict, Dataset
dataset = load_dataset("keivalya/MedQuad-MedicalQnADataset")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define a function to transform the dataset
def format_example(example):
    # Use the specified format for text
    example["text"] = f"### Instruction: {example['Question']} ### Assistant: {example['Answer']}"
    return example

train_test_split = dataset['train'].train_test_split(test_size=0.2)
train_dataset=train_test_split["train"]
train_dataset = train_dataset
test_dataset=train_test_split["test"]
test_dataset = test_dataset

# Apply the transformation to both the train and test datasets
train_dataset = train_dataset.map(format_example)

# Remove unnecessary columns and keep only the "text" column
train_dataset = train_dataset.remove_columns(["qtype", "Question", "Answer"])
# print(formatted_dataset)

train_dataset=train_dataset.select(range(6000))

# Preview the dataset
print(train_dataset)
print(test_dataset)

Map:   0%|          | 0/13125 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 6000
})
Dataset({
    features: ['qtype', 'Question', 'Answer'],
    num_rows: 3282
})


In [7]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [8]:
# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base moodel
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map={"": torch.cuda.current_device()}
)

model = model.to(device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [9]:
model.config.use_cache = False
model.config.pretraining_tp = 1
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

In [10]:
# Set training arguments
training_arguments = TrainingArguments(
    output_dir = "./results",
    num_train_epochs = 1,
    fp16 = False,
    bf16 = False,
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps = 1,
    gradient_checkpointing = True,
    max_grad_norm = 0.3,
    learning_rate = 2e-4,
    weight_decay = 0.001,
    optim = "paged_adamw_32bit",
    lr_scheduler_type = "cosine",
    max_steps = -1,
    warmup_ratio = 0.03,
    group_by_length = True,
    save_steps = 0,
    logging_steps = 100,
)

# LoRA configuration
peft_config = LoraConfig(
    r=64,                   #default=8
    lora_alpha= 16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules = ["q_proj", "v_proj"]
)


In [11]:
#print_trainable_parameters(model)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length= 200,
    tokenizer=tokenizer,
    args=training_arguments,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

In [12]:
# Train model
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113127755553858, max=1.0…

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
100,1.6514
200,1.2335
300,1.138
400,1.0937
500,1.0631
600,1.0413
700,1.0967
800,1.0478
900,1.0493
1000,1.0246


TrainOutput(global_step=3000, training_loss=1.0628282267252604, metrics={'train_runtime': 16847.7978, 'train_samples_per_second': 0.356, 'train_steps_per_second': 0.178, 'total_flos': 4.357909859003597e+16, 'train_loss': 1.0628282267252604, 'epoch': 1.0})

In [13]:
trainer.model.save_pretrained("./phi2_finetuned_subjective/final_model")

In [14]:
from peft import PeftModel
f_model = PeftModel.from_pretrained(model,'/kaggle/working/phi2_finetuned_subjective/final_model')
f_model = f_model.merge_and_unload()



In [15]:
print(test_dataset[0])

{'qtype': 'symptoms', 'Question': 'What are the symptoms of Carbamoyl phosphate synthetase 1 deficiency ?', 'Answer': 'What are the signs and symptoms of Carbamoyl phosphate synthetase 1 deficiency? The Human Phenotype Ontology provides the following list of signs and symptoms for Carbamoyl phosphate synthetase 1 deficiency. If the information is available, the table below includes how often the symptom is seen in people with this condition. You can use the MedlinePlus Medical Dictionary to look up the definitions for these medical terms. Signs and Symptoms Approximate number of patients (when available) Aminoaciduria 90% Hyperammonemia 90% Muscular hypotonia 90% Respiratory insufficiency 90% Seizures 90% Stroke 5% Ataxia - Autosomal recessive inheritance - Cerebral edema - Coma - Episodic ammonia intoxication - Failure to thrive - Hypoargininemia - Intellectual disability - Irritability - Lethargy - Low plasma citrulline - Protein avoidance - Respiratory alkalosis - Vomiting - The Hum

In [17]:
from tqdm import tqdm

f_model.eval()  # Set the model to evaluation mode
predictions = []
all_preds = []  # Ensure this is defined for appending predictions
device = next(f_model.parameters()).device  # Ensure device compatibility
batch_size = 1  # Set a batch size if needed

for i, question in enumerate(tqdm(test_dataset, desc="Generating predictions", unit="question")):
    prompt = question['Question']

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(inputs['input_ids'], max_length=70)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    predictions.append(response)

Generating predictions:   0%|          | 0/3282 [00:00<?, ?question/s]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Generating predictions:   0%|          | 1/3282 [00:05<4:55:23,  5.40s/question]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Generating predictions:   0%|          | 2/3282 [00:10<4:51:36,  5.33s/question]The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Generating predictio

In [18]:
import csv

# Open the CSV file in write mode
with open("output.csv", "w", newline="") as f:
    writer = csv.writer(f)
    
    # Optional: Write a header if needed
    writer.writerow(["Prediction"])

    # Write each item from the predictions list
    for item in predictions:
        writer.writerow([item])

In [20]:
import csv

# Open the CSV file in write mode
with open("true_output.csv", "w", newline="") as f:
    writer = csv.writer(f)
    
    # Optional: Write a header if needed
    writer.writerow(["Prediction"])

    # Write each item from the predictions list
    for item in test_dataset['Answer']:
        writer.writerow([item])

In [None]:
!pip -q install evaluate
!pip -q install rouge-score
# Load ROUGE for evaluation
import evaluate

# Load ROUGE for evaluation
rouge = evaluate.load("rouge")

# Evaluate predictions


# Prepare references (ground-truth answers)
references = test_dataset['Answer']

# Evaluate predictions

In [None]:
scores = rouge.compute(predictions=predictions, references=references)
print(scores)

In [None]:
bleu = evaluate.load("bleu")
scores = bleu.compute(predictions=predictions, references=references)
print(scores)

In [None]:
from tqdm import tqdm

model.eval()  # Set the model to evaluation mode
predictions = []
all_preds = []  # Ensure this is defined for appending predictions
device = next(model.parameters()).device  # Ensure device compatibility
batch_size = 1  # Set a batch size if needed

for i, question in enumerate(tqdm(test_dataset, desc="Generating predictions", unit="question")):
    # Prepare inputs for the model
#     print(question)
#     break
    prompt = question['Question']
#     instruction = f"### Instruction: {prompt} "
#     print(instruction)
    
#     pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
#     result = pipe(instruction)
#     print(result[0]['generated_text'][len(instruction):])

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    outputs = model.generate(inputs['input_ids'], max_length=70)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Store the response
#     print(response)
#     print(response.split(instruction))
    predictions.append(response)

    # Optionally, append predictions to all_preds every 500 questions
#     if (i + 1) % 500 == 0:
#         all_preds.append(predictions)
#     break
# return predictions


In [None]:
# print(predictions)

In [None]:
# !pip -q install evaluate
# !pip -q install rouge-score
# # Load ROUGE for evaluation
# import evaluate

# # Load ROUGE for evaluation
# rouge = evaluate.load("rouge")

# # Evaluate predictions


# # Prepare references (ground-truth answers)
# references = test_dataset['Answer']

# # Evaluate predictions



In [None]:
# scores = rouge.compute(predictions=predictions, references=references)
# print(scores)

In [None]:
# bleu = evaluate.load("bleu")
# scores = bleu.compute(predictions=predictions, references=references)
# print(scores)

In [None]:
# !pip install nltk
# import nltk
# nltk.download('wordnet')
# nltk.download('omw-1.4')
# meteor = evaluate.load("meteor")
# scores = meteor.compute(predictions=predictions, references=references)
# print(scores)

In [None]:
# !pip -q install bert_score
# from bert_score import score

# P, R, F1 = score(predictions, references, lang="en")
# print(f"BERTScore F1: {F1.mean().item()}")