In [1]:
import random
from textwrap import dedent
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import seaborn as sns
import torch
import os
from datasets import Dataset, load_dataset
import warnings

from peft import LoraConfig, PeftModel, TaskType, get_peft_model, prepare_model_for_kbit_training
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline, AutoConfig
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM, SFTConfig
from torch.utils.data import DataLoader

# Blanket filter: every warning raised through the `warnings` module is
# ignored from this point on, for all categories and all modules.
warnings.filterwarnings('ignore')

In [2]:
# Map each split name to its JSON file (paths are relative to the working
# directory), then load all three splits in a single call.
SPLIT_FILES = {
    "train": "train.json",
    "validation": "val.json",
    "test": "test.json",
}
dataset = load_dataset("json", data_files=SPLIT_FILES)
dataset

DatasetDict({
    train: Dataset({
        features: ['questions', 'answers', 'text', 'token_count'],
        num_rows: 189
    })
    validation: Dataset({
        features: ['questions', 'answers', 'text', 'token_count'],
        num_rows: 38
    })
    test: Dataset({
        features: ['questions', 'answers', 'text', 'token_count'],
        num_rows: 10
    })
})

In [5]:
MODEL_NAME = 'finetuned_llama_medicare'

# 4-bit quantization settings passed to the model loader below.
# The option is spelled `bnb_4bit_quant_type` (singular). The previous plural
# spelling is not a recognised argument, so the requested "nf4" data type was
# never applied and the library default was used instead.
quantization_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = "nf4",
    bnb_4bit_compute_dtype = torch.bfloat16
)

# Tokenizer and model are both resolved from the same MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast = True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config = quantization_config,
    device_map = "auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
# Generation settings shared by every call made through `pipe`.
generation_kwargs = dict(max_new_tokens=512, return_full_text=False)

# Text-generation pipeline built on the model and tokenizer loaded above.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_kwargs,
)

Device set to use cuda:0


In [7]:
def create_test_prompt(data_row):
    """Render one dataset example as a chat-formatted inference prompt.

    Args:
        data_row: Mapping with a "questions" entry holding the question text.

    Returns:
        The result of ``tokenizer.apply_chat_template`` for a system message
        plus the user message, called with ``tokenize=False`` and
        ``add_generation_prompt=True``.
    """
    # Read the question from the argument. The earlier version read a
    # notebook-level `row` variable, so the function ignored its parameter
    # and failed with NameError unless that global happened to exist.
    prompt = dedent(
        f"""
        {data_row["questions"]}
        You are an assistant for someone who is a patient with dementia, give appropriate and kind responses.
        """
    )

    messages = [
        {
            "role": "system",
            "content": "Use only the information to answer the question",
        },
        {"role": "user", "content": prompt},
    ]
    # `tokenizer` is the notebook-level tokenizer loaded in an earlier cell.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [8]:
# First example of the test split, rendered as a chat-formatted prompt.
# `row` and `prompt` stay notebook-level names because later cells read them.
row = dataset["test"][0]
prompt = create_test_prompt(data_row=row)
print(prompt)

<s>[INST] <<SYS>>
Use only the information to answer the question
<</SYS>>


What lessons did Arjun learn from his uncle Sajan’s journey as a successful entrepreneur about balancing risk and reward?
You are an assistant for someone who is a patient with dementia, give appropriate and kind responses. [/INST]


In [10]:
# Generate a completion for the current prompt and print it next to the
# reference answer stored in the same row.
outputs = pipe(prompt)
response = "\n    answer : {}\n    prediction : {}\n".format(
    row["answers"],
    outputs[0]["generated_text"],
)

print(response)


    answer : Sajan’s experiences highlighted the importance of strategic planning and perseverance, influencing Arjun’s approach to managing business ventures.
    prediction :   Arjun’s uncle Sajan’s entrepreneurial journey taught him the importance of calculated risks. He showed how taking calculated risks can lead to greater rewards. Arjun learned that balancing calculated risks with careful planning and research can lead to long-term success. 



In [11]:
# Second example of the test split, rendered as a chat-formatted prompt.
# `row` and `prompt` stay notebook-level names because the next cell reads them.
row = dataset["test"][1]
prompt = create_test_prompt(data_row=row)
print(prompt)

<s>[INST] <<SYS>>
Use only the information to answer the question
<</SYS>>


Who is Malini and what is her relationship with Arjun?
You are an assistant for someone who is a patient with dementia, give appropriate and kind responses. [/INST]


In [12]:
# Generate a completion for the current prompt and print it next to the
# reference answer stored in the same row.
outputs = pipe(prompt)
response = "\n    answer : {}\n    prediction : {}\n".format(
    row["answers"],
    outputs[0]["generated_text"],
)

print(response)


    answer : Malini is Arjun’s wife and childhood sweetheart. She is a software engineer and a steady presence in Arjun’s life, balancing his ambitious spirit with pragmatism and warmth. Their partnership is grounded in shared values, mutual respect, and a deep history of growing up and building a life together.
    prediction :  Oh, such a lovely memory! Malini is Arjun’s wife, and they were so happy together. Malini would often surprise him with his favorite meals and reminisce about their adventures. She’d make him feel loved and cherished. 

