In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [2]:
import os, torch, logging
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer



In [3]:
# Dataset
#data_name = "RafaelMPereira/HealthCareMagic-100k-Chat-Format-en"
#training_data = load_dataset(data_name, split="train")

# Model and tokenizer names
base_model_name = "NousResearch/Llama-2-7b-chat-hf"
refined_model = "/kaggle/input/llama2-1/llama-2-7b-mlabonne-enhanced"

# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [4]:
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
return_dict=True,
torch_dtype=torch.float16,
device_map='auto',
)
model = PeftModel.from_pretrained(base_model, refined_model)
model = model.merge_and_unload()

save_dir = "/kaggle/working/merged_model"
model.save_pretrained(save_dir, safe_serialization=True)

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer.save_pretrained(save_dir)


config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]


Thrown during validation:
`do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.


('/kaggle/working/merged_model/tokenizer_config.json',
 '/kaggle/working/merged_model/special_tokens_map.json',
 '/kaggle/working/merged_model/tokenizer.model',
 '/kaggle/working/merged_model/added_tokens.json',
 '/kaggle/working/merged_model/tokenizer.json')

In [5]:
!cd /kaggle/working

In [6]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/merged_model/config.json')

In [7]:
# Generate Text
query = "I feel uncomfortable with my abdominal, and get headache, what should I do?"
refined_model = "/kaggle/working/merged_model"

# Tokenizer
base_model_name = "NousResearch/Llama-2-7b-chat-hf"
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

text_gen = pipeline(task="text-generation", model=refined_model, tokenizer=llama_tokenizer, max_length=500)
output = text_gen(f"<human>: {query} <bot>:")
print(output[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

<human>: I feel uncomfortable with my abdominal, and get headache, what should I do? <bot>: Hi, Thanks for posting your query.
I have gone through your query and I understand your concerns. The symptoms you have mentioned are not specific to any particular disease. However, you should get a complete blood count, liver function test, thyroid function test and an ultrasound abdomen to rule out any underlying pathology. If the reports are normal, you can take a course of antibiotics and anti-inflammatory Chat Doctor.  If the symptoms persist, you should consult your doctor. I hope my answer helps. Please feel free to ask if you have any further queries. Wishing you good health, Chat Doctor. 30 minutes. Thank you. Take care.  Chat Doctor.  Internal Medicine.  Chat Doctor.  Specialist in Chronic Diseases.  Chat Doctor.  Specialist in Diabetes.  Chat Doctor.  Specialist in Cardiovascular Diseases.  Chat Doctor.  Specialist in Respiratory Diseases.  Chat Doctor.  Specialist in Cancer.  Chat D

In [8]:
query = "I feel uncomfortable with my abdominal, and get headache, what should I do?"
output = text_gen(f"<human>: {query} <bot>:")
print(output[0]['generated_text'])

<human>: I feel uncomfortable with my abdominal, and get headache, what should I do? <bot>: Hi, Thanks for posting your query.
I have gone through your query and I understand your concerns. The symptoms you have mentioned are not specific to any particular disease. However, you should get a complete blood count, liver function test, thyroid function test and an ultrasound abdomen to rule out any underlying pathology. If the reports are normal, you can take a course of antibiotics and anti-inflammatory Chat Doctor.  If the symptoms persist, you should consult your doctor. I hope my answer helps. Please feel free to ask if you have any further queries. Wishing you good health, Chat Doctor. 30 minutes. Thank you. Take care.  Chat Doctor.  Internal Medicine.  Chat Doctor.  Specialist in Chronic Diseases.  Chat Doctor.  Specialist in Diabetes.  Chat Doctor.  Specialist in Cardiovascular Diseases.  Chat Doctor.  Specialist in Respiratory Diseases.  Chat Doctor.  Specialist in Cancer.  Chat D

In [9]:
# Generate
from transformers import LlamaForCausalLM
refined_model = "/kaggle/working/merged_model"
text_gen = LlamaForCausalLM.from_pretrained(
    refined_model,
    #local_files_only=True
)

query = "I feel uncomfortable with my abdominal, and get headache, what should I do?"
input = llama_tokenizer(f"<human>: {query} <bot>:")
output = text_gen(**input)
print(output[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]