In [1]:
%%capture
# Upgrade the Hugging Face training stack (transformers, datasets, accelerate, peft, trl,
# bitsandbytes) and Weights & Biases; %%capture hides pip's output.
# NOTE(review): `-U` without version pins installs whatever is newest at run time, so a
# re-run may pull releases whose APIs differ from the ones this notebook was written
# against. Consider pinning once the working versions are confirmed.
%pip install -U transformers
%pip install -U datasets
%pip install -U accelerate 
%pip install -U peft
%pip install -U trl
%pip install -U bitsandbytes
%pip install -U wandb

In [1]:
# Clone the Transformer_From_Scratch_Translation repository into the working directory.
# NOTE(review): no later cell visible in this notebook imports from it; confirm it is still needed.
!git clone https://github.com/A7medM0sta/Transformer_From_Scratch_Translation.git

Cloning into 'Transformer_From_Scratch_Translation'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (76/76), done.[K
remote: Total 89 (delta 24), reused 62 (delta 7), pack-reused 0 (from 0)[K
Receiving objects: 100% (89/89), 3.03 MiB | 12.02 MiB/s, done.
Resolving deltas: 100% (24/24), done.


In [None]:
%load_ext tensorboard
!pip install datasets
!pip install tokenizers
!pip install torchmetrics



In [None]:
# Switch the working directory into the cloned repository; relative paths used by later
# cells (for example a relative training output directory) resolve inside it from here on.
%cd Transformer_From_Scratch_Translation

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,)

from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
import warnings
warnings.filterwarnings("ignore")



In [3]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
# Secrets client; the same instance is reused by the next cell to fetch the W&B key.
user_secrets = UserSecretsClient()

# Read the token stored under the "HUGGINGFACE_TOKEN" secret and log in with it, so the
# token itself never appears in the notebook source.
hf_token = user_secrets.get_secret("HUGGINGFACE_TOKEN")
login(token = hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
# Fetch the Weights & Biases API key (stored under the "Secret_key" secret) and authenticate.
wb_token = user_secrets.get_secret("Secret_key")
wandb.login(key=wb_token)

# Open the W&B run for this training job.
run = wandb.init(project='Fine-tune Llama 3 8B on Medical Dataset', job_type="training", anonymous="allow")

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mahmed-mostafa22200028[0m ([33mcrime[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
# Local path of the base checkpoint (a Kaggle input directory).
base_model = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
# Identifier of the dataset passed to load_dataset (rows carry Patient/Doctor columns).
dataset_name = "ruslanmv/ai-medical-chatbot"
# Name reused for the training output directory, the saved adapter, and the Hub repository.
new_model = "llama-3-8b-chat-doctor"

In [6]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [7]:
# QLoRA config
# 4-bit NF4 quantization with double quantization enabled; the compute dtype is the
# `torch_dtype` chosen in the previous cell.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
# The base checkpoint is loaded with the quantization config above, automatic device
# placement, and the attention implementation chosen in the previous cell.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
# NOTE(review): trust_remote_code=True allows code shipped with the checkpoint to run;
# presumably unnecessary for this local checkpoint. Confirm and drop if so.
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
# LoRA config
# Rank-16 adapters (alpha 32, dropout 0.05, no bias terms) on the listed projection modules.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
# Order matters: the chat format is applied to the base model and tokenizer first, and only
# then is the model wrapped with the LoRA adapters.
# NOTE(review): setup_chat_format presumably switches the tokenizer to the
# <|im_start|>/<|im_end|> template seen in the formatted samples; confirm that replacing
# the checkpoint's own chat template is intended.
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [12]:
# Load the dataset (split="all"), shuffle it with a fixed seed, and keep the first 2000 rows.
dataset = load_dataset(dataset_name, split="all").shuffle(seed=42).select(range(2000))


def format_chat_template(row):
    """Add a "text" field holding the row rendered through the tokenizer's chat template.

    The "Patient" column becomes the user turn and the "Doctor" column the assistant turn.
    The row is updated in place and returned.
    """
    conversation = [
        {"role": "user", "content": row["Patient"]},
        {"role": "assistant", "content": row["Doctor"]},
    ]
    row["text"] = tokenizer.apply_chat_template(conversation, tokenize=False)
    return row


# Render every row using 4 worker processes, then display the resulting dataset summary.
dataset = dataset.map(format_chat_template, num_proc=4)
dataset

Dataset({
    features: ['Description', 'Patient', 'Doctor', 'text'],
    num_rows: 2000
})

In [13]:
dataset['text'][3]

'<|im_start|>user\nHi doctor. My GGT reading in my liver function was abou 850; but a CT scan revealed no scar or damage to my liver and every other about the liver appeared normal except that it was mildly enlarged leading to a conclusion of mild hepatomegaly. I have been placed on Livolin Forte for 1 month and within 2 weeks of intake, my GGT dropped to 650. Every other parameter in liver has dropped to normal. What this portend for me and what is the hope of my liver normalising?<|im_end|>\n<|im_start|>assistant\nthank you for posting query.increased GGT presentation but with Insufficient history.increased ggt without increased liver enzymes maybe due to 1. alcohol abuse     2. gall bladder pathology    3. certain medicationhave you ruled out all causes. underwent blood tests and radiological examination (ct and ultrasound of abdomen). if not, do it asap.Livolin fort is safe to use.further advice:- abstinence from "Alcohol and drugs" - LOW fat diet should be followed- vegetables sho

In [14]:
# Hold out 10% of the rows for evaluation. The fixed seed makes the split reproducible
# across re-runs, in line with the seeded shuffle used when the dataset was sampled.
dataset = dataset.train_test_split(test_size=0.1, seed=42)

In [18]:
# Hyperparameters
# NOTE(review): the training output recorded in this notebook reports 2700 steps / epoch 3.0,
# which corresponds to three epochs rather than the two configured here; the saved output
# appears to come from a run with a different num_train_epochs. Confirm before comparing.
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,  # gradients from 2 micro-batches are combined per optimizer step
    optim="paged_adamw_32bit",
    num_train_epochs=2,
    # `eval_strategy` replaces the deprecated `evaluation_strategy` spelling, which newer
    # transformers releases no longer accept (the install cell pulls the latest release).
    eval_strategy="steps",
    eval_steps=0.2,  # a float below 1 is interpreted as a fraction of the total training steps
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [19]:
# Setting sft parameters
# Supervised fine-tuning on the pre-rendered "text" column with packing disabled and
# max_seq_length=512; the held-out "test" split is used for evaluation.
# NOTE(review): `model` was already wrapped by get_peft_model in an earlier cell, so passing
# peft_config here looks redundant. Confirm the trainer does not wrap it a second time.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)

Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [20]:
# Launch fine-tuning; the call returns a TrainOutput carrying the final global step,
# the average training loss, and the run metrics.
trainer.train()

Step,Training Loss,Validation Loss
540,2.5226,2.481651
1080,1.9483,2.505425
1620,1.6824,2.479209
2160,1.2813,2.789191
2700,0.6658,2.773987


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


TrainOutput(global_step=2700, training_loss=1.8522976643561075, metrics={'train_runtime': 5962.3227, 'train_samples_per_second': 0.906, 'train_steps_per_second': 0.453, 'total_flos': 5.585827747221504e+16, 'train_loss': 1.8522976643561075, 'epoch': 3.0})

In [21]:
# Save the fine-tuned model
# Writes trainer.model to the `new_model` directory, closes the W&B run, and sets
# `use_cache = True` on the model config ahead of the generation cells.
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,▁▂▁██
eval/runtime,▃█▄▁▃
eval/samples_per_second,▆▁▅█▇
eval/steps_per_second,▆▁▅█▇
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▂▃▃▂▂▂▁▁▄▁▁▂▂▁▂▂▃▃▃▂▃▄▂▂▂▃▅▃▄▃▆█▃▄▅▆▂▄▆▆
train/learning_rate,████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,▇▅█▆▇▇▆▄▆▆▆▇▅▇▄▄▅▆▆▄▅▅▄▄▄▃▄▃▃▁▃▂▁▄▃▂▃▂▄▃

0,1
eval/loss,2.77399
eval/runtime,89.2592
eval/samples_per_second,2.241
eval/steps_per_second,2.241
total_flos,5.585827747221504e+16
train/epoch,3.0
train/global_step,2700.0
train/grad_norm,4.34468
train/learning_rate,0.0
train/loss,0.6658


In [22]:
# Save the fine-tuned model locally, then upload it to the Hugging Face Hub repository
# named by `new_model`.
# NOTE(review): save_pretrained was already called in the previous cell, so this second
# local save looks redundant. Confirm and drop if so.
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/A7m0d/llama-3-8b-chat-doctor/commit/bc3f481ce7d160b591b8be309b9a0322a8bdd945', commit_message='Upload model', commit_description='', oid='bc3f481ce7d160b591b8be309b9a0322a8bdd945', pr_url=None, pr_revision=None, pr_num=None)

In [3]:
class ChatBot:
    """
    A simple console chatbot that answers user queries with a language model.

    The chat loop keeps reading user input until the user types 'quit'.

    Attributes:
        tokenizer (object): Tokenizer used to render the chat prompt, encode it, and decode replies.
        model (object): The language model used to generate responses.
        max_new_tokens (int): Upper bound on the number of tokens generated per reply.
    """

    def __init__(self, tokenizer, model, max_new_tokens=150):
        """
        Initializes the ChatBot with a tokenizer and a model.

        Args:
            tokenizer (object): The tokenizer to process input and output texts.
            model (object): The language model used to generate responses.
            max_new_tokens (int): Maximum number of tokens to generate for each reply.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.max_new_tokens = max_new_tokens

    def respond(self, user_input):
        """
        Generates the model's reply to a single user message.

        Args:
            user_input (str): The text typed by the user.

        Returns:
            str: The generated reply with surrounding whitespace removed.
        """
        messages = [{"role": "user", "content": user_input}]
        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # Move the inputs to the model's own device rather than assuming "cuda"; fall back
        # to "cuda" (the previous behaviour) when the model does not expose a device.
        device = getattr(self.model, "device", "cuda")
        inputs = self.tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(device)

        # max_new_tokens bounds only the reply. The previous max_length=150 also counted the
        # prompt tokens, so a long question left little or no room for an answer.
        outputs = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens, num_return_sequences=1)

        # The generated sequence starts with the prompt tokens. Decode only what follows them
        # instead of splitting the decoded text on the word "assistant", which returned part
        # of the user's own message whenever that message contained "assistant" and raised
        # IndexError when the marker was missing.
        prompt_length = inputs["input_ids"].shape[-1]
        reply_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()

    def chat(self):
        """
        Starts the chatbot interaction with the user, taking queries and responding until the user types 'quit'.

        The chatbot will generate and print responses based on user input.
        """
        while True:
            user_input = input("You: ")
            # Ignore surrounding whitespace so "quit " or " QUIT" also end the session.
            if user_input.strip().lower() == "quit":
                print("Goodbye!")
                break

            print(f"Bot: {self.respond(user_input)}")

# Example usage:
# Requires `tokenizer` and `model` to already exist in the kernel (they are created by the
# model-loading cells earlier in the notebook); on a fresh kernel this cell raises NameError.
bot = ChatBot(tokenizer, model)
bot.chat()

NameError: name 'tokenizer' is not defined

In [23]:
# One-off sanity check: ask the fine-tuned model a single question and print its reply.
messages = [{"role": "system", "content": "If you are a doctor, please answer the medical questions based on the patient's description."},
    {"role": "user", "content": "Hello, I am in the middle of a severe anxiety/panic attack. Could you help me?"}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Place the inputs on the model's device rather than assuming a device name.
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# The generated sequence begins with the prompt tokens; decode only what follows them.
# Splitting the decoded text on the word "assistant" cut the reply short whenever the
# reply itself contained that word and raised IndexError if the marker was missing.
prompt_length = inputs["input_ids"].shape[-1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(text)

Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl (12.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.7/318.7 kB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
Collecting pydantic>=2.0 (from gradio)
  Downloading pydantic-2.8.2-py3-none-any.whl (423 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.9/423.9 kB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m
Collecting python-multipart>=0.0.9 (from grad

In [2]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl (12.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.6/12.6 MB[0m [31m62.3 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.7/318.7 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub>=0.19.3 (from gradio)
  Downloading huggingface_hub-0.24.5-py3-none-any.whl (417 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.5/417.5 kB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydantic>=2.

In [3]:
# Install Gradio (uncomment if needed)
# !pip install gradio

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# # Load your model and tokenizer
# model_name = "your-model-name"  # Replace with your model name
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name).to("cuda")

# Define the function that will generate the response using your model
def generate_response(user_input):
    """Return the model's reply to a single user message.

    Uses the notebook-level `tokenizer` and `model`.

    Args:
        user_input (str): The text entered by the user.

    Returns:
        str: The generated reply with surrounding whitespace removed.
    """
    messages = [{"role": "user", "content": user_input}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Place the inputs on the model's device rather than assuming a device name.
    inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to(model.device)

    # max_new_tokens bounds only the reply; the previous max_length=150 also counted the
    # prompt tokens, so a long question left little or no room for an answer.
    outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

    # The generated sequence begins with the prompt tokens; decode only what follows them.
    # Splitting the decoded text on "assistant" returned part of the user's own message
    # whenever that message contained the word.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    return response

# Create the Gradio interface
# One text box in, one text box out, wired to generate_response.
demo = gr.Interface(
    fn=generate_response,  # The function that processes the input and returns the output
    inputs="text",         # The input type (text box)
    outputs="text",        # The output type (text box)
    title="Your Model Chatbot",  # Title of the interface
    description="This chatbot interacts with users based on a language model.",  # Description
)

# Launch the interface
# On Kaggle, Gradio reports that sharing is required and sets share=True itself, so this
# also creates a temporary public URL for the demo.
demo.launch()

Running on local URL:  http://127.0.0.1:7860
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Running on public URL: https://7aac04bf1694e4d2d1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [26]:
# # !pip install gradio
# import gradio as gr


# def greet(name):
#     return "Hello " + name


# demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# demo.launch()

Running on local URL:  http://127.0.0.1:7860
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Running on public URL: https://ee166e773c4de562d1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


