# **Develop a chatbot using an LLM and fine-tune it for task-oriented dialogue generation.**

In [10]:
!pip install -q transformers datasets peft bitsandbytes accelerate trl

In [11]:
import os
# Disable Weights & Biases logging to avoid clutter or if not needed
os.environ["WANDB_DISABLED"] = "true"
import json
import torch
from huggingface_hub import login
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

print("All libraries imported successfully.")

All libraries imported successfully.


In [12]:
# --- Step 1: Authenticate with Hugging Face ---
# Models like Gemma are "gated" and require you to be logged in to a Hugging Face
# account that has been granted access.
# 1. Go to https://huggingface.co/google/gemma-2b and agree to the terms.
# 2. Go to https://huggingface.co/settings/tokens to get an access token.
# 3. Paste the token when prompted by the login() function below.

try:
    login()
    print("Authentication successful.")
except Exception as e:
    print(f"Authentication failed. Please ensure you have a valid Hugging Face token. Error: {e}")

print("-" * 50)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Authentication successful.
--------------------------------------------------


In [13]:
# --- Step 2: Create a Synthetic Dataset for Fine-Tuning ---
# For a real-world scenario, you would have a much larger and more diverse dataset.
# Here, we create a small, focused dataset to teach the model the specific format
# and flow of a restaurant booking conversation.

# The format is crucial. We'll use a simple "text" field where each entry
# contains a snippet of a dialogue. The model learns to generate the 'Bot' part.
dialogue_data = [
    {"text": "User: Hi, I'd like to book a table.\nBot: I can help with that! What type of cuisine are you in the mood for?"},
    {"text": "User: I want a table for 2 people.\nBot: Sounds good. For what date and time?"},
    {"text": "User: Can I get a reservation for tonight at 7pm?\nBot: Certainly. How many people will be in your party?"},
    {"text": "User: We want Italian food.\nBot: Great choice! For what date and time, and for how many people?"},
    {"text": "User: Book a table for 4 people at a Mexican restaurant for this Friday at 8 PM.\nBot: Got it. A table for 4 at a Mexican place on Friday at 8 PM. I will check for availability."},
    {"text": "User: Hi there!\nBot: Hello! I can assist you with booking a restaurant. What are you looking for?"},
    {"text": "User: A table for this Saturday.\nBot: Okay, Saturday it is. What time would you like to book, and for how many people?"},
]


In [14]:
# Save the dataset to a JSON Lines (.jsonl) file
dataset_filename = "restaurant_bookings.jsonl"
with open(dataset_filename, "w") as f:
    for entry in dialogue_data:
        json.dump(entry, f)
        f.write("\n")

print(f"Synthetic dataset created and saved to '{dataset_filename}'.")
print("-" * 50)

Synthetic dataset created and saved to 'restaurant_bookings.jsonl'.
--------------------------------------------------


In [15]:
# --- Step 3: Load Model and Tokenizer ---
# We'll use a smaller, powerful model like Google's Gemma-2B.
# To make it runnable on a consumer GPU, we'll load it in 4-bit precision
# using the bitsandbytes library.

# Model from Hugging Face Hub
base_model_name = "google/gemma-2b"

# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=bnb_config,
    device_map="auto", # Automatically maps the model to the available device (GPU)
)
model.config.use_cache = False # Recommended for fine-tuning
model.config.pretraining_tp = 1

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token # Set padding token to end-of-sequence token
tokenizer.padding_side = "right"

print(f"Base model '{base_model_name}' and tokenizer loaded successfully.")
print("-" * 50)



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Base model 'google/gemma-2b' and tokenizer loaded successfully.
--------------------------------------------------


In [16]:
# --- Step 4: Configure LoRA (Parameter-Efficient Fine-Tuning) ---
# Instead of training the entire model, we only train small "adapter" layers.
# This is much faster and requires significantly less memory.

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8, # The dimension of the low-rank matrices
    bias="none",
    task_type="CAUSAL_LM",
    # It's common to target the attention projection layers
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"]
)

print("LoRA configuration created.")
print("-" * 50)

LoRA configuration created.
--------------------------------------------------


In [17]:

# --- Step 5: Set Up Training ---
# We'll use the SFTTrainer from the 'trl' library, which is specifically
# designed for supervised fine-tuning of models on text-based tasks.

# Load the dataset we created
dataset = load_dataset("json", data_files=dataset_filename, split="train")

# Training arguments
training_arguments = TrainingArguments(
    output_dir="./results", # Directory to save results
    num_train_epochs=10,    # A few epochs are often enough for LoRA
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=50,
    logging_steps=10,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False, # Set to True if your GPU supports it
    bf16=False, # Set to True if your GPU supports it
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)

# Initialize the SFTTrainer
# The 'dataset_text_field' argument is no longer needed as of TRL v0.9.0.
# The trainer will automatically use the 'text' column from the dataset.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    args=training_arguments
)

print("Trainer initialized. Starting the fine-tuning process...")
print("-" * 50)


Generating train split: 0 examples [00:00, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Adding EOS to train dataset:   0%|          | 0/7 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/7 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/7 [00:00<?, ? examples/s]

Trainer initialized. Starting the fine-tuning process...
--------------------------------------------------


In [18]:
# --- Step 6: Train the Model ---
trainer.train()

print("Fine-tuning complete!")
print("-" * 50)

Step,Training Loss
10,2.6191
20,1.6069
30,0.9619
40,0.6555


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Fine-tuning complete!
--------------------------------------------------


In [19]:
# --- Step 7: Interactive Chat with the Fine-Tuned Model ---
def chat_with_bot(trainer_model, tokenizer):
    """
    Initiates an interactive chat session with the fine-tuned model.
    The chat maintains a history to provide context for generating responses.
    """
    history = ""
    print("Chat with your fine-tuned bot! Type 'quit' to exit.")
    print("-" * 50)

    while True:
        # Get user input
        user_input = input("User: ")
        if user_input.lower() == "quit":
            print("Bot: Goodbye!")
            break

        # Construct the prompt with history
        # The format must match the training data: "User: ...\nBot:"
        prompt = f"{history}User: {user_input}\nBot:"

        # Tokenize the prompt and generate a response
        inputs = tokenizer(prompt, return_tensors="pt").to(trainer_model.device)
        outputs = trainer_model.generate(
            **inputs,
            max_new_tokens=50,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id
        )

        # Decode the full output
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract only the newly generated part of the response
        new_bot_response = full_response[len(prompt):].strip()

        print(f"Bot: {new_bot_response}")

        # Update history for the next turn
        history += f"User: {user_input}\nBot: {new_bot_response}\n"

# Start the interactive chat session
chat_with_bot(trainer.model, tokenizer)


Chat with your fine-tuned bot! Type 'quit' to exit.
--------------------------------------------------
User: hi


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Bot: Hello! I can assist you with booking a restaurant. What are you looking for?
User: i want to book a table for 2


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Bot: Great choice! For what date and time?
User: today at 8 pm


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Bot: I have a reservation for that time at a Mexican restaurant. Would you like to confirm it?
User: no


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Bot: Okay, I will try to find a different time and date.
User: bye


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Bot: See you later! I will try to find a time and date for you.


KeyboardInterrupt: Interrupted by user