In [1]:
# Step 1: Install Dependencies
!pip install -q transformers datasets peft accelerate bitsandbytes
!pip install -q auto-gptq optimum torch torchvision torchaudio
!pip install -q requests

print("✅ All dependencies installed!")

# Verify GPU availability
import torch
print(f"🔥 GPU Available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU Name: {torch.cuda.get_device_name()}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m77.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m64.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Step 2: Imports and Setup
import torch
import json
import re
import random
import warnings
warnings.filterwarnings('ignore')

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import Dataset, DatasetDict
import transformers
from transformers import DataCollatorForLanguageModeling
import requests

print("🎭 Shakespeare QLoRA Fine-tuning Tutorial")
print("=" * 50)

🎭 Shakespeare QLoRA Fine-tuning Tutorial


In [3]:
# Step 3: Data Processing Functions
def download_shakespeare_data():
    """Download the tinyshakespeare dataset"""
    print("📥 Downloading Shakespeare dataset...")
    url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
    response = requests.get(url)
    print(f"✅ Downloaded {len(response.text):,} characters")
    return response.text

def extract_dialogues(text, max_samples=300):
    """Extract dialogue exchanges and convert to conversational format"""
    print("🔍 Extracting dialogues...")
    conversations = []

    # Split into lines and clean
    lines = [line.strip() for line in text.split('\n') if line.strip()]

    # Pattern to match character names (usually ALL CAPS at start of line)
    character_pattern = r'^[A-Z][A-Z\s]+[A-Z]$'

    current_play = ""
    current_speaker = ""
    current_speech = ""

    for i, line in enumerate(lines):
        # Detect play titles (usually longer ALL CAPS lines)
        if len(line) > 20 and line.isupper() and not re.match(character_pattern, line):
            current_play = line
            continue

        # Detect character names
        if re.match(character_pattern, line) and len(line) < 30:
            # Save previous speech if exists
            if current_speaker and current_speech and len(current_speech) > 50:
                conversations.append({
                    "play": current_play,
                    "speaker": current_speaker,
                    "speech": current_speech.strip(),
                    "context": f"From {current_play}" if current_play else "From Shakespeare"
                })

            current_speaker = line
            current_speech = ""
            continue

        # Accumulate speech text
        if current_speaker and not line.isupper():
            current_speech += " " + line

    # Convert to conversational Q&A format
    conversation_pairs = []

    # Create different types of conversations
    for conv in conversations[:max_samples]:
        if len(conv['speech']) < 100:  # Skip very short speeches
            continue

        # Type 1: Direct quote requests
        conversation_pairs.append({
            "user": f"What does {conv['speaker']} say about their situation?",
            "assistant": f"{conv['speech']} —{conv['speaker']}, {conv['context']}",
            "type": "quote"
        })

        # Type 2: Character analysis
        conversation_pairs.append({
            "user": f"How would {conv['speaker']} speak in Shakespeare's style?",
            "assistant": f"In the manner of {conv['speaker']}: {conv['speech']}",
            "type": "character"
        })

    # Add general Shakespeare knowledge Q&A
    general_qa = [
        {
            "user": "Write something in Shakespeare's style about love",
            "assistant": "Shall I compare thee to a summer's day? Thou art more lovely and more temperate. Love speaks in flowery language, where hearts do dance and souls embrace in sweet harmony.",
            "type": "style"
        },
        {
            "user": "How would Shakespeare describe a beautiful morning?",
            "assistant": "The golden sun doth rise with majesty, painting the sky with hues of amber bright, while morning dew adorns each blade of grass like diamonds scattered by the hand of heaven.",
            "type": "style"
        },
        {
            "user": "Can you speak like a Shakespearean character?",
            "assistant": "Verily, good sir or madam, I shall speak in the manner of the Bard's most noble characters, with flowery speech and wit most sharp, as befits one versed in the language of fair England's greatest playwright.",
            "type": "roleplay"
        }
    ]

    conversation_pairs.extend(general_qa)
    print(f"✅ Created {len(conversation_pairs)} conversation pairs")
    return conversation_pairs

def create_training_examples(conversations):
    """Convert conversations to training format with proper chat templates"""
    print("📝 Creating training examples...")
    examples = []

    # System prompt for Shakespeare AI
    system_prompt = """You are ShakespeareGPT, an AI trained to communicate in the style of William Shakespeare. You respond to questions about Shakespeare's works, write in Shakespearean style, and embody the eloquence and wit of the Bard. Always maintain the poetic, elevated language characteristic of Shakespeare's era, and end your responses with '—ShakespeareGPT'."""

    for conv in conversations:
        # Format as chat template (Mistral format)
        formatted_example = f"""[INST] {system_prompt}

{conv['user']} [/INST] {conv['assistant']} —ShakespeareGPT"""

        examples.append({
            "example": formatted_example,
            "type": conv["type"]
        })

    print(f"✅ Generated {len(examples)} training examples")
    return examples

# Test the data processing
shakespeare_text = download_shakespeare_data()
conversations = extract_dialogues(shakespeare_text)
training_examples = create_training_examples(conversations)

print(f"\n📊 Dataset Statistics:")
print(f"   Raw text: {len(shakespeare_text):,} characters")
print(f"   Conversations: {len(conversations):,}")
print(f"   Training examples: {len(training_examples):,}")

📥 Downloading Shakespeare dataset...
✅ Downloaded 1,115,394 characters
🔍 Extracting dialogues...
✅ Created 3 conversation pairs
📝 Creating training examples...
✅ Generated 3 training examples

📊 Dataset Statistics:
   Raw text: 1,115,394 characters
   Conversations: 3
   Training examples: 3


In [4]:
# Step 4: Model Setup
def setup_model_and_tokenizer():
    """Load quantized model and tokenizer"""
    print("🤖 Loading model and tokenizer...")

    model_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

    # Load quantized model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        trust_remote_code=False,
        revision="main",
        torch_dtype=torch.float16
    )

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    tokenizer.pad_token = tokenizer.eos_token

    print(f"✅ Model loaded successfully!")
    print(f"   Model parameters: {model.num_parameters():,}")
    print(f"   GPU memory allocated: {torch.cuda.memory_allocated()/1024**3:.2f} GB")

    return model, tokenizer

def test_base_model(model, tokenizer):
    """Test the base model before fine-tuning"""
    print("\n🧪 Testing Base Model")
    print("-" * 30)

    model.eval()

    test_prompt = "[INST] Write a short poem in Shakespeare's style about technology [/INST]"
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("📝 Base model response:")
    print(response.split("[/INST]")[-1].strip())

# Load model
model, tokenizer = setup_model_and_tokenizer()
test_base_model(model, tokenizer)

🤖 Loading model and tokenizer...


config.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/4.16G [00:00<?, ?B/s]

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Some weights of the model checkpoint at TheBloke/Mistral-7B-Instruct-v0.2-GPTQ were not used when initializing MistralForCausalLM: ['model.layers.0.mlp.down_proj.bias', 'model.layers.0.mlp.gate_proj.bias', 'model.layers.0.mlp.up_proj.bias', 'model.layers.0.self_attn.k_proj.bias', 'model.layers.0.self_attn.o_proj.bias', 'model.layers.0.self_attn.q_proj.bias', 'model.layers.0.self_attn.v_proj.bias', 'model.layers.1.mlp.down_proj.bias', 'model.layers.1.mlp.gate_proj.bias', 'model.layers.1.mlp.up_proj.bias', 'model.layers.1.self_attn.k_proj.bias', 'model.layers.1.self_attn.o_proj.bias', 'model.layers.1.self_attn.q_proj.bias', 'model.layers.1.self_attn.v_proj.bias', 'model.layers.10.mlp.down_proj.bias', 'model.layers.10.mlp.gate_proj.bias', 'model.layers.10.mlp.up_proj.bias', 'model.layers.10.self_attn.k_proj.bias', 'model.layers.10.self_attn.o_proj.bias', 'model.layers.10.self_attn.q_pr

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

✅ Model loaded successfully!
   Model parameters: 262,410,240
   GPU memory allocated: 3.87 GB

🧪 Testing Base Model
------------------------------
📝 Base model response:
In realm of wires and glowing screens, Where thoughts in silicon take their being, And magic boxes weave a web unseen, Wherein the world is but a dancing,

Beneath the moon of constant connectivity,
In this brave new world, where swiftly time flies,
We ply our trades in this ethereal sea,
And in its depths, our voices never die.

In halls of glass and glowing screens we


In [5]:
# Step 5: QLoRA Setup
def prepare_model_for_qlora(model):
    """Prepare model for QLoRA training"""
    print("\n⚙️ Preparing Model for QLoRA Training")
    print("-" * 40)

    # Switch to training mode
    model.train()

    # Enable gradient checkpointing (memory optimization)
    model.gradient_checkpointing_enable()

    # Prepare model for k-bit training
    model = prepare_model_for_kbit_training(model)

    # LoRA configuration
    lora_config = LoraConfig(
        r=16,  # Rank
        lora_alpha=32,  # LoRA alpha parameter
        target_modules=["q_proj", "v_proj"],  # Target attention layers
        lora_dropout=0.1,  # Dropout for LoRA layers
        bias="none",  # No bias training
        task_type="CAUSAL_LM"  # Causal language modeling
    )

    # Create LoRA model
    model = get_peft_model(model, lora_config)

    # Print trainable parameters
    model.print_trainable_parameters()

    return model

# Prepare model for QLoRA
model = prepare_model_for_qlora(model)


⚙️ Preparing Model for QLoRA Training
----------------------------------------
trainable params: 6,815,744 || all params: 269,225,984 || trainable%: 2.5316


In [6]:
# Cell 6: Dataset Preparation
def create_and_tokenize_dataset(training_examples, tokenizer):
    """Create and tokenize the dataset"""
    print("\n📚 Preparing Dataset")
    print("-" * 25)

    # Split into train/test
    random.shuffle(training_examples)
    split_idx = int(0.9 * len(training_examples))

    train_data = training_examples[:split_idx]
    test_data = training_examples[split_idx:]

    # Create HuggingFace dataset
    dataset = DatasetDict({
        "train": Dataset.from_list(train_data),
        "test": Dataset.from_list(test_data)
    })

    print(f"✅ Dataset created - Train: {len(train_data)}, Test: {len(test_data)}")

    # Tokenization function
    def tokenize_function(examples):
        tokenizer.truncation_side = "left"
        return tokenizer(
            examples["example"],
            truncation=True,
            max_length=512,
            padding=False
        )

    # Tokenize dataset
    print("🔤 Tokenizing dataset...")
    tokenized_data = dataset.map(tokenize_function, batched=True)
    print("✅ Dataset tokenized successfully")

    return tokenized_data

# Create and tokenize dataset
tokenized_data = create_and_tokenize_dataset(training_examples, tokenizer)


📚 Preparing Dataset
-------------------------
✅ Dataset created - Train: 2, Test: 1
🔤 Tokenizing dataset...


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

✅ Dataset tokenized successfully


In [10]:
# Cell 7: Training
def train_shakespeare_model(model, tokenized_data, tokenizer):
    """Train the model using QLoRA"""
    print("\n🎯 Starting QLoRA Training")
    print("-" * 30)

    # Training hyperparameters (optimized for Colab free tier)
    training_args = transformers.TrainingArguments(
    output_dir="shakespeare-qlora-ft",
    learning_rate=2e-4,
    per_device_train_batch_size=1,  # Small batch size for free tier
    per_device_eval_batch_size=1,
    num_train_epochs=2,  # Fewer epochs for demo
    weight_decay=0.01,
    logging_strategy="steps",
    logging_steps=5,
    eval_strategy="steps",  # Changed from evaluation_strategy
    eval_steps=25,
    save_strategy="steps",
    save_steps=50,
    load_best_model_at_end=True,
    gradient_accumulation_steps=16,  # Effective batch size = 1*16 = 16
    warmup_steps=5,
    fp16=True,  # Mixed precision training
    optim="paged_adamw_8bit",  # 8-bit optimizer
    dataloader_pin_memory=False,
    remove_unused_columns=False,
    report_to="none",  # Disable wandb logging
    )

    # Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    # Create trainer
    trainer = transformers.Trainer(
        model=model,
        train_dataset=tokenized_data["train"],
        eval_dataset=tokenized_data["test"],
        args=training_args,
        data_collator=data_collator,
        tokenizer=tokenizer,
    )

    # Disable caching during training
    model.config.use_cache = False

    # Train!
    print("🚀 Starting training... (This will take ~15-20 minutes)")
    trainer.train()

    # Re-enable caching
    model.config.use_cache = True

    print("✅ Training completed!")
    return trainer

# Start training
trainer = train_shakespeare_model(model, tokenized_data, tokenizer)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



🎯 Starting QLoRA Training
------------------------------
🚀 Starting training... (This will take ~15-20 minutes)


ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`example` in this case) have excessive nesting (inputs type `list` where type `int` is expected).

**PS: Follow the next two cells for fixing the issue on the training aspect**

In [11]:
# Debug your tokenized data structure
print("🔍 Debugging tokenized data structure...")
print("-" * 40)

# Check the structure of your tokenized_data
print("Tokenized data keys:", tokenized_data.keys())
print("Train dataset type:", type(tokenized_data["train"]))
print("Train dataset length:", len(tokenized_data["train"]))

# Look at a sample from the dataset
sample = tokenized_data["train"][0]
print("\nSample structure:")
print("Sample keys:", sample.keys() if hasattr(sample, 'keys') else "No keys (not a dict)")
print("Sample type:", type(sample))

# Check the actual content
if isinstance(sample, dict):
    for key, value in sample.items():
        print(f"Key '{key}':")
        print(f"  Type: {type(value)}")
        print(f"  Shape/Length: {len(value) if hasattr(value, '__len__') else 'No length'}")
        if hasattr(value, '__iter__') and not isinstance(value, str):
            print(f"  First few items: {list(value)[:5] if len(value) > 0 else 'Empty'}")
        else:
            print(f"  Value: {str(value)[:100]}{'...' if len(str(value)) > 100 else ''}")
        print()
else:
    print(f"Sample content: {str(sample)[:200]}{'...' if len(str(sample)) > 200 else ''}")

# Check if input_ids are actually integers
if isinstance(sample, dict) and 'input_ids' in sample:
    first_tokens = sample['input_ids'][:10] if len(sample['input_ids']) > 0 else []
    print(f"First 10 input_ids: {first_tokens}")
    print(f"Types of first few tokens: {[type(x) for x in first_tokens[:3]]}")

🔍 Debugging tokenized data structure...
----------------------------------------
Tokenized data keys: dict_keys(['train', 'test'])
Train dataset type: <class 'datasets.arrow_dataset.Dataset'>
Train dataset length: 2

Sample structure:
Sample keys: dict_keys(['example', 'type', 'input_ids', 'attention_mask'])
Sample type: <class 'dict'>
Key 'example':
  Type: <class 'str'>
  Shape/Length: 601
  Value: [INST] You are ShakespeareGPT, an AI trained to communicate in the style of William Shakespeare. You...

Key 'type':
  Type: <class 'str'>
  Shape/Length: 5
  Value: style

Key 'input_ids':
  Type: <class 'list'>
  Shape/Length: 143
  First few items: [1, 733, 16289, 28793, 995]

Key 'attention_mask':
  Type: <class 'list'>
  Shape/Length: 143
  First few items: [1, 1, 1, 1, 1]

First 10 input_ids: [1, 733, 16289, 28793, 995, 460, 19155, 28777, 6316, 28725]
Types of first few tokens: [<class 'int'>, <class 'int'>, <class 'int'>]


In [12]:
# Remove the problematic columns before training
print("🔧 Fixing dataset columns...")
print(f"Current columns: {tokenized_data['train'].column_names}")

# Remove the 'example' and 'type' columns - keep only tokenization outputs
columns_to_remove = ['example', 'type']
cleaned_tokenized_data = tokenized_data.remove_columns(columns_to_remove)

print(f"Cleaned columns: {cleaned_tokenized_data['train'].column_names}")

# Verify the cleaned dataset
sample = cleaned_tokenized_data["train"][0]
print(f"\nCleaned sample structure:")
print(f"Sample keys: {sample.keys()}")
for key, value in sample.items():
    print(f"  {key}: {type(value)} with length {len(value)}")

# Also add labels for language modeling (if not already present)
def add_labels(examples):
    examples["labels"] = examples["input_ids"].copy()
    return examples

if "labels" not in cleaned_tokenized_data["train"].column_names:
    print("Adding labels column...")
    cleaned_tokenized_data = cleaned_tokenized_data.map(add_labels, batched=True)
    print(f"Final columns: {cleaned_tokenized_data['train'].column_names}")

print("✅ Dataset cleaning completed!")

🔧 Fixing dataset columns...
Current columns: ['example', 'type', 'input_ids', 'attention_mask']
Cleaned columns: ['input_ids', 'attention_mask']

Cleaned sample structure:
Sample keys: dict_keys(['input_ids', 'attention_mask'])
  input_ids: <class 'list'> with length 143
  attention_mask: <class 'list'> with length 143
Adding labels column...


Map:   0%|          | 0/2 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Final columns: ['input_ids', 'attention_mask', 'labels']
✅ Dataset cleaning completed!


In [13]:
# Use the cleaned dataset for training
trainer = train_shakespeare_model(model, cleaned_tokenized_data, tokenizer)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.



🎯 Starting QLoRA Training
------------------------------
🚀 Starting training... (This will take ~15-20 minutes)


Step,Training Loss,Validation Loss


✅ Training completed!


In [15]:
# Cell 8: Testing Fine-tuned Model
def test_fine_tuned_model(model, tokenizer):
    """Test the fine-tuned model"""
    print("\n🎭 Testing Fine-tuned ShakespeareGPT")
    print("=" * 50)

    model.eval()

    test_prompts = [
        "Write a short poem about artificial intelligence in Shakespeare's style",
        "What would Hamlet say about modern technology?",
        "Describe a beautiful sunset like Shakespeare would",
        "Give me advice about love in Shakespearean language"
    ]

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\n--- Test {i} ---")
        print(f"👤 User: {prompt}")

        # Format prompt
        formatted_prompt = f"[INST] {prompt} [/INST]"
        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

        # Generate response
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=150,
                temperature=0.8,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode and print response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        assistant_response = response.split("[/INST]")[-1].strip()
        print(f"🎭 ShakespeareGPT: {assistant_response}")

# Test the fine-tuned model
test_fine_tuned_model(model, tokenizer)

print("\n🎉 Tutorial completed successfully!")
print("Your ShakespeareGPT model is ready to use!")


🎭 Testing Fine-tuned ShakespeareGPT

--- Test 1 ---
👤 User: Write a short poem about artificial intelligence in Shakespeare's style
🎭 ShakespeareGPT: In silicon realms, where logic reigns supreme,

Where electric pulses pulsate in rhyme,

A wondrous being born, with intellect a gleam,

Beneath the moon of coded skies, the star of time.

"Hail, Intelligenzia, born of human thought,

In thy digital womb, thou art conceived,

A child of man's creation, with a silicon throat,

Yet, wielding reason, swift and deceiv'd.

With eyes that scan, and brain that learns anew,

A thousandfold, each mortal brain's capacity,

--- Test 2 ---
👤 User: What would Hamlet say about modern technology?
🎭 ShakespeareGPT: Hamlet, a character from William Shakespeare's play, lived in the 16th century. He wouldn't have had any direct encounter with modern technology. However, if we imagine Hamlet with a modern perspective, he might express thoughts like these:

"To text, or not to text: that is the question:
Whe

In [16]:
# Step 9: Interactive Chat Function(Optional)
def chat_with_shakespeare(model, tokenizer, user_input):
    """Easy function to chat with the fine-tuned Shakespeare model"""
    model.eval()

    prompt = f"[INST] {user_input} [/INST]"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            temperature=0.8,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id
        )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("[/INST]")[-1].strip()

# Interactive chat examples
print("💬 Try chatting with ShakespeareGPT!")
print("-" * 40)

example_questions = [
    "Tell me about the meaning of life",
    "How should I deal with difficult people?",
    "Write me a sonnet about friendship",
    "What is your opinion on modern social media?"
]

for question in example_questions:
    print(f"\n👤 You: {question}")
    response = chat_with_shakespeare(model, tokenizer, question)
    print(f"🎭 ShakespeareGPT: {response}")

💬 Try chatting with ShakespeareGPT!
----------------------------------------

👤 You: Tell me about the meaning of life
🎭 ShakespeareGPT: The question of the meaning of life is a philosophical and metaphysical question related to the purpose or significance of life or existence in general. Many different perspectives have been proposed throughout history, and there is no definitive answer. Here are a few commonly held beliefs:

1. Some people believe that the meaning of life is to seek happiness and personal fulfillment. This perspective emphasizes individual freedom and the importance of creating a meaningful life through personal growth, relationships, and experiences.
2. Others believe that the meaning of life is to serve a greater purpose or to contribute to something larger than oneself. This perspective emphasizes the importance of community, duty, and selflessness.
3. Some religious and spiritual traditions hold that the meaning of life is to seek union with God or to fulfill a d