In [None]:
# Fine-tuning Mistral-7B for Email Response Generation

This notebook demonstrates how to fine-tune the Mistral-7B model to generate professional email responses. We'll use a simple approach with LoRA (Low-Rank Adaptation) to make the training efficient and cost-effective.

## Overview
- **Model**: Mistral-7B-Instruct-v0.1
- **Task**: Email response generation
- **Method**: LoRA fine-tuning
- **Dataset**: Custom email response dataset

## Prerequisites
- Hugging Face account with access token
- GPU with at least 16GB VRAM (recommended)
- Required Python packages (installed in next cell)


In [None]:
# Install required packages
# Run this cell first to install all necessary dependencies

!pip install -q transformers==4.36.0
!pip install -q peft==0.6.2
!pip install -q accelerate==0.25.0
!pip install -q bitsandbytes==0.41.3
!pip install -q datasets==2.14.6
!pip install -q trl==0.7.4
!pip install -q torch==2.1.1

print("✅ All packages installed successfully!")


In [None]:
# Import necessary libraries
import json
import torch
import pandas as pd
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
from datasets import Dataset
import warnings
warnings.filterwarnings('ignore')

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🔧 Using device: {device}")
print(f"🔧 CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"🔧 GPU: {torch.cuda.get_device_name(0)}")
    print(f"🔧 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")


In [None]:
# Configure Hugging Face authentication
# Replace 'your_token_here' with your actual Hugging Face token
from huggingface_hub import login

# Set your Hugging Face token here
HF_TOKEN = "your_token_here"  # Replace with your actual token

# Login to Hugging Face
login(token=HF_TOKEN)
print("✅ Successfully logged in to Hugging Face!")


In [None]:
# Load and prepare the training data
# Load the demo email dataset from JSON file

def load_email_data(file_path="email_demo_data.json"):
    """
    Load email training data from JSON file
    
    Returns:
        list: List of email training examples
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        return data
    except FileNotFoundError:
        print(f"❌ Error: {file_path} not found!")
        return []

# Load the data
email_data = load_email_data()
print(f"📧 Loaded {len(email_data)} email examples")

# Display first example
if email_data:
    print("\n🔍 First training example:")
    print(f"Prompt: {email_data[0]['prompt']}")
    print(f"Input: {email_data[0]['input'][:100]}...")
    print(f"Response: {email_data[0]['response'][:100]}...")


In [None]:
# Format data for training
# Create training prompts in the format expected by Mistral

def format_training_data(data):
    """
    Format the email data for training with proper prompt structure
    
    Args:
        data (list): List of email examples
        
    Returns:
        list: Formatted training examples
    """
    formatted_data = []
    
    for example in data:
        # Create a conversation format prompt
        prompt = f"""<s>[INST] {example['prompt']}

Email context: {example['input']} [/INST]

{example['response']}</s>"""
        
        formatted_data.append({"text": prompt})
    
    return formatted_data

# Format the data
formatted_data = format_training_data(email_data)
print(f"📝 Formatted {len(formatted_data)} training examples")

# Show formatted example
if formatted_data:
    print("\n🔍 Formatted training example:")
    print(formatted_data[0]["text"][:300] + "...")


In [None]:
# Load the base Mistral model and tokenizer
# We'll use 4-bit quantization to reduce memory usage

# Model configuration
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# Configure 4-bit quantization for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

print("🔄 Loading Mistral-7B model and tokenizer...")

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

print("✅ Model and tokenizer loaded successfully!")
print(f"🔧 Model device: {model.device}")
print(f"🔧 Model dtype: {model.dtype}")


In [None]:
# Prepare model for LoRA fine-tuning
# LoRA allows us to fine-tune efficiently by only updating a small number of parameters

# Prepare model for k-bit training
model = prepare_model_for_kbit_training(model)

# Configure LoRA
lora_config = LoraConfig(
    r=16,                   # Rank of adaptation
    lora_alpha=32,          # LoRA scaling parameter
    target_modules=[        # Modules to apply LoRA to
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"
    ],
    lora_dropout=0.1,       # LoRA dropout
    bias="none",            # Bias type
    task_type="CAUSAL_LM"   # Task type
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Print trainable parameters
def print_trainable_parameters(model):
    """Print the number of trainable parameters in the model"""
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(f"🎯 Trainable params: {trainable_params:,}")
    print(f"🎯 All params: {all_param:,}")
    print(f"🎯 Trainable %: {100 * trainable_params / all_param:.2f}%")

print_trainable_parameters(model)


In [None]:
# Prepare training dataset
# Convert our formatted data to a Hugging Face Dataset

# Create dataset
train_dataset = Dataset.from_list(formatted_data)
print(f"📊 Training dataset size: {len(train_dataset)}")

# Configure training arguments
training_args = TrainingArguments(
    output_dir="./mistral-email-finetuned",       # Output directory
    num_train_epochs=3,                           # Number of training epochs
    per_device_train_batch_size=1,                # Batch size per device
    gradient_accumulation_steps=4,                # Gradient accumulation steps
    optim="adamw_torch",                          # Optimizer
    save_steps=50,                                # Save checkpoint every N steps
    logging_steps=10,                             # Log every N steps
    learning_rate=2e-4,                           # Learning rate
    weight_decay=0.001,                           # Weight decay
    fp16=True,                                    # Use mixed precision
    max_grad_norm=0.3,                            # Max gradient norm
    max_steps=-1,                                 # Max steps (-1 means no limit)
    warmup_ratio=0.03,                            # Warmup ratio
    group_by_length=True,                         # Group sequences by length
    lr_scheduler_type="cosine",                   # Learning rate scheduler
    report_to="none"                              # Don't report to wandb/tensorboard
)

print("✅ Training arguments configured!")
print(f"🔧 Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"🔧 Total training steps: {len(train_dataset) * training_args.num_train_epochs // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps)}")


In [None]:
# Initialize the trainer
# Use SFTTrainer for supervised fine-tuning

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,          # Maximum sequence length
    tokenizer=tokenizer,
    args=training_args,
    packing=False,               # Don't pack multiple examples together
)

print("✅ SFTTrainer initialized successfully!")
print("🚀 Ready to start training...")


In [None]:
# Start training
# This will take some time depending on your hardware

print("🚀 Starting training...")
print("⏰ This may take 10-30 minutes depending on your GPU")

# Train the model
trainer.train()

print("✅ Training completed!")
print("💾 Saving the fine-tuned model...")


In [None]:
# Save the fine-tuned model
# Save both the model and tokenizer for later use

# Save the fine-tuned model
trainer.model.save_pretrained("./mistral-email-finetuned-final")
tokenizer.save_pretrained("./mistral-email-finetuned-final")

print("✅ Model and tokenizer saved successfully!")
print("📁 Saved to: ./mistral-email-finetuned-final/")


In [None]:
## Testing the Fine-tuned Model

Now let's test our fine-tuned model with some example email scenarios to see how well it performs.


In [None]:
# Test the fine-tuned model
# Create a text generation pipeline for easy inference

def generate_email_response(prompt, email_context, max_length=300):
    """
    Generate an email response using the fine-tuned model
    
    Args:
        prompt (str): The task description
        email_context (str): The email context/input
        max_length (int): Maximum length of generated response
        
    Returns:
        str: Generated email response
    """
    # Format the input like our training data
    input_text = f"<s>[INST] {prompt}\n\nEmail context: {email_context} [/INST]\n\n"
    
    # Tokenize the input
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    
    # Generate response
    with torch.no_grad():
        outputs = model.generate(
            inputs,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Decode the response
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract only the generated part (after [/INST])
    response_start = full_response.find("[/INST]") + len("[/INST]")
    generated_response = full_response[response_start:].strip()
    
    return generated_response

print("✅ Email generation function ready!")
print("🧪 Ready to test the model...")


In [None]:
# Test with example scenarios
# Let's test our model with some new email scenarios

test_cases = [
    {
        "prompt": "Write a professional email responding to a client's complaint about delayed delivery.",
        "context": "Dear Support Team, I ordered my products 3 weeks ago and still haven't received them. Order #12345. This is very frustrating as I needed them for an important event. Please help! - Sarah Johnson"
    },
    {
        "prompt": "Write an email accepting a meeting invitation and suggesting agenda items.",
        "context": "Hi Team, I'd like to schedule a project review meeting for next Wednesday at 2 PM. Please confirm your availability. We need to discuss current progress and next steps. - Project Manager"
    },
    {
        "prompt": "Write a polite email declining a job offer due to salary expectations.",
        "context": "Dear John, Thank you for offering me the Software Developer position at XYZ Corp. The role seems exciting, but the offered salary of $60k is below my expectations of $75k. Looking forward to your response. - HR Manager"
    }
]

# Test each scenario
for i, test_case in enumerate(test_cases, 1):
    print(f"\n{'='*60}")
    print(f"🧪 TEST CASE {i}")
    print(f"{'='*60}")
    print(f"Prompt: {test_case['prompt']}")
    print(f"Context: {test_case['context'][:100]}...")
    print(f"\n📧 Generated Response:")
    print("-" * 40)
    
    response = generate_email_response(test_case['prompt'], test_case['context'])
    print(response)
    print("-" * 40)


In [None]:
## Conclusion and Next Steps

🎉 **Congratulations!** You have successfully fine-tuned Mistral-7B for email response generation.

### What we accomplished:
- ✅ Loaded and prepared email training data
- ✅ Configured Mistral-7B with 4-bit quantization for efficiency
- ✅ Applied LoRA for parameter-efficient fine-tuning  
- ✅ Trained the model on email response examples
- ✅ Saved the fine-tuned model for future use
- ✅ Tested the model with new email scenarios

### Model Performance:
The fine-tuned model should now be able to generate professional email responses based on different contexts and prompts. The quality will improve with:
- More training data
- Longer training (more epochs)
- Fine-tuning hyperparameters

### Next Steps:
1. **Expand the dataset**: Add more diverse email examples
2. **Improve prompts**: Experiment with different prompt formats
3. **Deploy the model**: Use the model in a web application or API
4. **Evaluate performance**: Create metrics to measure email quality
5. **Continue training**: Fine-tune further with domain-specific emails

### Using the saved model:
```python
# Load the fine-tuned model later
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
model = PeftModel.from_pretrained(base_model, "./mistral-email-finetuned-final")
```

**Happy emailing! 📧✨**
