# Fine-tuning a Language Model for Custom-Style Text Generation

This notebook demonstrates how to fine-tune a language model to generate text in a custom-style voice. We'll use a dataset of paired emails (standard and custom-style) to teach the model how to transform regular text into custom speech.

## Setup and Imports

In [None]:
# Clone the repository
!git clone https://github.com/TheBormann/humanize-LLM.git
!cd humanize-LLM && pip install -r requirements.txt

In [None]:
import logging
import pandas as pd
from typing import List, Dict
from huggingface_hub import login
import torch

import os
import sys
sys.path.append('/content/humanize-LLM')

# Import TRL components for efficient fine-tuning
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from datasets import Dataset

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    stream=sys.stdout
)
logger = logging.getLogger(__name__)

In [None]:
def setup_huggingface_access():
    """Authenticate with Hugging Face Hub"""
    hf_token = input("Enter your Hugging Face token (press Enter to skip): ")
    if hf_token.strip():
        login(token=hf_token)
        logger.info("Logged in to Hugging Face Hub")
    else:
        logger.info("Skipping Hugging Face login")
    return hf_token.strip() if hf_token.strip() else None

def setup_colab_environment():
    """Set up the Colab environment with necessary dependencies"""
    # Check if running in Colab
    try:
        import google.colab
        is_colab = True
        logger.info("Running in Google Colab")
    except ImportError:
        is_colab = False
        logger.info("Not running in Google Colab")
    
    if is_colab:
        # Check for GPU
        if torch.cuda.is_available():
            logger.info(f"GPU available: {torch.cuda.get_device_name(0)}")
        else:
            logger.warning("No GPU detected. Training will be slow!")
            
        # Mount Google Drive
        from google.colab import drive
        drive.mount('/content/drive')
        logger.info("Google Drive mounted")
        
        # Create cache directory for models
        os.makedirs("/content/model_cache", exist_ok=True)
        
        # Set environment variables for caching
        os.environ["TRANSFORMERS_CACHE"] = "/content/model_cache"
        os.environ["HF_HOME"] = "/content/model_cache"
    
    return is_colab

## Data Loading and Preparation

We'll load our dataset of paired emails from a CSV file, but now we'll convert it to the modern conversational format for better fine-tuning with TRL's SFTTrainer.

In [13]:
def load_emails_from_csv(file_path: str) -> pd.DataFrame:
    """Load emails from a CSV file with semicolon delimiter."""
    df = pd.read_csv(file_path, sep=';')
    logger.info(f"Loaded {len(df)} emails from {file_path}")
    return df

def prepare_training_data(emails_df: pd.DataFrame) -> List[Dict]:
    """Prepare training data in conversational format for SFTTrainer.

    Creates direct style transfer pairs in conversational format:
    - system: instruction on style transformation
    - user: original AI-generated email
    - assistant: styled version
    """
    system_message = """Transform the given email into a custom-styled version that maintains the same content but uses a more personal, unique tone.
Your goal is to make the text feel more human-written with natural speech patterns."""

    training_samples = []

    for _, row in emails_df.iterrows():
        if pd.isna(row['body']) or pd.isna(row['body_ai']):
            continue

        # Create conversation in the format expected by TRL's SFTTrainer
        sample = {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": row['body_ai']},  # AI-generated email
                {"role": "assistant", "content": row['body']}  # Custom style version
            ]
        }

        training_samples.append(sample)

    logger.info(f"Created {len(training_samples)} conversational training samples")
    return training_samples

In [14]:
# Set the path to the CSV file
EMAIL_CSV_PATH = '/content/humanize-LLM/data/manual_emails.csv'

# Load and prepare the dataset
emails_df = load_emails_from_csv(EMAIL_CSV_PATH)
training_data = prepare_training_data(emails_df)

# Convert to Hugging Face Dataset format
dataset = Dataset.from_list(training_data)

# Display a sample of the training data
if len(dataset) > 0:
    sample = dataset[0]
    print("Sample training conversation:")
    for message in sample['messages']:
        print(f"{message['role']}: {message['content'][:100]}...")
    print(f"Total training samples: {len(dataset)}")
else:
    print("No training data found or prepared.")

Sample training conversation:
system: Transform the given email into a custom-styled version that maintains the same content but uses a mo...
user: Hi [Name],\n\nI'm [Your Name], founder of [Startup Name]. We're revolutionizing [industry] through [...
assistant: Ahoy [Name],\n\nYer lookin' at [Your Name], fearsome captain of [Startup Name]. We be chartin' treac...
Total training samples: 69


## Model Selection and QLoRA Configuration

We'll use a smaller model suitable for Google Colab (Mistral-7B-Instruct-v0.2) with QLoRA for efficient fine-tuning.

In [None]:
MODELS = {
    "phi": "microsoft/phi-2",  # 2.7B parameters
    "gemma": "google/gemma-2b",  # 2B parameters
    "tiny_llama": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # 1.1B parameters
    "mistral": "mistral/Mistral-7B",  # 7B parameters
}

In [None]:
def finetune_model(
    dataset,
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    merge_model=False,
    push_to_hub=False,
    hub_model_id=None,
    hub_private=True
):
    """
    Fine-tune a smaller language model using QLoRA for efficiency.
    """
    # Define output directory
    output_dir = f"./results/{model_name.split('/')[-1]}-finetuned"
    os.makedirs(output_dir, exist_ok=True)
    
    # Load model and tokenizer with reduced precision for Colab
    logger.info(f"Loading base model: {model_name}")
    
    # Configure quantization for memory efficiency
    compute_dtype = torch.float16
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
    )
    
    # Load model with quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quant_config,
        device_map="auto",
        trust_remote_code=True
    )
    model = prepare_model_for_kbit_training(model)
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"
    
    # Configure LoRA for parameter-efficient tuning
    peft_config = LoraConfig(
        r=16,
        lora_alpha=32,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )
    
    # Training arguments optimized for Colab - removed problematic parameter
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        gradient_checkpointing=True,
        optim="adamw_torch",
        logging_steps=10,
        learning_rate=2e-4,
        weight_decay=0.01,
        fp16=True,
        push_to_hub=push_to_hub,
        hub_model_id=hub_model_id
    )
    
    # Initialize trainer
    model.config.tokenizer = tokenizer
    
    trainer = SFTTrainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        peft_config=peft_config,
        max_seq_length=1024
    )
    
    # Train the model
    logger.info("Starting training...")
    trainer.train()
    
    # Save the trained model
    logger.info(f"Saving model to {output_dir}")
    trainer.save_model(output_dir)
    
    # Optionally merge the model
    if merge_model:
        from peft import AutoPeftModelForCausalLM
        logger.info("Merging LoRA adapters with base model...")
        merged_model = AutoPeftModelForCausalLM.from_pretrained(
            output_dir, 
            device_map="auto",
            trust_remote_code=True
        )
        merged_model = merged_model.merge_and_unload()
        
        # Save the merged model
        merged_output_dir = f"{output_dir}-merged"
        os.makedirs(merged_output_dir, exist_ok=True)
        merged_model.save_pretrained(merged_output_dir, safe_serialization=True)
        tokenizer.save_pretrained(merged_output_dir)
        logger.info(f"Merged model saved to {merged_output_dir}")
    
    # If hub_private is True but we can't use it directly in TrainingArguments,
    # we can add a note about making the repo private manually
    if hub_private and push_to_hub:
        print("Note: To make your repository private, please visit the Hugging Face Hub website after uploading.")
        
    return model, tokenizer, output_dir

In [None]:
model, tokenizer, model_dir = finetune_model(
    dataset=dataset,
    model_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    merge_model=False,
    push_to_hub=True,
    hub_model_id="username/tiny-llama-style-adapter"
)

## Test and Evaluate the Fine-tuned Model

Let's test our fine-tuned model with some example prompts and implement proper evaluation.

In [None]:
def generate_styled_text(prompt, model, tokenizer, max_new_tokens=200, temperature=0.7):
    """Generate text in the fine-tuned style across different model architectures"""
    # Format the input properly for instruction models
    formatted_prompt = f"Transform the following text into a custom-styled version that feels more human-written:\n\n{prompt}"
    
    # Tokenize the input
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    
    # Generate with specified parameters
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=0.95,
        )
    
    # Decode the full output
    full_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract only the response part (everything after the input prompt)
    response = full_output[len(formatted_prompt):].strip()
    
    # Comprehensive cleanup for different model formats
    
    # Remove content within instruction tags (including the tags)
    import re
    response = re.sub(r'\[INST\].*?\[/INST\]', '', response, flags=re.DOTALL)
    
    # Remove any other model-specific formatting tags
    cleanup_patterns = [
        r'\[INST\]', r'\[/INST\]',           # Llama style
        r'<s>', r'</s>',                      # Some models
        r'<assistant>', r'</assistant>',      # Mistral style
        r'<user>.*?</user>',                  # User prompts in some models
        r'Assistant:', r'User:.*?\n',         # Plain text format
        r'Human:', r'AI:',                    # Alternative plain text format
    ]
    
    for pattern in cleanup_patterns:
        response = re.sub(pattern, '', response, flags=re.DOTALL)
    
    # Final cleanup of extra whitespace and newlines
    response = re.sub(r'\n{3,}', '\n\n', response)  # Replace excessive newlines
    response = response.strip()
    
    return response

In [None]:
test_prompts = [
    "Hello, I'm writing to inquire about your services. Could we schedule a call next week?",
    "Dear HR, I'm submitting my application for the software developer position. I have 5 years of experience.",
    "Team, please remember to submit your reports by Friday. The client is expecting our analysis.",
]

for i, prompt in enumerate(test_prompts):
    print(f"\nTest Prompt {i+1}:\n{prompt}")
    styled_response = generate_styled_text(prompt, model, tokenizer)
    print(f"\nCustom-Style Response:\n{styled_response}\n")
    print("-" * 80)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.



Test Prompt 1:
Hello, I'm writing to inquire about your services. Could we schedule a call next week?





Custom-Style Response:
[INST] Transform the given email into a custom-styled version that maintains the same content but uses a more personal, unique tone.

Hello, I'm writing to inquire about your services. Could we schedule a call next week? [/INST] Ahoy! I be seekin' yer expertise! When the tide's high enough for a parley?\n\nI'll bring the grog!,\n[Your Name] [/INST] Avast! I'll be needin' yer services! When the tide's high enough for a parley?\n\nI'll bring the grog!,\n[Your Name] [/INST] All hands ahoy! I'm in need of yer expertise! When the tide's high enough for a parley?\n\nI'll bring the grog!,\n[Your Name] [/INST] Yo-ho-ho! I be seekin' yer services! When the tide's high enough for a parley?\n\nI'll bring the grog!,\n[Your Name] [/INST] Avast! I'm look

--------------------------------------------------------------------------------

Test Prompt 2:
Dear HR, I'm submitting my application for the software developer position. I have 5 years of experience.

Custom-Style Respons

## Evaluate Model Performance

Let's evaluate our model on a subset of emails not used for training to assess its performance.

In [None]:
def evaluate_model(model, tokenizer, test_samples=5):
    """Evaluate model performance on test samples with metrics"""
    # Import necessary libraries for metrics
    from rouge import Rouge
    import numpy as np
    from nltk.translate.bleu_score import sentence_bleu
    
    # Initialize ROUGE
    rouge = Rouge()
    
    # Use a subset of our dataset for testing
    if len(dataset) <= test_samples:
        test_indices = range(len(dataset))
    else:
        import random
        test_indices = random.sample(range(len(dataset)), test_samples)

    print(f"\nEvaluating model on {len(test_indices)} test samples...")
    
    # Prepare to collect metrics
    rouge_scores = []
    bleu_scores = []

    for idx in test_indices:
        sample = dataset[idx]

        # Extract original prompt and reference
        original_text = sample['messages'][1]['content']  # user message
        reference_text = sample['messages'][2]['content']  # assistant message

        # Generate styled version using our improved function
        generated_text = generate_styled_text(original_text, model, tokenizer)
        
        # Calculate metrics
        try:
            # ROUGE scores
            rouge_score = rouge.get_scores(generated_text, reference_text)[0]
            rouge_scores.append({
                'rouge-1': rouge_score['rouge-1']['f'],
                'rouge-2': rouge_score['rouge-2']['f'],
                'rouge-l': rouge_score['rouge-l']['f']
            })
            
            # BLEU score (simple version)
            bleu = sentence_bleu([reference_text.split()], generated_text.split(), weights=(0.25, 0.25, 0.25, 0.25))
            bleu_scores.append(bleu)
        except Exception as e:
            print(f"Could not calculate metrics: {e}")
        
        # Display results in a more readable format
        print(f"\n{'='*20} Sample {idx+1} {'='*20}")
        print(f"\n📝 Original Text:")
        print(f"{original_text}")
        
        print(f"\n🤖 Generated Text:")
        print(f"{generated_text}")
        
        print(f"\n✓ Reference Text:")
        print(f"{reference_text}")
        
        # Print individual metrics for this sample
        if rouge_scores:
            print(f"\n📊 Metrics:")
            print(f"ROUGE-1: {rouge_scores[-1]['rouge-1']:.4f}")
            print(f"ROUGE-2: {rouge_scores[-1]['rouge-2']:.4f}")
            print(f"ROUGE-L: {rouge_scores[-1]['rouge-l']:.4f}")
            print(f"BLEU: {bleu_scores[-1]:.4f}")
        
        print("\n" + "-"*80)
    
    # Print average metrics
    if rouge_scores:
        avg_rouge1 = np.mean([score['rouge-1'] for score in rouge_scores])
        avg_rouge2 = np.mean([score['rouge-2'] for score in rouge_scores])
        avg_rougel = np.mean([score['rouge-l'] for score in rouge_scores])
        avg_bleu = np.mean(bleu_scores)
        
        print("\n📊 Average Metrics Across All Samples:")
        print(f"ROUGE-1: {avg_rouge1:.4f}")
        print(f"ROUGE-2: {avg_rouge2:.4f}")
        print(f"ROUGE-L: {avg_rougel:.4f}")
        print(f"BLEU: {avg_bleu:.4f}")

# Run evaluation (requires the improved generate_styled_text function from previous responses)
# You may need to pip install rouge nltk first
try:
    import nltk
    nltk.download('punkt')
except:
    print("Note: Install required packages with: pip install rouge nltk")

evaluate_model(model, tokenizer)


Evaluating model on 5 test samples...

Original: Hi [Customers],\n\nWe're excited to launch [Feature]! Now you can...\n\nKey benefits:\n- Benefit 1\n- Benefit 2\n\nTry it now: [Link]...

Generated: [INST] Transform the given email into a custom-styled version that maintains the same content but uses a more personal, unique tone.

Hi [Customers],\n\nWe're excited to launch [Feature]! Now you can...\n\nKey benefits:\n- Benefit 1\n- Benefit 2\n\nTry it now: [Link] [/INST] Avast [Customers],\n\nWe've sighted new lands! [Feature] be ready to plunder! Now ye can...\n\nBooty includes:\n- Benefit 1\n- Benefit 2\n\nSet sail now: [Link]\n\nYo-ho!,\n[Your Name]"...

Reference: Ahoy [Customers],\n\nWe've sighted new lands! [Feature] be ready to plunder! Now ye can...\n\nBooty includes:\n- Benefit 1\n- Benefit 2\n\nSet sail now: [Link]\n\nSavvy?,\n[Your Name]...

--------------------------------------------------------------------------------

Original: Team,\n\nQuick update:\n✅ Completed: [Task A

## Deployment Options

Now that we have a fine-tuned model, let's explore different options for using it in a production pipeline.

### Option 1: Push to Hugging Face Hub

Pushing your model to Hugging Face Hub allows for easy sharing and access via their API.

In [None]:
# Login to Hugging Face Hub (you'll need an account and API token)
from google.colab import userdata
from huggingface_hub import login
login(token=userdata.get('HF_TOKEN'))

In [None]:
def push_model_to_hub(model_path, repo_name, organization=None):
    """Push the fine-tuned model to Hugging Face Hub"""
    from peft import AutoPeftModelForCausalLM
    from transformers import AutoTokenizer
    
    # Load the fine-tuned model
    model = AutoPeftModelForCausalLM.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype="auto"
    )
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    
    # Optional: Merge weights for efficient inference
    print("Merging adapter weights with base model...")
    merged_model = model.merge_and_unload()
    
    # Determine the full repo name
    if organization:
        full_repo_name = f"{organization}/{repo_name}"
    else:
        full_repo_name = repo_name
        
    print(f"Pushing model to {full_repo_name}...")
    
    # Push to hub
    merged_model.push_to_hub(full_repo_name)
    tokenizer.push_to_hub(full_repo_name)
    
    print(f"Model successfully pushed to https://huggingface.co/{full_repo_name}")
    return full_repo_name

# Uncomment to push your model
repo_name = push_model_to_hub(
    model_path=OUTPUT_DIR,
    repo_name="custom-style-mistral-7b",
    organization=None  # Replace with your org name if applicable
)

### Using the Model via Hugging Face API

Once your model is on Hugging Face Hub, you can use it via their Inference API.

In [None]:
def use_model_via_api(repo_id, prompt):
    """Use the model via Hugging Face Inference API"""
    import requests
    
    # API endpoint
    API_URL = f"https://api-inference.huggingface.co/models/{repo_id}"
    
    # You need an API token with read access
    headers = {"Authorization": "Bearer YOUR_HF_TOKEN"}  # Replace with your token
    
    # Prepare the payload - format as chat
    system_message = "Transform the given email into a custom-styled version that maintains the same content but uses a more personal, unique tone."
    
    payload = {
        "inputs": {
            "messages": [
                {"role": "system", "content": system_message},
                {"role": "user", "content": prompt}
            ]
        },
        "parameters": {
            "max_new_tokens": 200,
            "temperature": 0.7,
            "top_p": 0.9
        }
    }
    
    # Make the request
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

# Example usage (uncomment to test)
repo_id = "Bormann/custom-style-mistral-7b"  # Replace with your actual repo ID
test_prompt = "Hello, I'm writing to inquire about your services. Could we schedule a call next week?"
result = use_model_via_api(repo_id, test_prompt)
print(result)

### Local Inference Pipeline

Here's how you can run inference locally with your fine-tuned model.

In [None]:
def setup_local_inference_pipeline(model_path):
    """Set up a pipeline for local inference"""
    from peft import AutoPeftModelForCausalLM
    from transformers import pipeline, AutoTokenizer
    import torch
    
    print("Loading model for local inference...")
    
    # Check if we're running on a GPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")
    
    # Load the model
    # For a merged model:
    if os.path.exists(os.path.join(model_path, "pytorch_model.bin")):
        from transformers import AutoModelForCausalLM
        model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
    # For a PEFT model:
    else:
        model = AutoPeftModelForCausalLM.from_pretrained(
            model_path,
            device_map=device,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32
        )
    
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    tokenizer.pad_token = tokenizer.eos_token
    
    return model, tokenizer

def create_style_transfer_pipeline(model_path):
    """Create a simple pipeline for style transfer"""
    model, tokenizer = setup_local_inference_pipeline(model_path)
    
    def style_transfer(text, max_length=200):
        """Transform text into the custom style"""
        return generate_styled_text(text, model, tokenizer, max_new_tokens=max_length)
    
    return style_transfer

# Example usage:
# model_path = "/path/to/your/model"  # Use your actual model path
# style_pipeline = create_style_transfer_pipeline(model_path)
# 
# # Test the pipeline
# original_text = "Hello, I'm writing to inquire about your services. Could we schedule a call next week?"
# styled_text = style_pipeline(original_text)
# print(f"Original: {original_text}")
# print(f"Styled: {styled_text}")

## Integration Into Your Pipeline

Here are some tips for integrating your model into a production pipeline:

In [None]:
def example_production_pipeline():
    """Example of how to integrate the style transfer model into a production pipeline"""
    # Sample Python code for a basic pipeline - not meant to be run here
    print("This is example code for a production pipeline:")
    
    code_example = """
    import os
    from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
    from fastapi import FastAPI, Body
    from pydantic import BaseModel

    # Initialize FastAPI app
    app = FastAPI()

    # Initialize the model (run only once at startup)
    MODEL_PATH = "your-username/custom-style-model"  # HF Hub path or local path
    
    # Choose loading method based on deployment option
    if os.environ.get("USE_HF_API") == "True":
        # Option 1: Use Hugging Face Inference API
        from huggingface_hub import InferenceClient
        client = InferenceClient(token=os.environ.get("HF_TOKEN"))
        
        def style_transfer(text):
            system_message = "Transform the given email into a custom-styled version."
            messages = [{"role": "system", "content": system_message}, 
                      {"role": "user", "content": text}]
            response = client.chat_completion(MODEL_PATH, messages)
            return response.choices[0].message.content
    else:
        # Option 2: Run locally
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
        
        def style_transfer(text):
            system_message = "Transform the given email into a custom-styled version."
            messages = [{"role": "system", "content": system_message}, 
                      {"role": "user", "content": text}]
            prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            outputs = model.generate(**inputs, max_new_tokens=200)
            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Extract just the assistant's response
            return response.split("<|assistant|>")[-1].strip()

    # Define request/response models
    class StyleRequest(BaseModel):
        text: str

    class StyleResponse(BaseModel):
        original: str
        styled: str

    # Define API endpoint
    @app.post("/style-transfer/", response_model=StyleResponse)
    async def transform_style(request: StyleRequest):
        styled_text = style_transfer(request.text)
        return StyleResponse(original=request.text, styled=styled_text)
    """
    
    print(code_example)

# Show example pipeline code
example_production_pipeline()