In [1]:
# Cell 1: Install Dependencies
!pip install -q transformers datasets peft accelerate pandas tqdm bitsandbytes


In [2]:

# Cell 2: Import Libraries
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig
)
from peft import (
    LoraConfig,
    get_peft_model,
    TaskType,
    prepare_model_for_kbit_training
)

In [None]:

# Cell 3: Set up Environment
# Use Llama 3 model instead of TinyLlama
model_id = "meta-llama/Meta-Llama-3-8B"  # You can change this to other Llama 3 variants
output_dir = "./llama3-lora-finetuned"

# Create output directory
os.makedirs(output_dir, exist_ok=True)

# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")



Using device: cuda


In [None]:

# Cell 4: Load Dataset
def load_dataset():
    print("Loading dataset...")
    df = pd.read_csv("Dataset.csv")
    
    # Updated prompt template to match Llama 3 chat format
    df["text"] = df.apply(
        lambda row: f"<|im_start|>user\n{row['Question']}<|im_end|>\n<|im_start|>assistant\n{row['Answer']}<|im_end|>", 
        axis=1
    )
    
    # Split into train and test
    train_df = df.sample(frac=0.9, random_state=42)
    test_df = df.drop(train_df.index)
    
    # Convert to Hugging Face datasets
    train_dataset = Dataset.from_pandas(train_df[["text"]])
    test_dataset = Dataset.from_pandas(test_df[["text"]])
    
    print(f"Loaded {len(train_dataset)} training examples and {len(test_dataset)} test examples")
    return train_dataset, test_dataset

train_dataset, test_dataset = load_dataset()

Loading dataset...
Loaded 152 training examples and 17 test examples


In [None]:
# Cell 5: Load Model and Tokenizer
def load_model_and_tokenizer():
    print("Loading model and tokenizer...")
    
    # Configure quantization for memory efficiency
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )
    
    # Import and use login for authentication
    from huggingface_hub import login
    login(token="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")  # Replace with your actual token
    
    # Load tokenizer with token authentication
    tokenizer = AutoTokenizer.from_pretrained(
        model_id,
        token="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"  # Replace with your actual token
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    # Load model with quantization and token authentication
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map="auto",
        token="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",  # Replace with your actual token
        trust_remote_code=True
    )
    
    print("Model and tokenizer loaded successfully")
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

Loading model and tokenizer...


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [6]:


# Cell 6: Preprocessing
def preprocess_data(train_dataset, test_dataset, tokenizer):
    print("Preprocessing data...")
    
    def tokenize_function(examples):
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=1024,  # Increased for Llama 3
            padding="max_length"
        )
    
    # Tokenize datasets
    tokenized_train = train_dataset.map(tokenize_function, batched=True)
    tokenized_test = test_dataset.map(tokenize_function, batched=True)
    
    print("Data preprocessing complete")
    return tokenized_train, tokenized_test

tokenized_train, tokenized_test = preprocess_data(train_dataset, test_dataset, tokenizer)


Preprocessing data...


Map:   0%|          | 0/152 [00:00<?, ? examples/s]

Map:   0%|          | 0/17 [00:00<?, ? examples/s]

Data preprocessing complete


In [None]:

# Cell 7: Apply LoRA
def apply_lora(model):
    print("Applying LoRA...")
    
    # Prepare model for training
    model = prepare_model_for_kbit_training(model)
    
    # Define LoRA configuration - adjusted for Llama 3
    lora_config = LoraConfig(
        r=16,                    # Rank - increased for Llama 3
        lora_alpha=32,           # Alpha parameter 
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # Target more modules for Llama 3
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.CAUSAL_LM
    )
    
    # Apply LoRA to model
    model = get_peft_model(model, lora_config)
    
    # Print trainable parameters info
    print(f"Total parameters: {model.num_parameters()}")
    print(f"Trainable parameters: {model.num_parameters(only_trainable=True)}")
    
    return model

model = apply_lora(model)


Applying LoRA...
Total parameters: 8043892736
Trainable parameters: 13631488


In [None]:

# Cell 8: Training Setup
def setup_trainer(model, tokenized_train, tokenized_test, tokenizer):
    print("Setting up trainer...")
    
    # Training arguments - adjusted for Llama 3
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=10,              
        per_device_train_batch_size=2,   # Reduced for larger model
        per_device_eval_batch_size=2,    # Reduced for larger model
        gradient_accumulation_steps=4,   # Added for larger model
        warmup_steps=50,                # Increased for Llama 3
        logging_steps=10,               
        save_steps=50,                  
        learning_rate=5e-5,              # Adjusted for Llama 3
        weight_decay=0.01,               
        fp16=True if torch.cuda.is_available() else False,                       
        report_to="none",
        lr_scheduler_type="cosine",      # Added for Llama 3
        max_grad_norm=0.3,              # Added for stability
    )
    
    # Data collator
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )
    
    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        data_collator=data_collator,
    )
    
    return trainer

trainer = setup_trainer(model, tokenized_train, tokenized_test, tokenizer)


No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Setting up trainer...


In [9]:
# Cell 9: Training
def train():
    print("Starting training...")
    trainer.train()
    
    # Save the fine-tuned model
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    print(f"Model saved to {output_dir}")


train()


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Starting training...




Step,Training Loss
10,2.9139
20,2.8121
30,2.3554
40,1.6974
50,1.2851
60,1.0975
70,1.0387
80,0.9527
90,0.8961
100,0.8309




Model saved to ./llama3-lora-finetuned


In [10]:
# Cell 10: Evaluation
def evaluate():
    print("Evaluating model...")
    eval_results = trainer.evaluate()
    print(f"Evaluation results: {eval_results}")
    return eval_results

evaluate()


Evaluating model...


Evaluation results: {'eval_loss': 0.9683107733726501, 'eval_runtime': 3.6203, 'eval_samples_per_second': 4.696, 'eval_steps_per_second': 2.486, 'epoch': 10.0}


{'eval_loss': 0.9683107733726501,
 'eval_runtime': 3.6203,
 'eval_samples_per_second': 4.696,
 'eval_steps_per_second': 2.486,
 'epoch': 10.0}

In [12]:
# Install necessary packages
!pip install evaluate rouge-score transformers torch pandas numpy

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import evaluate
import pandas as pd
import numpy as np

# Load your model
model_path = "./merged_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Define test cases with banking-specific scenarios
test_cases = [
    {
        "question": "How should I handle a customer who wants to dispute a transaction?",
        "reference_answer": "First, listen carefully to the customer's concern. Ask for the transaction details including date, amount, and merchant. Explain the dispute process and timeframe. Document all details in the system. If the transaction is fraudulent, follow the fraud protocol and offer to cancel the card. If it's a merchant dispute, help the customer file a formal dispute form. Assure them you'll follow up within 5-7 business days with updates."
    },
    {
        "question": "What's the process for helping a new customer open a checking account?",
        "reference_answer": "First, verify the customer's identity with two forms of ID. Explain our checking account options and their features. Once they select an account type, complete the application form in the system. Collect the initial deposit amount. Review all terms and conditions. Set up online banking access. Provide them with temporary checks and explain when their debit card and official checks will arrive. Inform them about our mobile app features."
    },
    {
        "question": "How do I handle a customer who is upset about overdraft fees?",
        "reference_answer": "Remain calm and professional. Listen actively to understand their specific concern. Review their account history to verify the overdraft charges. Explain our overdraft policy clearly. If this is their first occurrence, consider waiving the fee as a courtesy. Offer to set up overdraft protection services to prevent future fees. Document the interaction and any fee waivers in the customer's account notes."
    },
    {
        "question": "What should I tell customers about our mobile banking security features?",
        "reference_answer": "Inform customers that our mobile banking app uses industry-leading encryption standards. Highlight the multi-factor authentication process that requires both password and biometric verification. Explain that we never store sensitive account information directly on their device. Mention our automatic timeout feature, transaction monitoring system, and instant fraud alerts. Emphasize that we provide zero liability protection for unauthorized transactions reported promptly."
    },
    {
        "question": "How should I respond when a customer asks for a loan but has poor credit?",
        "reference_answer": "Acknowledge their application respectfully without immediate rejection. Review their complete financial profile beyond just the credit score. Consider offering secured loan options or credit-builder products. Explain how our bank's financial education resources can help improve their credit over time. If we can't approve them now, provide specific reasons and suggestions for improvements. Offer to schedule a follow-up meeting in 3-6 months to reassess their situation."
    }
]

# Function to generate responses from your model
def generate_response(question, temperature=0.5):
    prompt = f"<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
    
    # Ensure proper tokenization with attention mask
    inputs = tokenizer(prompt, return_tensors="pt", padding=True)
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=512,
            temperature=temperature,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id
        )
    
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    try:
        assistant_text = full_response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
        return assistant_text
    except:
        # Fallback if token extraction fails
        return tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

# Generate model responses for all test cases
print("Generating responses for evaluation...")
results = []
for test_case in test_cases:
    generated = generate_response(test_case["question"])
    results.append({
        "question": test_case["question"],
        "reference": test_case["reference_answer"],
        "generated": generated
    })

# Convert to DataFrame for easier analysis
results_df = pd.DataFrame(results)

# Calculate ROUGE scores
print("Calculating ROUGE metrics...")
rouge = evaluate.load('rouge')

# Calculate ROUGE for each test case individually
individual_scores = []
for i, row in results_df.iterrows():
    score = rouge.compute(
        predictions=[row['generated']],
        references=[row['reference']],
        use_aggregator=True
    )
    individual_scores.append({
        "question_id": i,
        "question": row['question'][:50] + "...",
        "rouge1": score['rouge1'],
        "rouge2": score['rouge2'],
        "rougeL": score['rougeL']
    })

# Calculate aggregate ROUGE scores
aggregate_scores = rouge.compute(
    predictions=results_df['generated'].tolist(),
    references=results_df['reference'].tolist(),
    use_aggregator=True
)

# Display results
print("\n===== ROUGE Evaluation Results =====\n")

print("Individual Question Scores:")
individual_df = pd.DataFrame(individual_scores)
print(individual_df.to_string(index=False))

print("\nAggregate Scores:")
for metric, value in aggregate_scores.items():
    print(f"{metric}: {value:.4f}")

print("\n===== Sample Generated Responses =====\n")
for i, row in results_df.iterrows():
    print(f"Question {i+1}: {row['question']}")
    print(f"Generated: {row['generated'][:150]}...\n")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Generating responses for evaluation...
Calculating ROUGE metrics...

===== ROUGE Evaluation Results =====

Individual Question Scores:
 question_id                                              question   rouge1   rouge2   rougeL
           0 How should I handle a customer who wants to disput... 0.195652 0.044444 0.108696
           1 What's the process for helping a new customer open... 0.202020 0.020619 0.101010
           2 How do I handle a customer who is upset about over... 0.136364 0.023256 0.090909
           3 What should I tell customers about our mobile bank... 0.269663 0.114943 0.247191
           4 How should I respond when a customer asks for a lo... 0.193548 0.021978 0.107527

Aggregate Scores:
rouge1: 0.1994
rouge2: 0.0448
rougeL: 0.1311
rougeLsum: 0.1311

===== Sample Generated Responses =====

Question 1: How should I handle a customer who wants to dispute a transaction?
Generated: If the customer disputes a transaction, follow bank guidelines for investigation. Provid