In [4]:
import os
import json
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    LlamaForSequenceClassification,
    LlamaTokenizer,
    get_linear_schedule_with_warmup,
    AutoConfig
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    #text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    #text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def setup_model_and_tokenizer(model_name, device):
    tokenizer = LlamaTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token
    
    model_config = AutoConfig.from_pretrained(model_name)
    model_config.num_labels = 2
    model_config.pad_token_id = tokenizer.pad_token_id
    model_config.use_cache = False
    
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = LlamaForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )
    
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer

def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }

def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    #plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()

def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics

def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=8):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds = []
        train_labels = []
        
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            loss.backward()
            
            if (step + 1) % 2 == 0:  # Gradient accumulation steps = 2
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            
            torch.cuda.empty_cache()
  
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
      
        val_metrics = evaluate_model(model, val_loader, device)
      
        print(f"\nEpoch {epoch + 1} Summary:")
        print("Training Metrics:")
        for metric, value in train_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        print("\nValidation Metrics:")
        for metric, value in val_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        # Save best model
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        
        torch.cuda.empty_cache()
        gc.collect()
    
    return best_model_state, best_val_metrics

def main():
    login(token="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")
    device = setup_environment()
    model_name = 'meta-llama/Llama-2-7b-chat-hf'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    
    train_df, val_df = train_test_split(emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42)
    
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 8
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    best_model_state, best_metrics = train_model(
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        device,
        num_epochs=num_epochs
    )
    
    
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_7B")
    os.makedirs(output_dir, exist_ok=True)
    
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    #plot_confusion_matrix(best_metrics['confusion_matrix'], output_dir)
    
  
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() 
                        for k, v in best_metrics.items()}
    }
    
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()

Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-2-7b-chat-hf and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 1.6707
Epoch 1, Step 10: Loss = 1.1563
Epoch 1, Step 20: Loss = 3.1397
Epoch 1, Step 30: Loss = 1.0593
Epoch 1, Step 40: Loss = 2.2507
Epoch 1, Step 50: Loss = 1.3200
Epoch 1, Step 60: Loss = 0.8110
Epoch 1, Step 70: Loss = 3.3866
Epoch 1, Step 80: Loss = 1.5304
Epoch 1, Step 90: Loss = 3.1797
Epoch 1, Step 100: Loss = 1.3592
Epoch 1, Step 110: Loss = 1.3921
Epoch 1, Step 120: Loss = 1.3436
Epoch 1, Step 130: Loss = 0.3368
Epoch 1, Step 140: Loss = 1.7129
Epoch 1, Step 150: Loss = 0.8212
Epoch 1, Step 160: Loss = 2.4932
Epoch 1, Step 170: Loss = 2.8405
Epoch 1, Step 180: Loss = 0.8025
Epoch 1, Step 190: Loss = 2.3386
Epoch 1, Step 200: Loss = 2.2928
Epoch 1, Step 210: Loss = 1.2434
Epoch 1, Step 220: Loss = 1.9094
Epoch 1, Step 230: Loss = 2.4838
Epoch 1, Step 240: Loss = 0.4476
Epoch 1, Step 250: Loss = 2.3106
Epoch 1, Step 260: Loss = 0.9868
Epoch 1, Step 270: Loss = 2.3863
Epoch 1, Step 280: Loss = 0.8535
Epoch 1, Step 290: Loss = 2.2728
Epoch 1, Step 300: Lo




Epoch 1 Summary:
Training Metrics:
accuracy: 0.5615
precision: 0.5400
recall: 0.8300
f1: 0.6543
loss: 1.5871

Validation Metrics:
accuracy: 0.5700
precision: 0.5500
recall: 0.7700
f1: 0.6417
loss: 1.1153




Epoch 2, Step 0: Loss = 0.0726
Epoch 2, Step 10: Loss = 0.2109
Epoch 2, Step 20: Loss = 0.1589
Epoch 2, Step 30: Loss = 1.8821
Epoch 2, Step 40: Loss = 1.2603
Epoch 2, Step 50: Loss = 1.0367
Epoch 2, Step 60: Loss = 1.4289
Epoch 2, Step 70: Loss = 0.9062
Epoch 2, Step 80: Loss = 1.5054
Epoch 2, Step 90: Loss = 1.0338
Epoch 2, Step 100: Loss = 1.1755
Epoch 2, Step 110: Loss = 0.9556
Epoch 2, Step 120: Loss = 1.4119
Epoch 2, Step 130: Loss = 1.2785
Epoch 2, Step 140: Loss = 0.4764
Epoch 2, Step 150: Loss = 1.5515
Epoch 2, Step 160: Loss = 0.9627
Epoch 2, Step 170: Loss = 0.4925
Epoch 2, Step 180: Loss = 0.6196
Epoch 2, Step 190: Loss = 1.1876
Epoch 2, Step 200: Loss = 0.7838
Epoch 2, Step 210: Loss = 0.2412
Epoch 2, Step 220: Loss = 0.5748
Epoch 2, Step 230: Loss = 0.1831
Epoch 2, Step 240: Loss = 0.2321
Epoch 2, Step 250: Loss = 0.9516
Epoch 2, Step 260: Loss = 0.9907
Epoch 2, Step 270: Loss = 1.1221
Epoch 2, Step 280: Loss = 0.5315
Epoch 2, Step 290: Loss = 0.9457
Epoch 2, Step 300: Lo




Epoch 2 Summary:
Training Metrics:
accuracy: 0.6115
precision: 0.5936
recall: 0.7067
f1: 0.6452
loss: 0.9141

Validation Metrics:
accuracy: 0.6625
precision: 0.6462
recall: 0.7183
f1: 0.6803
loss: 0.7498




Epoch 3, Step 0: Loss = 0.3411
Epoch 3, Step 10: Loss = 0.9979
Epoch 3, Step 20: Loss = 0.9834
Epoch 3, Step 30: Loss = 1.1315
Epoch 3, Step 40: Loss = 1.1075
Epoch 3, Step 50: Loss = 0.7082
Epoch 3, Step 60: Loss = 0.7770
Epoch 3, Step 70: Loss = 1.1067
Epoch 3, Step 80: Loss = 0.6393
Epoch 3, Step 90: Loss = 1.4462
Epoch 3, Step 100: Loss = 0.7740
Epoch 3, Step 110: Loss = 1.4950
Epoch 3, Step 120: Loss = 0.2545
Epoch 3, Step 130: Loss = 0.5613
Epoch 3, Step 140: Loss = 0.4366
Epoch 3, Step 150: Loss = 0.4676
Epoch 3, Step 160: Loss = 0.8820
Epoch 3, Step 170: Loss = 0.6953
Epoch 3, Step 180: Loss = 1.8080
Epoch 3, Step 190: Loss = 1.0954
Epoch 3, Step 200: Loss = 0.2043
Epoch 3, Step 210: Loss = 0.3592
Epoch 3, Step 220: Loss = 0.3342
Epoch 3, Step 230: Loss = 0.7810
Epoch 3, Step 240: Loss = 0.3942
Epoch 3, Step 250: Loss = 0.5145
Epoch 3, Step 260: Loss = 0.4310
Epoch 3, Step 270: Loss = 1.3418
Epoch 3, Step 280: Loss = 1.4158
Epoch 3, Step 290: Loss = 0.4292
Epoch 3, Step 300: Lo




Epoch 3 Summary:
Training Metrics:
accuracy: 0.6946
precision: 0.6837
recall: 0.7242
f1: 0.7034
loss: 0.6917

Validation Metrics:
accuracy: 0.7208
precision: 0.7011
recall: 0.7700
f1: 0.7339
loss: 0.6187




Epoch 4, Step 0: Loss = 0.1367
Epoch 4, Step 10: Loss = 0.3252
Epoch 4, Step 20: Loss = 0.7450
Epoch 4, Step 30: Loss = 1.2430
Epoch 4, Step 40: Loss = 0.2624
Epoch 4, Step 50: Loss = 0.5536
Epoch 4, Step 60: Loss = 0.2195
Epoch 4, Step 70: Loss = 0.2389
Epoch 4, Step 80: Loss = 0.2511
Epoch 4, Step 90: Loss = 0.9341
Epoch 4, Step 100: Loss = 1.2449
Epoch 4, Step 110: Loss = 0.7884
Epoch 4, Step 120: Loss = 0.4777
Epoch 4, Step 130: Loss = 0.3839
Epoch 4, Step 140: Loss = 0.9442
Epoch 4, Step 150: Loss = 0.7652
Epoch 4, Step 160: Loss = 0.8584
Epoch 4, Step 170: Loss = 0.3920
Epoch 4, Step 180: Loss = 0.8740
Epoch 4, Step 190: Loss = 0.8319
Epoch 4, Step 200: Loss = 0.2694
Epoch 4, Step 210: Loss = 0.7588
Epoch 4, Step 220: Loss = 0.4001
Epoch 4, Step 230: Loss = 0.8200
Epoch 4, Step 240: Loss = 0.2321
Epoch 4, Step 250: Loss = 0.6252
Epoch 4, Step 260: Loss = 0.3301
Epoch 4, Step 270: Loss = 0.3267
Epoch 4, Step 280: Loss = 0.5645
Epoch 4, Step 290: Loss = 0.5221
Epoch 4, Step 300: Lo




Epoch 4 Summary:
Training Metrics:
accuracy: 0.7310
precision: 0.7214
recall: 0.7529
f1: 0.7368
loss: 0.6051

Validation Metrics:
accuracy: 0.7458
precision: 0.7294
recall: 0.7817
f1: 0.7546
loss: 0.5679




Epoch 5, Step 0: Loss = 0.6638
Epoch 5, Step 10: Loss = 0.7121
Epoch 5, Step 20: Loss = 2.0743
Epoch 5, Step 30: Loss = 0.4075
Epoch 5, Step 40: Loss = 0.2275
Epoch 5, Step 50: Loss = 0.5907
Epoch 5, Step 60: Loss = 0.4010
Epoch 5, Step 70: Loss = 0.6592
Epoch 5, Step 80: Loss = 0.4173
Epoch 5, Step 90: Loss = 0.7529
Epoch 5, Step 100: Loss = 1.1738
Epoch 5, Step 110: Loss = 0.0558
Epoch 5, Step 120: Loss = 1.6863
Epoch 5, Step 130: Loss = 1.0101
Epoch 5, Step 140: Loss = 0.4699
Epoch 5, Step 150: Loss = 0.8325
Epoch 5, Step 160: Loss = 0.7231
Epoch 5, Step 170: Loss = 1.0997
Epoch 5, Step 180: Loss = 0.6439
Epoch 5, Step 190: Loss = 0.3900
Epoch 5, Step 200: Loss = 0.3667
Epoch 5, Step 210: Loss = 0.1978
Epoch 5, Step 220: Loss = 0.3740
Epoch 5, Step 230: Loss = 0.8362
Epoch 5, Step 240: Loss = 0.4573
Epoch 5, Step 250: Loss = 0.1732
Epoch 5, Step 260: Loss = 0.1033
Epoch 5, Step 270: Loss = 1.1292
Epoch 5, Step 280: Loss = 1.0765
Epoch 5, Step 290: Loss = 0.6569
Epoch 5, Step 300: Lo




Epoch 5 Summary:
Training Metrics:
accuracy: 0.7506
precision: 0.7397
recall: 0.7733
f1: 0.7562
loss: 0.5677

Validation Metrics:
accuracy: 0.7550
precision: 0.7347
recall: 0.7983
f1: 0.7652
loss: 0.5460




Epoch 6, Step 0: Loss = 0.7407
Epoch 6, Step 10: Loss = 0.7848
Epoch 6, Step 20: Loss = 0.6443
Epoch 6, Step 30: Loss = 0.6357
Epoch 6, Step 40: Loss = 1.0533
Epoch 6, Step 50: Loss = 0.3318
Epoch 6, Step 60: Loss = 0.6540
Epoch 6, Step 70: Loss = 0.2913
Epoch 6, Step 80: Loss = 0.7108
Epoch 6, Step 90: Loss = 1.2605
Epoch 6, Step 100: Loss = 0.9451
Epoch 6, Step 110: Loss = 0.3485
Epoch 6, Step 120: Loss = 0.1465
Epoch 6, Step 130: Loss = 0.2862
Epoch 6, Step 140: Loss = 0.9014
Epoch 6, Step 150: Loss = 0.1590
Epoch 6, Step 160: Loss = 0.6082
Epoch 6, Step 170: Loss = 0.5092
Epoch 6, Step 180: Loss = 0.2405
Epoch 6, Step 190: Loss = 0.3369
Epoch 6, Step 200: Loss = 0.1340
Epoch 6, Step 210: Loss = 0.7112
Epoch 6, Step 220: Loss = 0.9717
Epoch 6, Step 230: Loss = 0.3481
Epoch 6, Step 240: Loss = 0.7195
Epoch 6, Step 250: Loss = 0.7212
Epoch 6, Step 260: Loss = 0.1951
Epoch 6, Step 270: Loss = 0.8287
Epoch 6, Step 280: Loss = 0.7093
Epoch 6, Step 290: Loss = 0.5658
Epoch 6, Step 300: Lo




Epoch 6 Summary:
Training Metrics:
accuracy: 0.7590
precision: 0.7485
recall: 0.7800
f1: 0.7639
loss: 0.5521

Validation Metrics:
accuracy: 0.7608
precision: 0.7404
recall: 0.8033
f1: 0.7706
loss: 0.5355




Epoch 7, Step 0: Loss = 0.1310
Epoch 7, Step 10: Loss = 0.6518
Epoch 7, Step 20: Loss = 0.7682
Epoch 7, Step 30: Loss = 0.6046
Epoch 7, Step 40: Loss = 0.7722
Epoch 7, Step 50: Loss = 0.3215
Epoch 7, Step 60: Loss = 0.7306
Epoch 7, Step 70: Loss = 0.7962
Epoch 7, Step 80: Loss = 0.7922
Epoch 7, Step 90: Loss = 0.9849
Epoch 7, Step 100: Loss = 0.1571
Epoch 7, Step 110: Loss = 0.8098
Epoch 7, Step 120: Loss = 0.3091
Epoch 7, Step 130: Loss = 0.4993
Epoch 7, Step 140: Loss = 0.4128
Epoch 7, Step 150: Loss = 0.6371
Epoch 7, Step 160: Loss = 0.5698
Epoch 7, Step 170: Loss = 0.1370
Epoch 7, Step 180: Loss = 0.9648
Epoch 7, Step 190: Loss = 0.0418
Epoch 7, Step 200: Loss = 1.0346
Epoch 7, Step 210: Loss = 0.9877
Epoch 7, Step 220: Loss = 0.5247
Epoch 7, Step 230: Loss = 0.2863
Epoch 7, Step 240: Loss = 0.2896
Epoch 7, Step 250: Loss = 0.6123
Epoch 7, Step 260: Loss = 0.6313
Epoch 7, Step 270: Loss = 0.5180
Epoch 7, Step 280: Loss = 0.1816
Epoch 7, Step 290: Loss = 0.5728
Epoch 7, Step 300: Lo




Epoch 7 Summary:
Training Metrics:
accuracy: 0.7594
precision: 0.7465
recall: 0.7854
f1: 0.7655
loss: 0.5438

Validation Metrics:
accuracy: 0.7650
precision: 0.7424
recall: 0.8117
f1: 0.7755
loss: 0.5305




Epoch 8, Step 0: Loss = 0.2499
Epoch 8, Step 10: Loss = 0.2583
Epoch 8, Step 20: Loss = 0.3926
Epoch 8, Step 30: Loss = 0.8756
Epoch 8, Step 40: Loss = 0.2008
Epoch 8, Step 50: Loss = 0.2009
Epoch 8, Step 60: Loss = 0.9405
Epoch 8, Step 70: Loss = 1.1549
Epoch 8, Step 80: Loss = 0.6907
Epoch 8, Step 90: Loss = 0.8426
Epoch 8, Step 100: Loss = 0.6862
Epoch 8, Step 110: Loss = 1.0415
Epoch 8, Step 120: Loss = 0.1298
Epoch 8, Step 130: Loss = 0.3687
Epoch 8, Step 140: Loss = 0.6199
Epoch 8, Step 150: Loss = 0.6075
Epoch 8, Step 160: Loss = 0.2628
Epoch 8, Step 170: Loss = 0.2515
Epoch 8, Step 180: Loss = 1.1116
Epoch 8, Step 190: Loss = 0.7796
Epoch 8, Step 200: Loss = 0.6909
Epoch 8, Step 210: Loss = 0.4987
Epoch 8, Step 220: Loss = 0.4791
Epoch 8, Step 230: Loss = 0.5402
Epoch 8, Step 240: Loss = 0.5270
Epoch 8, Step 250: Loss = 0.8260
Epoch 8, Step 260: Loss = 0.8567
Epoch 8, Step 270: Loss = 0.4813
Epoch 8, Step 280: Loss = 0.3010
Epoch 8, Step 290: Loss = 0.5699
Epoch 8, Step 300: Lo




Epoch 8 Summary:
Training Metrics:
accuracy: 0.7631
precision: 0.7497
recall: 0.7900
f1: 0.7693
loss: 0.5396

Validation Metrics:
accuracy: 0.7683
precision: 0.7439
recall: 0.8183
f1: 0.7794
loss: 0.5283


In [5]:
import os
import json
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    LlamaForSequenceClassification,
    LlamaTokenizer,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
    AutoConfig
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    #text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    #text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def setup_model_and_tokenizer(model_name, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token
    
    model_config = AutoConfig.from_pretrained(model_name)
    model_config.num_labels = 2
    model_config.pad_token_id = tokenizer.pad_token_id
    model_config.use_cache = False
    
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = LlamaForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )
    
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer

def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }

def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    #plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()

def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics

def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=9):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds = []
        train_labels = []
        
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            loss.backward()
            
            if (step + 1) % 2 == 0:  # Gradient accumulation steps = 2
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            
            torch.cuda.empty_cache()
  
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
      
        val_metrics = evaluate_model(model, val_loader, device)
      
        print(f"\nEpoch {epoch + 1} Summary:")
        print("Training Metrics:")
        for metric, value in train_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        print("\nValidation Metrics:")
        for metric, value in val_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        # Save best model
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        
        torch.cuda.empty_cache()
        gc.collect()
    
    return best_model_state, best_val_metrics

def main():
    login(token="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")
    device = setup_environment()
    model_name = 'meta-llama/Meta-Llama-3-8B'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    
    train_df, val_df = train_test_split(emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42)
    
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 9
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    best_model_state, best_metrics = train_model(
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        device,
        num_epochs=num_epochs
    )
    
    
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_8B_revised")
    os.makedirs(output_dir, exist_ok=True)
    
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    #plot_confusion_matrix(best_metrics['confusion_matrix'], output_dir)
    
  
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() 
                        for k, v in best_metrics.items()}
    }
    
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()

Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Meta-Llama-3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 1.0201
Epoch 1, Step 10: Loss = 2.0799
Epoch 1, Step 20: Loss = 1.9430
Epoch 1, Step 30: Loss = 1.0707
Epoch 1, Step 40: Loss = 2.6649
Epoch 1, Step 50: Loss = 1.5183
Epoch 1, Step 60: Loss = 0.7090
Epoch 1, Step 70: Loss = 1.5923
Epoch 1, Step 80: Loss = 0.7225
Epoch 1, Step 90: Loss = 0.4653
Epoch 1, Step 100: Loss = 0.8534
Epoch 1, Step 110: Loss = 0.7550
Epoch 1, Step 120: Loss = 1.0739
Epoch 1, Step 130: Loss = 1.2751
Epoch 1, Step 140: Loss = 1.4228
Epoch 1, Step 150: Loss = 0.9395
Epoch 1, Step 160: Loss = 2.1517
Epoch 1, Step 170: Loss = 0.8936
Epoch 1, Step 180: Loss = 1.7863
Epoch 1, Step 190: Loss = 1.6126
Epoch 1, Step 200: Loss = 1.9987
Epoch 1, Step 210: Loss = 1.5510
Epoch 1, Step 220: Loss = 0.4270
Epoch 1, Step 230: Loss = 1.2550
Epoch 1, Step 240: Loss = 1.0416
Epoch 1, Step 250: Loss = 0.9989
Epoch 1, Step 260: Loss = 0.8839
Epoch 1, Step 270: Loss = 1.5619
Epoch 1, Step 280: Loss = 2.2744
Epoch 1, Step 290: Loss = 0.7689
Epoch 1, Step 300: Lo




Epoch 1 Summary:
Training Metrics:
accuracy: 0.5671
precision: 0.5574
recall: 0.6517
f1: 0.6008
loss: 1.1971

Validation Metrics:
accuracy: 0.6283
precision: 0.6258
recall: 0.6383
f1: 0.6320
loss: 0.8962




Epoch 2, Step 0: Loss = 1.2067
Epoch 2, Step 10: Loss = 1.4690
Epoch 2, Step 20: Loss = 1.3225
Epoch 2, Step 30: Loss = 1.2704
Epoch 2, Step 40: Loss = 0.7351
Epoch 2, Step 50: Loss = 1.2196
Epoch 2, Step 60: Loss = 0.3089
Epoch 2, Step 70: Loss = 1.4756
Epoch 2, Step 80: Loss = 0.6224
Epoch 2, Step 90: Loss = 0.5824
Epoch 2, Step 100: Loss = 1.6997
Epoch 2, Step 110: Loss = 0.6794
Epoch 2, Step 120: Loss = 0.4628
Epoch 2, Step 130: Loss = 1.1423
Epoch 2, Step 140: Loss = 0.9324
Epoch 2, Step 150: Loss = 0.8387
Epoch 2, Step 160: Loss = 1.3702
Epoch 2, Step 170: Loss = 1.0165
Epoch 2, Step 180: Loss = 0.7973
Epoch 2, Step 190: Loss = 0.3767
Epoch 2, Step 200: Loss = 0.5214
Epoch 2, Step 210: Loss = 0.6168
Epoch 2, Step 220: Loss = 0.6362
Epoch 2, Step 230: Loss = 0.4896
Epoch 2, Step 240: Loss = 0.4474
Epoch 2, Step 250: Loss = 0.5449
Epoch 2, Step 260: Loss = 0.9650
Epoch 2, Step 270: Loss = 0.5175
Epoch 2, Step 280: Loss = 0.4356
Epoch 2, Step 290: Loss = 0.2591
Epoch 2, Step 300: Lo




Epoch 2 Summary:
Training Metrics:
accuracy: 0.6923
precision: 0.6895
recall: 0.6996
f1: 0.6945
loss: 0.7210

Validation Metrics:
accuracy: 0.7575
precision: 0.7537
recall: 0.7650
f1: 0.7593
loss: 0.5665




Epoch 3, Step 0: Loss = 0.6685
Epoch 3, Step 10: Loss = 1.2859
Epoch 3, Step 20: Loss = 0.2171
Epoch 3, Step 30: Loss = 0.9447
Epoch 3, Step 40: Loss = 0.4545
Epoch 3, Step 50: Loss = 0.7475
Epoch 3, Step 60: Loss = 0.8197
Epoch 3, Step 70: Loss = 0.4796
Epoch 3, Step 80: Loss = 0.3637
Epoch 3, Step 90: Loss = 0.7266
Epoch 3, Step 100: Loss = 1.1577
Epoch 3, Step 110: Loss = 0.7384
Epoch 3, Step 120: Loss = 0.2847
Epoch 3, Step 130: Loss = 0.7295
Epoch 3, Step 140: Loss = 0.2544
Epoch 3, Step 150: Loss = 0.3054
Epoch 3, Step 160: Loss = 0.4911
Epoch 3, Step 170: Loss = 0.7533
Epoch 3, Step 180: Loss = 0.2588
Epoch 3, Step 190: Loss = 0.4652
Epoch 3, Step 200: Loss = 0.4506
Epoch 3, Step 210: Loss = 0.6996
Epoch 3, Step 220: Loss = 0.2090
Epoch 3, Step 230: Loss = 0.2631
Epoch 3, Step 240: Loss = 0.3075
Epoch 3, Step 250: Loss = 0.6726
Epoch 3, Step 260: Loss = 0.1591
Epoch 3, Step 270: Loss = 0.2432
Epoch 3, Step 280: Loss = 0.6471
Epoch 3, Step 290: Loss = 0.7295
Epoch 3, Step 300: Lo




Epoch 3 Summary:
Training Metrics:
accuracy: 0.7700
precision: 0.7711
recall: 0.7679
f1: 0.7695
loss: 0.5193

Validation Metrics:
accuracy: 0.8050
precision: 0.7952
recall: 0.8217
f1: 0.8082
loss: 0.4694




Epoch 4, Step 0: Loss = 0.2062
Epoch 4, Step 10: Loss = 0.1328
Epoch 4, Step 20: Loss = 0.8352
Epoch 4, Step 30: Loss = 0.6021
Epoch 4, Step 40: Loss = 0.3119
Epoch 4, Step 50: Loss = 0.2443
Epoch 4, Step 60: Loss = 0.2862
Epoch 4, Step 70: Loss = 0.4691
Epoch 4, Step 80: Loss = 0.8902
Epoch 4, Step 90: Loss = 0.8654
Epoch 4, Step 100: Loss = 0.1640
Epoch 4, Step 110: Loss = 0.6538
Epoch 4, Step 120: Loss = 0.8195
Epoch 4, Step 130: Loss = 0.4799
Epoch 4, Step 140: Loss = 0.4633
Epoch 4, Step 150: Loss = 0.7258
Epoch 4, Step 160: Loss = 0.4624
Epoch 4, Step 170: Loss = 0.9507
Epoch 4, Step 180: Loss = 0.4534
Epoch 4, Step 190: Loss = 0.4945
Epoch 4, Step 200: Loss = 0.6972
Epoch 4, Step 210: Loss = 0.8402
Epoch 4, Step 220: Loss = 0.4330
Epoch 4, Step 230: Loss = 0.4441
Epoch 4, Step 240: Loss = 0.4102
Epoch 4, Step 250: Loss = 0.0778
Epoch 4, Step 260: Loss = 0.2228
Epoch 4, Step 270: Loss = 0.1479
Epoch 4, Step 280: Loss = 0.4418
Epoch 4, Step 290: Loss = 0.0814
Epoch 4, Step 300: Lo




Epoch 4 Summary:
Training Metrics:
accuracy: 0.8054
precision: 0.8047
recall: 0.8067
f1: 0.8057
loss: 0.4493

Validation Metrics:
accuracy: 0.8242
precision: 0.8173
recall: 0.8350
f1: 0.8261
loss: 0.4342




Epoch 5, Step 0: Loss = 0.0990
Epoch 5, Step 10: Loss = 0.1100
Epoch 5, Step 20: Loss = 0.6624
Epoch 5, Step 30: Loss = 0.3131
Epoch 5, Step 40: Loss = 0.2279
Epoch 5, Step 50: Loss = 0.3831
Epoch 5, Step 60: Loss = 0.4696
Epoch 5, Step 70: Loss = 0.5470
Epoch 5, Step 80: Loss = 0.4089
Epoch 5, Step 90: Loss = 0.8191
Epoch 5, Step 100: Loss = 0.1950
Epoch 5, Step 110: Loss = 0.1967
Epoch 5, Step 120: Loss = 0.4207
Epoch 5, Step 130: Loss = 0.5370
Epoch 5, Step 140: Loss = 0.8639
Epoch 5, Step 150: Loss = 1.5746
Epoch 5, Step 160: Loss = 0.1322
Epoch 5, Step 170: Loss = 0.5886
Epoch 5, Step 180: Loss = 0.4048
Epoch 5, Step 190: Loss = 0.5198
Epoch 5, Step 200: Loss = 0.1504
Epoch 5, Step 210: Loss = 0.7037
Epoch 5, Step 220: Loss = 0.4896
Epoch 5, Step 230: Loss = 0.2120
Epoch 5, Step 240: Loss = 0.6935
Epoch 5, Step 250: Loss = 0.6593
Epoch 5, Step 260: Loss = 0.7328
Epoch 5, Step 270: Loss = 0.7225
Epoch 5, Step 280: Loss = 0.0739
Epoch 5, Step 290: Loss = 0.6100
Epoch 5, Step 300: Lo




Epoch 5 Summary:
Training Metrics:
accuracy: 0.8133
precision: 0.8144
recall: 0.8117
f1: 0.8130
loss: 0.4296

Validation Metrics:
accuracy: 0.8333
precision: 0.8279
recall: 0.8417
f1: 0.8347
loss: 0.4172




Epoch 6, Step 0: Loss = 0.1668
Epoch 6, Step 10: Loss = 0.0474
Epoch 6, Step 20: Loss = 0.6506
Epoch 6, Step 30: Loss = 1.3475
Epoch 6, Step 40: Loss = 0.5238
Epoch 6, Step 50: Loss = 0.5794
Epoch 6, Step 60: Loss = 0.3678
Epoch 6, Step 70: Loss = 0.1993
Epoch 6, Step 80: Loss = 0.2345
Epoch 6, Step 90: Loss = 0.4459
Epoch 6, Step 100: Loss = 0.4330
Epoch 6, Step 110: Loss = 0.7010
Epoch 6, Step 120: Loss = 0.5625
Epoch 6, Step 130: Loss = 0.1081
Epoch 6, Step 140: Loss = 0.2626
Epoch 6, Step 150: Loss = 0.3033
Epoch 6, Step 160: Loss = 0.7171
Epoch 6, Step 170: Loss = 0.4904
Epoch 6, Step 180: Loss = 0.5293
Epoch 6, Step 190: Loss = 0.7592
Epoch 6, Step 200: Loss = 0.1043
Epoch 6, Step 210: Loss = 0.2384
Epoch 6, Step 220: Loss = 0.5612
Epoch 6, Step 230: Loss = 0.2620
Epoch 6, Step 240: Loss = 0.5511
Epoch 6, Step 250: Loss = 0.6964
Epoch 6, Step 260: Loss = 0.5176
Epoch 6, Step 270: Loss = 0.3460
Epoch 6, Step 280: Loss = 0.3603
Epoch 6, Step 290: Loss = 0.2600
Epoch 6, Step 300: Lo




Epoch 6 Summary:
Training Metrics:
accuracy: 0.8233
precision: 0.8261
recall: 0.8192
f1: 0.8226
loss: 0.4108

Validation Metrics:
accuracy: 0.8350
precision: 0.8295
recall: 0.8433
f1: 0.8364
loss: 0.4095




Epoch 7, Step 0: Loss = 0.2659
Epoch 7, Step 10: Loss = 0.2016
Epoch 7, Step 20: Loss = 0.2459
Epoch 7, Step 30: Loss = 0.6207
Epoch 7, Step 40: Loss = 0.2176
Epoch 7, Step 50: Loss = 0.1260
Epoch 7, Step 60: Loss = 0.7005
Epoch 7, Step 70: Loss = 0.2711
Epoch 7, Step 80: Loss = 0.3098
Epoch 7, Step 90: Loss = 0.0592
Epoch 7, Step 100: Loss = 0.8338
Epoch 7, Step 110: Loss = 0.1684
Epoch 7, Step 120: Loss = 0.4756
Epoch 7, Step 130: Loss = 0.2388
Epoch 7, Step 140: Loss = 0.2030
Epoch 7, Step 150: Loss = 0.1851
Epoch 7, Step 160: Loss = 0.2112
Epoch 7, Step 170: Loss = 0.2918
Epoch 7, Step 180: Loss = 0.0681
Epoch 7, Step 190: Loss = 0.1299
Epoch 7, Step 200: Loss = 0.0904
Epoch 7, Step 210: Loss = 0.2553
Epoch 7, Step 220: Loss = 0.2253
Epoch 7, Step 230: Loss = 0.2416
Epoch 7, Step 240: Loss = 0.8246
Epoch 7, Step 250: Loss = 0.4017
Epoch 7, Step 260: Loss = 0.2774
Epoch 7, Step 270: Loss = 0.3267
Epoch 7, Step 280: Loss = 0.6528
Epoch 7, Step 290: Loss = 0.2252
Epoch 7, Step 300: Lo




Epoch 7 Summary:
Training Metrics:
accuracy: 0.8246
precision: 0.8273
recall: 0.8204
f1: 0.8238
loss: 0.4100

Validation Metrics:
accuracy: 0.8342
precision: 0.8282
recall: 0.8433
f1: 0.8357
loss: 0.4060




Epoch 8, Step 0: Loss = 0.3762
Epoch 8, Step 10: Loss = 0.3037
Epoch 8, Step 20: Loss = 0.3188
Epoch 8, Step 30: Loss = 0.6653
Epoch 8, Step 40: Loss = 0.2065
Epoch 8, Step 50: Loss = 0.5300
Epoch 8, Step 60: Loss = 0.1579
Epoch 8, Step 70: Loss = 0.3381
Epoch 8, Step 80: Loss = 0.1750
Epoch 8, Step 90: Loss = 0.4464
Epoch 8, Step 100: Loss = 0.0978
Epoch 8, Step 110: Loss = 0.2467
Epoch 8, Step 120: Loss = 0.0698
Epoch 8, Step 130: Loss = 0.5287
Epoch 8, Step 140: Loss = 0.3284
Epoch 8, Step 150: Loss = 0.1188
Epoch 8, Step 160: Loss = 0.3246
Epoch 8, Step 170: Loss = 0.9202
Epoch 8, Step 180: Loss = 0.3063
Epoch 8, Step 190: Loss = 0.3213
Epoch 8, Step 200: Loss = 0.2877
Epoch 8, Step 210: Loss = 0.2384
Epoch 8, Step 220: Loss = 0.5121
Epoch 8, Step 230: Loss = 0.4663
Epoch 8, Step 240: Loss = 0.6268
Epoch 8, Step 250: Loss = 0.4933
Epoch 8, Step 260: Loss = 0.1397
Epoch 8, Step 270: Loss = 0.4174
Epoch 8, Step 280: Loss = 0.1364
Epoch 8, Step 290: Loss = 0.1532
Epoch 8, Step 300: Lo




Epoch 8 Summary:
Training Metrics:
accuracy: 0.8281
precision: 0.8277
recall: 0.8287
f1: 0.8282
loss: 0.4043

Validation Metrics:
accuracy: 0.8350
precision: 0.8295
recall: 0.8433
f1: 0.8364
loss: 0.4043




Epoch 9, Step 0: Loss = 0.0709
Epoch 9, Step 10: Loss = 0.1444
Epoch 9, Step 20: Loss = 0.1741
Epoch 9, Step 30: Loss = 0.0682
Epoch 9, Step 40: Loss = 0.1114
Epoch 9, Step 50: Loss = 0.4929
Epoch 9, Step 60: Loss = 0.1031
Epoch 9, Step 70: Loss = 0.4192
Epoch 9, Step 80: Loss = 0.8291
Epoch 9, Step 90: Loss = 0.7346
Epoch 9, Step 100: Loss = 0.4333
Epoch 9, Step 110: Loss = 0.2631
Epoch 9, Step 120: Loss = 0.2886
Epoch 9, Step 130: Loss = 0.7934
Epoch 9, Step 140: Loss = 0.5070
Epoch 9, Step 150: Loss = 0.7782
Epoch 9, Step 160: Loss = 0.5225
Epoch 9, Step 170: Loss = 0.1582
Epoch 9, Step 180: Loss = 0.8176
Epoch 9, Step 190: Loss = 0.1243
Epoch 9, Step 200: Loss = 0.5370
Epoch 9, Step 210: Loss = 0.2017
Epoch 9, Step 220: Loss = 0.0919
Epoch 9, Step 230: Loss = 0.1657
Epoch 9, Step 240: Loss = 0.6046
Epoch 9, Step 250: Loss = 0.2597
Epoch 9, Step 260: Loss = 0.4361
Epoch 9, Step 270: Loss = 0.2859
Epoch 9, Step 280: Loss = 0.5497
Epoch 9, Step 290: Loss = 0.1168
Epoch 9, Step 300: Lo




Epoch 9 Summary:
Training Metrics:
accuracy: 0.8310
precision: 0.8312
recall: 0.8308
f1: 0.8310
loss: 0.4024

Validation Metrics:
accuracy: 0.8358
precision: 0.8309
recall: 0.8433
f1: 0.8371
loss: 0.4034


In [4]:
import os
import json
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    BertTokenizer,
    BertForSequenceClassification,
    get_linear_schedule_with_warmup,
    AutoConfig
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc


def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
   
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def setup_model_and_tokenizer(model_name, device):
    tokenizer = BertTokenizer.from_pretrained(model_name)
    
    model_config = AutoConfig.from_pretrained(model_name)
    model_config.num_labels = 2
    
    model = BertForSequenceClassification.from_pretrained(
        model_name,
        config=model_config
    )
    

    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["query", "value"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(model, lora_config)
    
    return model, tokenizer

def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }

def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            
            outputs = model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                labels=batch['label']
            )
            
            loss = outputs.loss
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics

def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=8):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds = []
        train_labels = []
        
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            outputs = model(
                input_ids=batch['input_ids'],
                attention_mask=batch['attention_mask'],
                labels=batch['label']
            )
            
            loss = outputs.loss
            loss.backward()
            
            if (step + 1) % 2 == 0:  # Gradient accumulation steps = 2
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            
            torch.cuda.empty_cache()
  
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
      
        val_metrics = evaluate_model(model, val_loader, device)
      
        print(f"\nEpoch {epoch + 1} Summary:")
        print("Training Metrics:")
        for metric, value in train_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        print("\nValidation Metrics:")
        for metric, value in val_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        
        torch.cuda.empty_cache()
        gc.collect()
    
    return best_model_state, best_val_metrics

def main():
    login(token="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")
    device = setup_environment()
    model_name = 'bert-base-uncased'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    
    train_df, val_df = train_test_split(emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42)
    
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    
    train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=16, num_workers=2, pin_memory=True)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)  
    num_epochs = 6
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    best_model_state, best_metrics = train_model(
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        device,
        num_epochs=num_epochs
    )
    
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_bert_revised")
    os.makedirs(output_dir, exist_ok=True)
    
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 16,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() 
                        for k, v in best_metrics.items()}
    }
    
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()

Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 0.7377
Epoch 1, Step 10: Loss = 0.7115
Epoch 1, Step 20: Loss = 0.6869
Epoch 1, Step 30: Loss = 0.6944
Epoch 1, Step 40: Loss = 0.6701
Epoch 1, Step 50: Loss = 0.6761
Epoch 1, Step 60: Loss = 0.7054
Epoch 1, Step 70: Loss = 0.6676
Epoch 1, Step 80: Loss = 0.6628
Epoch 1, Step 90: Loss = 0.6530
Epoch 1, Step 100: Loss = 0.6286
Epoch 1, Step 110: Loss = 0.6690
Epoch 1, Step 120: Loss = 0.6444
Epoch 1, Step 130: Loss = 0.6622
Epoch 1, Step 140: Loss = 0.7084
Epoch 1, Step 150: Loss = 0.6539
Epoch 1, Step 160: Loss = 0.6355
Epoch 1, Step 170: Loss = 0.6604
Epoch 1, Step 180: Loss = 0.6192
Epoch 1, Step 190: Loss = 0.6211
Epoch 1, Step 200: Loss = 0.5759
Epoch 1, Step 210: Loss = 0.5991
Epoch 1, Step 220: Loss = 0.6746
Epoch 1, Step 230: Loss = 0.6010
Epoch 1, Step 240: Loss = 0.6221
Epoch 1, Step 250: Loss = 0.5301
Epoch 1, Step 260: Loss = 0.6151
Epoch 1, Step 270: Loss = 0.5844
Epoch 1, Step 280: Loss = 0.4754
Epoch 1, Step 290: Loss = 0.5471

Epoch 1 Summary:
Tra

In [3]:
import os
import json
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (AutoTokenizer,AutoModelForSequenceClassification,
    LlamaForSequenceClassification,
    LlamaTokenizer,
                           BitsAndBytesConfig,
    get_linear_schedule_with_warmup,
    AutoConfig
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    #text = re.sub(r'http\S+|www\S+|https\S+', '', text)
    #text = re.sub(r'\S+@\S+', '', text)
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text

def setup_model_and_tokenizer(model_name, device):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token
    
    model_config = AutoConfig.from_pretrained(model_name)
    model_config.num_labels = 2
    model_config.pad_token_id = tokenizer.pad_token_id
    model_config.use_cache = False
    
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )
    
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer

def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }

def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()

def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics

def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=9):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds = []
        train_labels = []
        
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            
            loss = outputs.loss
            loss.backward()
            
            if (step + 1) % 2 == 0:  # Gradient accumulation steps = 2
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            
            total_loss += loss.item()
            
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            
            torch.cuda.empty_cache()
  
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
      
        val_metrics = evaluate_model(model, val_loader, device)
      
        print(f"\nEpoch {epoch + 1} Summary:")
        print("Training Metrics:")
        for metric, value in train_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        print("\nValidation Metrics:")
        for metric, value in val_metrics.items():
            if metric != 'confusion_matrix':
                print(f"{metric}: {value:.4f}")
        
        # Save best model
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        
        torch.cuda.empty_cache()
        gc.collect()
    
    return best_model_state, best_val_metrics

def main():
    login(token="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")
    device = setup_environment()
    model_name = 'dreamgen/WizardLM-2-7B'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    
    train_df, val_df = train_test_split(emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42)
    
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 9
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    
    best_model_state, best_metrics = train_model(
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        device,
        num_epochs=num_epochs
    )
    
    
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_wiz_revised")
    os.makedirs(output_dir, exist_ok=True)
    
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

   
    
  
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() 
                        for k, v in best_metrics.items()}
    }
    
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()

Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at dreamgen/WizardLM-2-7B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 6.2113
Epoch 1, Step 10: Loss = 6.6387
Epoch 1, Step 20: Loss = 1.4686
Epoch 1, Step 30: Loss = 1.3207
Epoch 1, Step 40: Loss = 0.0483
Epoch 1, Step 50: Loss = 3.2610
Epoch 1, Step 60: Loss = 7.1458
Epoch 1, Step 70: Loss = 2.7937
Epoch 1, Step 80: Loss = 3.8336
Epoch 1, Step 90: Loss = 3.3470
Epoch 1, Step 100: Loss = 1.0829
Epoch 1, Step 110: Loss = 0.1463
Epoch 1, Step 120: Loss = 1.9070
Epoch 1, Step 130: Loss = 2.9357
Epoch 1, Step 140: Loss = 6.8344
Epoch 1, Step 150: Loss = 0.7980
Epoch 1, Step 160: Loss = 0.9489
Epoch 1, Step 170: Loss = 2.0826
Epoch 1, Step 180: Loss = 2.0940
Epoch 1, Step 190: Loss = 2.6393
Epoch 1, Step 200: Loss = 2.1722
Epoch 1, Step 210: Loss = 1.4988
Epoch 1, Step 220: Loss = 1.4168
Epoch 1, Step 230: Loss = 3.3555
Epoch 1, Step 240: Loss = 3.6142
Epoch 1, Step 250: Loss = 0.5334
Epoch 1, Step 260: Loss = 3.3424
Epoch 1, Step 270: Loss = 4.0149
Epoch 1, Step 280: Loss = 0.5563
Epoch 1, Step 290: Loss = 3.8309
Epoch 1, Step 300: Lo




Epoch 1 Summary:
Training Metrics:
accuracy: 0.5548
precision: 0.5755
recall: 0.4175
f1: 0.4839
loss: 2.9538

Validation Metrics:
accuracy: 0.6050
precision: 0.6230
recall: 0.5317
f1: 0.5737
loss: 2.6958




Epoch 2, Step 0: Loss = 1.9413
Epoch 2, Step 10: Loss = 2.4812
Epoch 2, Step 20: Loss = 1.5239
Epoch 2, Step 30: Loss = 2.6381
Epoch 2, Step 40: Loss = 5.3037
Epoch 2, Step 50: Loss = 1.7171
Epoch 2, Step 60: Loss = 3.0399
Epoch 2, Step 70: Loss = 3.6979
Epoch 2, Step 80: Loss = 3.2975
Epoch 2, Step 90: Loss = 2.7928
Epoch 2, Step 100: Loss = 3.3085
Epoch 2, Step 110: Loss = 3.3500
Epoch 2, Step 120: Loss = 4.1650
Epoch 2, Step 130: Loss = 3.3132
Epoch 2, Step 140: Loss = 0.9803
Epoch 2, Step 150: Loss = 0.6280
Epoch 2, Step 160: Loss = 1.2280
Epoch 2, Step 170: Loss = 2.7566
Epoch 2, Step 180: Loss = 0.4447
Epoch 2, Step 190: Loss = 5.9158
Epoch 2, Step 200: Loss = 0.8544
Epoch 2, Step 210: Loss = 1.9219
Epoch 2, Step 220: Loss = 3.8035
Epoch 2, Step 230: Loss = 2.1817
Epoch 2, Step 240: Loss = 0.5151
Epoch 2, Step 250: Loss = 4.6344
Epoch 2, Step 260: Loss = 1.8239
Epoch 2, Step 270: Loss = 3.9730
Epoch 2, Step 280: Loss = 3.0259
Epoch 2, Step 290: Loss = 4.8448
Epoch 2, Step 300: Lo




Epoch 2 Summary:
Training Metrics:
accuracy: 0.6229
precision: 0.6348
recall: 0.5787
f1: 0.6055
loss: 2.2871

Validation Metrics:
accuracy: 0.6642
precision: 0.6768
recall: 0.6283
f1: 0.6517
loss: 2.1893




Epoch 3, Step 0: Loss = 1.4000
Epoch 3, Step 10: Loss = 0.1392
Epoch 3, Step 20: Loss = 3.3958
Epoch 3, Step 30: Loss = 2.2521
Epoch 3, Step 40: Loss = 1.0122
Epoch 3, Step 50: Loss = 1.2499
Epoch 3, Step 60: Loss = 2.2842
Epoch 3, Step 70: Loss = 2.8229
Epoch 3, Step 80: Loss = 5.1533
Epoch 3, Step 90: Loss = 1.5236
Epoch 3, Step 100: Loss = 3.1277
Epoch 3, Step 110: Loss = 1.3050
Epoch 3, Step 120: Loss = 2.6654
Epoch 3, Step 130: Loss = 0.7327
Epoch 3, Step 140: Loss = 7.8349
Epoch 3, Step 150: Loss = 2.9700
Epoch 3, Step 160: Loss = 1.6368
Epoch 3, Step 170: Loss = 5.3865
Epoch 3, Step 180: Loss = 0.8436
Epoch 3, Step 190: Loss = 4.2251
Epoch 3, Step 200: Loss = 1.3719
Epoch 3, Step 210: Loss = 2.9848
Epoch 3, Step 220: Loss = 0.0521
Epoch 3, Step 230: Loss = 3.2790
Epoch 3, Step 240: Loss = 1.0863
Epoch 3, Step 250: Loss = 2.7086
Epoch 3, Step 260: Loss = 2.1116
Epoch 3, Step 270: Loss = 3.3270
Epoch 3, Step 280: Loss = 1.3022
Epoch 3, Step 290: Loss = 2.3300
Epoch 3, Step 300: Lo




Epoch 3 Summary:
Training Metrics:
accuracy: 0.6642
precision: 0.6761
recall: 0.6304
f1: 0.6524
loss: 1.9498

Validation Metrics:
accuracy: 0.6900
precision: 0.7050
recall: 0.6533
f1: 0.6782
loss: 1.9778




Epoch 4, Step 0: Loss = 1.5301
Epoch 4, Step 10: Loss = 3.0882
Epoch 4, Step 20: Loss = 0.4124
Epoch 4, Step 30: Loss = 1.5541
Epoch 4, Step 40: Loss = 2.1682
Epoch 4, Step 50: Loss = 0.5299
Epoch 4, Step 60: Loss = 2.0470
Epoch 4, Step 70: Loss = 1.5550
Epoch 4, Step 80: Loss = 1.0974
Epoch 4, Step 90: Loss = 1.3561
Epoch 4, Step 100: Loss = 2.6563
Epoch 4, Step 110: Loss = 3.5315
Epoch 4, Step 120: Loss = 1.8248
Epoch 4, Step 130: Loss = 0.1975
Epoch 4, Step 140: Loss = 2.5466
Epoch 4, Step 150: Loss = 0.4991
Epoch 4, Step 160: Loss = 2.0889
Epoch 4, Step 170: Loss = 2.7611
Epoch 4, Step 180: Loss = 4.0551
Epoch 4, Step 190: Loss = 3.1772
Epoch 4, Step 200: Loss = 2.6787
Epoch 4, Step 210: Loss = 2.3296
Epoch 4, Step 220: Loss = 1.3420
Epoch 4, Step 230: Loss = 3.0670
Epoch 4, Step 240: Loss = 4.6900
Epoch 4, Step 250: Loss = 5.7090
Epoch 4, Step 260: Loss = 0.9868
Epoch 4, Step 270: Loss = 0.5616
Epoch 4, Step 280: Loss = 0.1240
Epoch 4, Step 290: Loss = 0.3602
Epoch 4, Step 300: Lo




Epoch 4 Summary:
Training Metrics:
accuracy: 0.6860
precision: 0.6975
recall: 0.6571
f1: 0.6767
loss: 1.7973

Validation Metrics:
accuracy: 0.7042
precision: 0.7199
recall: 0.6683
f1: 0.6932
loss: 1.8820




Epoch 5, Step 0: Loss = 1.3089
Epoch 5, Step 10: Loss = 0.9341
Epoch 5, Step 20: Loss = 2.7797
Epoch 5, Step 30: Loss = 0.8017
Epoch 5, Step 40: Loss = 1.3581
Epoch 5, Step 50: Loss = 1.0660
Epoch 5, Step 60: Loss = 3.0621
Epoch 5, Step 70: Loss = 2.2621
Epoch 5, Step 80: Loss = 5.1549
Epoch 5, Step 90: Loss = 0.0405
Epoch 5, Step 100: Loss = 1.0704
Epoch 5, Step 110: Loss = 3.0277
Epoch 5, Step 120: Loss = 0.6154
Epoch 5, Step 130: Loss = 1.4377
Epoch 5, Step 140: Loss = 1.6356
Epoch 5, Step 150: Loss = 0.5097
Epoch 5, Step 160: Loss = 3.6896
Epoch 5, Step 170: Loss = 1.0269
Epoch 5, Step 180: Loss = 4.1624
Epoch 5, Step 190: Loss = 1.8021
Epoch 5, Step 200: Loss = 4.2829
Epoch 5, Step 210: Loss = 0.5094
Epoch 5, Step 220: Loss = 3.2533
Epoch 5, Step 230: Loss = 5.0907
Epoch 5, Step 240: Loss = 3.2730
Epoch 5, Step 250: Loss = 4.4665
Epoch 5, Step 260: Loss = 2.3833
Epoch 5, Step 270: Loss = 0.4159
Epoch 5, Step 280: Loss = 1.4485
Epoch 5, Step 290: Loss = 1.3861
Epoch 5, Step 300: Lo




Epoch 5 Summary:
Training Metrics:
accuracy: 0.6963
precision: 0.7077
recall: 0.6687
f1: 0.6877
loss: 1.7224

Validation Metrics:
accuracy: 0.7050
precision: 0.7204
recall: 0.6700
f1: 0.6943
loss: 1.8372




Epoch 6, Step 0: Loss = 2.2669
Epoch 6, Step 10: Loss = 0.3102
Epoch 6, Step 20: Loss = 2.0786
Epoch 6, Step 30: Loss = 2.0154
Epoch 6, Step 40: Loss = 4.1097
Epoch 6, Step 50: Loss = 2.5131
Epoch 6, Step 60: Loss = 0.5913
Epoch 6, Step 70: Loss = 1.0862
Epoch 6, Step 80: Loss = 1.9338
Epoch 6, Step 90: Loss = 1.4814
Epoch 6, Step 100: Loss = 1.4745
Epoch 6, Step 110: Loss = 2.0235
Epoch 6, Step 120: Loss = 1.2674
Epoch 6, Step 130: Loss = 1.0243
Epoch 6, Step 140: Loss = 0.9619
Epoch 6, Step 150: Loss = 2.0210
Epoch 6, Step 160: Loss = 1.2441
Epoch 6, Step 170: Loss = 3.8975
Epoch 6, Step 180: Loss = 0.2593
Epoch 6, Step 190: Loss = 0.4106
Epoch 6, Step 200: Loss = 1.5869
Epoch 6, Step 210: Loss = 0.2347
Epoch 6, Step 220: Loss = 1.5681
Epoch 6, Step 230: Loss = 2.6996
Epoch 6, Step 240: Loss = 2.5990
Epoch 6, Step 250: Loss = 0.1486
Epoch 6, Step 260: Loss = 2.5933
Epoch 6, Step 270: Loss = 2.5614
Epoch 6, Step 280: Loss = 1.1181
Epoch 6, Step 290: Loss = 2.2565
Epoch 6, Step 300: Lo




Epoch 6 Summary:
Training Metrics:
accuracy: 0.6994
precision: 0.7135
recall: 0.6663
f1: 0.6891
loss: 1.7002

Validation Metrics:
accuracy: 0.7083
precision: 0.7232
recall: 0.6750
f1: 0.6983
loss: 1.8131




Epoch 7, Step 0: Loss = 1.3895
Epoch 7, Step 10: Loss = 1.6943
Epoch 7, Step 20: Loss = 0.3569
Epoch 7, Step 30: Loss = 1.8910
Epoch 7, Step 40: Loss = 2.9507
Epoch 7, Step 50: Loss = 1.9359
Epoch 7, Step 60: Loss = 1.8504
Epoch 7, Step 70: Loss = 0.0070
Epoch 7, Step 80: Loss = 1.1214
Epoch 7, Step 90: Loss = 2.1966
Epoch 7, Step 100: Loss = 2.3299
Epoch 7, Step 110: Loss = 0.1551
Epoch 7, Step 120: Loss = 1.4898
Epoch 7, Step 130: Loss = 0.4344
Epoch 7, Step 140: Loss = 2.2253
Epoch 7, Step 150: Loss = 1.9971
Epoch 7, Step 160: Loss = 1.0763
Epoch 7, Step 170: Loss = 1.9201
Epoch 7, Step 180: Loss = 1.7144
Epoch 7, Step 190: Loss = 3.5872
Epoch 7, Step 200: Loss = 0.9302
Epoch 7, Step 210: Loss = 3.0224
Epoch 7, Step 220: Loss = 1.7220
Epoch 7, Step 230: Loss = 1.5429
Epoch 7, Step 240: Loss = 4.9280
Epoch 7, Step 250: Loss = 0.9017
Epoch 7, Step 260: Loss = 0.6735
Epoch 7, Step 270: Loss = 0.8544
Epoch 7, Step 280: Loss = 1.6476
Epoch 7, Step 290: Loss = 3.0776
Epoch 7, Step 300: Lo




Epoch 7 Summary:
Training Metrics:
accuracy: 0.7010
precision: 0.7130
recall: 0.6729
f1: 0.6924
loss: 1.6923

Validation Metrics:
accuracy: 0.7092
precision: 0.7237
recall: 0.6767
f1: 0.6994
loss: 1.8021




Epoch 8, Step 0: Loss = 0.3737
Epoch 8, Step 10: Loss = 2.4846
Epoch 8, Step 20: Loss = 0.7235
Epoch 8, Step 30: Loss = 1.1161
Epoch 8, Step 40: Loss = 2.6707
Epoch 8, Step 50: Loss = 0.0171
Epoch 8, Step 60: Loss = 1.8811
Epoch 8, Step 70: Loss = 4.2377
Epoch 8, Step 80: Loss = 0.6847
Epoch 8, Step 90: Loss = 0.8486
Epoch 8, Step 100: Loss = 0.3856
Epoch 8, Step 110: Loss = 0.8021
Epoch 8, Step 120: Loss = 1.0815
Epoch 8, Step 130: Loss = 1.0936
Epoch 8, Step 140: Loss = 1.1237
Epoch 8, Step 150: Loss = 2.7276
Epoch 8, Step 160: Loss = 2.0111
Epoch 8, Step 170: Loss = 0.5039
Epoch 8, Step 180: Loss = 0.4455
Epoch 8, Step 190: Loss = 3.4320
Epoch 8, Step 200: Loss = 2.1549
Epoch 8, Step 210: Loss = 0.2831
Epoch 8, Step 220: Loss = 3.7046
Epoch 8, Step 230: Loss = 3.3712
Epoch 8, Step 240: Loss = 2.6997
Epoch 8, Step 250: Loss = 1.5833
Epoch 8, Step 260: Loss = 0.2736
Epoch 8, Step 270: Loss = 0.3743
Epoch 8, Step 280: Loss = 0.8986
Epoch 8, Step 290: Loss = 0.9888
Epoch 8, Step 300: Lo




Epoch 8 Summary:
Training Metrics:
accuracy: 0.7013
precision: 0.7135
recall: 0.6725
f1: 0.6924
loss: 1.6748

Validation Metrics:
accuracy: 0.7092
precision: 0.7237
recall: 0.6767
f1: 0.6994
loss: 1.7958




Epoch 9, Step 0: Loss = 1.4255
Epoch 9, Step 10: Loss = 2.3865
Epoch 9, Step 20: Loss = 2.9377
Epoch 9, Step 30: Loss = 4.7885
Epoch 9, Step 40: Loss = 2.4532
Epoch 9, Step 50: Loss = 4.8548
Epoch 9, Step 60: Loss = 1.7188
Epoch 9, Step 70: Loss = 2.7863
Epoch 9, Step 80: Loss = 0.0060
Epoch 9, Step 90: Loss = 2.9754
Epoch 9, Step 100: Loss = 1.7123
Epoch 9, Step 110: Loss = 2.5666
Epoch 9, Step 120: Loss = 1.0871
Epoch 9, Step 130: Loss = 0.5703
Epoch 9, Step 140: Loss = 2.5477
Epoch 9, Step 150: Loss = 0.1591
Epoch 9, Step 160: Loss = 1.8331
Epoch 9, Step 170: Loss = 3.2147
Epoch 9, Step 180: Loss = 1.9149
Epoch 9, Step 190: Loss = 0.0095
Epoch 9, Step 200: Loss = 0.1361
Epoch 9, Step 210: Loss = 2.4796
Epoch 9, Step 220: Loss = 1.2056
Epoch 9, Step 230: Loss = 0.1292
Epoch 9, Step 240: Loss = 1.7142
Epoch 9, Step 250: Loss = 1.5926
Epoch 9, Step 260: Loss = 1.4080
Epoch 9, Step 270: Loss = 1.3901
Epoch 9, Step 280: Loss = 0.5340
Epoch 9, Step 290: Loss = 1.4478
Epoch 9, Step 300: Lo




Epoch 9 Summary:
Training Metrics:
accuracy: 0.7037
precision: 0.7164
recall: 0.6746
f1: 0.6948
loss: 1.6658

Validation Metrics:
accuracy: 0.7092
precision: 0.7237
recall: 0.6767
f1: 0.6994
loss: 1.7930


In [2]:
import os
import json
gc = None
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig,
    BitsAndBytesConfig,
    get_linear_schedule_with_warmup,
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def setup_model_and_tokenizer(model_name, device):
    # initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token

    # model configuration
    model_config = AutoConfig.from_pretrained(
        model_name,
        num_labels=2,
        pad_token_id=tokenizer.pad_token_id,
        use_cache=False
    )

    # 8-bit quantization setup
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )

    # LoRA configuration
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer


def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }


def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()


def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds, all_labels = [], []
    total_loss = 0
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics


def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=9):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds, train_labels = [], []
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            loss.backward()
            if (step + 1) % 2 == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            torch.cuda.empty_cache()
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
        val_metrics = evaluate_model(model, val_loader, device)
        print(f"\nEpoch {epoch+1} Summary:")
        print_metrics = lambda m: [print(f"{k}: {v:.4f}") for k, v in m.items() if k != 'confusion_matrix']
        print("Training Metrics:")
        print_metrics(train_metrics)
        print("\nValidation Metrics:")
        print_metrics(val_metrics)
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        torch.cuda.empty_cache()
        gc.collect()
    return best_model_state, best_val_metrics


def main():
    login(token="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")
    device = setup_environment()
    # switch to Mistral-7B model
    model_name = "mistralai/Mistral-7B-v0.1"
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    train_df, val_df = train_test_split(
        emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42
    )
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 9
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    best_model_state, best_metrics = train_model(
        model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=num_epochs
    )
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_mistral")
    os.makedirs(output_dir, exist_ok=True)
    # save PEFT-wrapped model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # save training config and metrics
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() for k, v in best_metrics.items()}
    }
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()


Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of MistralForSequenceClassification were not initialized from the model checkpoint at mistralai/Mistral-7B-v0.1 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 11.0144
Epoch 1, Step 10: Loss = 4.4789
Epoch 1, Step 20: Loss = 6.2724
Epoch 1, Step 30: Loss = 5.7053
Epoch 1, Step 40: Loss = 3.4180
Epoch 1, Step 50: Loss = 6.9233
Epoch 1, Step 60: Loss = 6.6618
Epoch 1, Step 70: Loss = 4.4406
Epoch 1, Step 80: Loss = 8.4976
Epoch 1, Step 90: Loss = 4.7993
Epoch 1, Step 100: Loss = 3.6008
Epoch 1, Step 110: Loss = 4.1995
Epoch 1, Step 120: Loss = 5.5875
Epoch 1, Step 130: Loss = 2.2347
Epoch 1, Step 140: Loss = 2.4018
Epoch 1, Step 150: Loss = 4.9808
Epoch 1, Step 160: Loss = 3.3161
Epoch 1, Step 170: Loss = 5.2373
Epoch 1, Step 180: Loss = 1.7471
Epoch 1, Step 190: Loss = 11.8584
Epoch 1, Step 200: Loss = 6.1159
Epoch 1, Step 210: Loss = 3.9472
Epoch 1, Step 220: Loss = 4.5249
Epoch 1, Step 230: Loss = 2.9620
Epoch 1, Step 240: Loss = 6.5698
Epoch 1, Step 250: Loss = 4.3863
Epoch 1, Step 260: Loss = 10.9907
Epoch 1, Step 270: Loss = 3.9604
Epoch 1, Step 280: Loss = 5.9834
Epoch 1, Step 290: Loss = 6.6325
Epoch 1, Step 300:




Epoch 1 Summary:
Training Metrics:
accuracy: 0.5215
precision: 0.5122
recall: 0.9008
f1: 0.6531
loss: 4.5172

Validation Metrics:
accuracy: 0.5633
precision: 0.5447
recall: 0.7717
f1: 0.6386
loss: 2.8352




Epoch 2, Step 0: Loss = 6.1672
Epoch 2, Step 10: Loss = 2.3898
Epoch 2, Step 20: Loss = 3.3454
Epoch 2, Step 30: Loss = 3.7459
Epoch 2, Step 40: Loss = 4.1902
Epoch 2, Step 50: Loss = 0.8775
Epoch 2, Step 60: Loss = 1.3822
Epoch 2, Step 70: Loss = 2.9451
Epoch 2, Step 80: Loss = 2.6739
Epoch 2, Step 90: Loss = 4.2030
Epoch 2, Step 100: Loss = 3.5155
Epoch 2, Step 110: Loss = 0.0948
Epoch 2, Step 120: Loss = 2.0932
Epoch 2, Step 130: Loss = 1.3959
Epoch 2, Step 140: Loss = 1.4338
Epoch 2, Step 150: Loss = 1.6439
Epoch 2, Step 160: Loss = 0.0248
Epoch 2, Step 170: Loss = 2.1752
Epoch 2, Step 180: Loss = 0.4570
Epoch 2, Step 190: Loss = 1.3965
Epoch 2, Step 200: Loss = 0.4177
Epoch 2, Step 210: Loss = 1.9337
Epoch 2, Step 220: Loss = 0.3697
Epoch 2, Step 230: Loss = 3.8065
Epoch 2, Step 240: Loss = 0.6871
Epoch 2, Step 250: Loss = 5.1951
Epoch 2, Step 260: Loss = 1.7572
Epoch 2, Step 270: Loss = 3.6431
Epoch 2, Step 280: Loss = 1.1843
Epoch 2, Step 290: Loss = 0.6126
Epoch 2, Step 300: Lo




Epoch 2 Summary:
Training Metrics:
accuracy: 0.6410
precision: 0.6156
recall: 0.7508
f1: 0.6766
loss: 1.9832

Validation Metrics:
accuracy: 0.6750
precision: 0.6586
recall: 0.7267
f1: 0.6910
loss: 1.6652




Epoch 3, Step 0: Loss = 1.2152
Epoch 3, Step 10: Loss = 1.6102
Epoch 3, Step 20: Loss = 1.9743
Epoch 3, Step 30: Loss = 3.6414
Epoch 3, Step 40: Loss = 1.1431
Epoch 3, Step 50: Loss = 1.0116
Epoch 3, Step 60: Loss = 1.0044
Epoch 3, Step 70: Loss = 0.7341
Epoch 3, Step 80: Loss = 2.2380
Epoch 3, Step 90: Loss = 1.1697
Epoch 3, Step 100: Loss = 3.9205
Epoch 3, Step 110: Loss = 0.4992
Epoch 3, Step 120: Loss = 0.8348
Epoch 3, Step 130: Loss = 0.4732
Epoch 3, Step 140: Loss = 0.0052
Epoch 3, Step 150: Loss = 2.7847
Epoch 3, Step 160: Loss = 0.7133
Epoch 3, Step 170: Loss = 2.1583
Epoch 3, Step 180: Loss = 1.5500
Epoch 3, Step 190: Loss = 2.6846
Epoch 3, Step 200: Loss = 0.9637
Epoch 3, Step 210: Loss = 3.0236
Epoch 3, Step 220: Loss = 4.0855
Epoch 3, Step 230: Loss = 3.1547
Epoch 3, Step 240: Loss = 0.4734
Epoch 3, Step 250: Loss = 1.0515
Epoch 3, Step 260: Loss = 0.6187
Epoch 3, Step 270: Loss = 0.9076
Epoch 3, Step 280: Loss = 0.0932
Epoch 3, Step 290: Loss = 1.2509
Epoch 3, Step 300: Lo




Epoch 3 Summary:
Training Metrics:
accuracy: 0.7206
precision: 0.7006
recall: 0.7704
f1: 0.7339
loss: 1.3864

Validation Metrics:
accuracy: 0.7425
precision: 0.7263
recall: 0.7783
f1: 0.7514
loss: 1.3354




Epoch 4, Step 0: Loss = 0.6833
Epoch 4, Step 10: Loss = 0.3143
Epoch 4, Step 20: Loss = 1.9596
Epoch 4, Step 30: Loss = 0.8291
Epoch 4, Step 40: Loss = 0.2503
Epoch 4, Step 50: Loss = 0.5606
Epoch 4, Step 60: Loss = 0.0359
Epoch 4, Step 70: Loss = 2.5164
Epoch 4, Step 80: Loss = 0.9383
Epoch 4, Step 90: Loss = 1.1410
Epoch 4, Step 100: Loss = 1.3534
Epoch 4, Step 110: Loss = 4.7007
Epoch 4, Step 120: Loss = 0.6299
Epoch 4, Step 130: Loss = 0.0877
Epoch 4, Step 140: Loss = 0.4970
Epoch 4, Step 150: Loss = 0.3356
Epoch 4, Step 160: Loss = 1.5285
Epoch 4, Step 170: Loss = 1.8175
Epoch 4, Step 180: Loss = 1.5497
Epoch 4, Step 190: Loss = 0.1805
Epoch 4, Step 200: Loss = 2.0935
Epoch 4, Step 210: Loss = 0.6519
Epoch 4, Step 220: Loss = 0.0374
Epoch 4, Step 230: Loss = 1.8114
Epoch 4, Step 240: Loss = 1.3333
Epoch 4, Step 250: Loss = 0.6642
Epoch 4, Step 260: Loss = 0.2094
Epoch 4, Step 270: Loss = 0.1145
Epoch 4, Step 280: Loss = 0.5310
Epoch 4, Step 290: Loss = 0.0798
Epoch 4, Step 300: Lo




Epoch 4 Summary:
Training Metrics:
accuracy: 0.7546
precision: 0.7346
recall: 0.7971
f1: 0.7646
loss: 1.1673

Validation Metrics:
accuracy: 0.7675
precision: 0.7481
recall: 0.8067
f1: 0.7763
loss: 1.1974




Epoch 5, Step 0: Loss = 0.3458
Epoch 5, Step 10: Loss = 0.0769
Epoch 5, Step 20: Loss = 2.7053
Epoch 5, Step 30: Loss = 0.0545
Epoch 5, Step 40: Loss = 2.2910
Epoch 5, Step 50: Loss = 1.0668
Epoch 5, Step 60: Loss = 1.0030
Epoch 5, Step 70: Loss = 0.8226
Epoch 5, Step 80: Loss = 2.3529
Epoch 5, Step 90: Loss = 1.3237
Epoch 5, Step 100: Loss = 1.1862
Epoch 5, Step 110: Loss = 1.6630
Epoch 5, Step 120: Loss = 0.8432
Epoch 5, Step 130: Loss = 0.2741
Epoch 5, Step 140: Loss = 1.1029
Epoch 5, Step 150: Loss = 0.7915
Epoch 5, Step 160: Loss = 0.9836
Epoch 5, Step 170: Loss = 1.5876
Epoch 5, Step 180: Loss = 0.2822
Epoch 5, Step 190: Loss = 0.7002
Epoch 5, Step 200: Loss = 1.0172
Epoch 5, Step 210: Loss = 1.6254
Epoch 5, Step 220: Loss = 2.1918
Epoch 5, Step 230: Loss = 3.0301
Epoch 5, Step 240: Loss = 0.3027
Epoch 5, Step 250: Loss = 4.5181
Epoch 5, Step 260: Loss = 0.3487
Epoch 5, Step 270: Loss = 0.5168
Epoch 5, Step 280: Loss = 0.4114
Epoch 5, Step 290: Loss = 1.5120
Epoch 5, Step 300: Lo




Epoch 5 Summary:
Training Metrics:
accuracy: 0.7735
precision: 0.7514
recall: 0.8175
f1: 0.7831
loss: 1.0762

Validation Metrics:
accuracy: 0.7783
precision: 0.7554
recall: 0.8233
f1: 0.7879
loss: 1.1333




Epoch 6, Step 0: Loss = 1.1575
Epoch 6, Step 10: Loss = 2.1090
Epoch 6, Step 20: Loss = 2.5501
Epoch 6, Step 30: Loss = 0.5987
Epoch 6, Step 40: Loss = 1.1132
Epoch 6, Step 50: Loss = 1.1178
Epoch 6, Step 60: Loss = 1.2708
Epoch 6, Step 70: Loss = 1.1846
Epoch 6, Step 80: Loss = 0.1683
Epoch 6, Step 90: Loss = 2.0991
Epoch 6, Step 100: Loss = 1.2198
Epoch 6, Step 110: Loss = 1.1815
Epoch 6, Step 120: Loss = 1.6097
Epoch 6, Step 130: Loss = 0.7489
Epoch 6, Step 140: Loss = 1.3726
Epoch 6, Step 150: Loss = 0.7692
Epoch 6, Step 160: Loss = 0.2334
Epoch 6, Step 170: Loss = 0.4133
Epoch 6, Step 180: Loss = 0.1772
Epoch 6, Step 190: Loss = 0.3404
Epoch 6, Step 200: Loss = 1.9419
Epoch 6, Step 210: Loss = 0.0083
Epoch 6, Step 220: Loss = 0.2102
Epoch 6, Step 230: Loss = 1.1992
Epoch 6, Step 240: Loss = 1.5220
Epoch 6, Step 250: Loss = 0.4550
Epoch 6, Step 260: Loss = 2.2495
Epoch 6, Step 270: Loss = 0.3898
Epoch 6, Step 280: Loss = 0.8162
Epoch 6, Step 290: Loss = 0.2518
Epoch 6, Step 300: Lo




Epoch 6 Summary:
Training Metrics:
accuracy: 0.7817
precision: 0.7610
recall: 0.8213
f1: 0.7900
loss: 1.0251

Validation Metrics:
accuracy: 0.7817
precision: 0.7584
recall: 0.8267
f1: 0.7911
loss: 1.1041




Epoch 7, Step 0: Loss = 1.5632
Epoch 7, Step 10: Loss = 0.5512
Epoch 7, Step 20: Loss = 0.2140
Epoch 7, Step 30: Loss = 0.1682
Epoch 7, Step 40: Loss = 1.4108
Epoch 7, Step 50: Loss = 1.1833
Epoch 7, Step 60: Loss = 0.5808
Epoch 7, Step 70: Loss = 0.2798
Epoch 7, Step 80: Loss = 0.5960
Epoch 7, Step 90: Loss = 1.3553
Epoch 7, Step 100: Loss = 1.3510
Epoch 7, Step 110: Loss = 1.1720
Epoch 7, Step 120: Loss = 0.0883
Epoch 7, Step 130: Loss = 0.0924
Epoch 7, Step 140: Loss = 1.3891
Epoch 7, Step 150: Loss = 3.0635
Epoch 7, Step 160: Loss = 0.1187
Epoch 7, Step 170: Loss = 3.0975
Epoch 7, Step 180: Loss = 0.3527
Epoch 7, Step 190: Loss = 0.6549
Epoch 7, Step 200: Loss = 1.4347
Epoch 7, Step 210: Loss = 1.5404
Epoch 7, Step 220: Loss = 1.3929
Epoch 7, Step 230: Loss = 0.1271
Epoch 7, Step 240: Loss = 0.5751
Epoch 7, Step 250: Loss = 1.0511
Epoch 7, Step 260: Loss = 0.1827
Epoch 7, Step 270: Loss = 0.5635
Epoch 7, Step 280: Loss = 0.6240
Epoch 7, Step 290: Loss = 0.6908
Epoch 7, Step 300: Lo




Epoch 7 Summary:
Training Metrics:
accuracy: 0.7881
precision: 0.7661
recall: 0.8296
f1: 0.7966
loss: 0.9965

Validation Metrics:
accuracy: 0.7858
precision: 0.7626
recall: 0.8300
f1: 0.7949
loss: 1.0835




Epoch 8, Step 0: Loss = 2.0506
Epoch 8, Step 10: Loss = 2.2039
Epoch 8, Step 20: Loss = 0.1054
Epoch 8, Step 30: Loss = 0.9077
Epoch 8, Step 40: Loss = 0.2612
Epoch 8, Step 50: Loss = 0.7970
Epoch 8, Step 60: Loss = 0.0249
Epoch 8, Step 70: Loss = 0.4630
Epoch 8, Step 80: Loss = 1.4290
Epoch 8, Step 90: Loss = 1.6756
Epoch 8, Step 100: Loss = 0.6592
Epoch 8, Step 110: Loss = 0.8804
Epoch 8, Step 120: Loss = 0.6285
Epoch 8, Step 130: Loss = 0.0035
Epoch 8, Step 140: Loss = 0.1164
Epoch 8, Step 150: Loss = 0.8034
Epoch 8, Step 160: Loss = 0.0108
Epoch 8, Step 170: Loss = 0.9579
Epoch 8, Step 180: Loss = 2.3557
Epoch 8, Step 190: Loss = 0.4627
Epoch 8, Step 200: Loss = 0.3111
Epoch 8, Step 210: Loss = 0.8470
Epoch 8, Step 220: Loss = 0.5150
Epoch 8, Step 230: Loss = 0.8725
Epoch 8, Step 240: Loss = 1.3001
Epoch 8, Step 250: Loss = 0.1751
Epoch 8, Step 260: Loss = 2.4933
Epoch 8, Step 270: Loss = 1.5086
Epoch 8, Step 280: Loss = 0.0083
Epoch 8, Step 290: Loss = 1.1694
Epoch 8, Step 300: Lo




Epoch 8 Summary:
Training Metrics:
accuracy: 0.7908
precision: 0.7697
recall: 0.8300
f1: 0.7987
loss: 0.9842

Validation Metrics:
accuracy: 0.7892
precision: 0.7657
recall: 0.8333
f1: 0.7981
loss: 1.0733




Epoch 9, Step 0: Loss = 2.8891
Epoch 9, Step 10: Loss = 0.8393
Epoch 9, Step 20: Loss = 0.7205
Epoch 9, Step 30: Loss = 0.0155
Epoch 9, Step 40: Loss = 0.0215
Epoch 9, Step 50: Loss = 1.7510
Epoch 9, Step 60: Loss = 0.2488
Epoch 9, Step 70: Loss = 0.9211
Epoch 9, Step 80: Loss = 1.5501
Epoch 9, Step 90: Loss = 0.3928
Epoch 9, Step 100: Loss = 0.2062
Epoch 9, Step 110: Loss = 1.1353
Epoch 9, Step 120: Loss = 0.0013
Epoch 9, Step 130: Loss = 2.2504
Epoch 9, Step 140: Loss = 0.2503
Epoch 9, Step 150: Loss = 0.6431
Epoch 9, Step 160: Loss = 3.1333
Epoch 9, Step 170: Loss = 0.5245
Epoch 9, Step 180: Loss = 0.3638
Epoch 9, Step 190: Loss = 1.7643
Epoch 9, Step 200: Loss = 2.6654
Epoch 9, Step 210: Loss = 0.7576
Epoch 9, Step 220: Loss = 0.0055
Epoch 9, Step 230: Loss = 0.7528
Epoch 9, Step 240: Loss = 2.2117
Epoch 9, Step 250: Loss = 2.4503
Epoch 9, Step 260: Loss = 0.0094
Epoch 9, Step 270: Loss = 1.3970
Epoch 9, Step 280: Loss = 0.2414
Epoch 9, Step 290: Loss = 0.0255
Epoch 9, Step 300: Lo




Epoch 9 Summary:
Training Metrics:
accuracy: 0.7898
precision: 0.7688
recall: 0.8287
f1: 0.7977
loss: 0.9757

Validation Metrics:
accuracy: 0.7900
precision: 0.7661
recall: 0.8350
f1: 0.7990
loss: 1.0674


In [1]:
import os
import json
gc = None
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig,
    BitsAndBytesConfig,
    get_linear_schedule_with_warmup,
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def setup_model_and_tokenizer(model_name, device):
    # initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token

    # model configuration
    model_config = AutoConfig.from_pretrained(
        model_name,
        num_labels=2,
        pad_token_id=tokenizer.pad_token_id,
        use_cache=False
    )

    # 8-bit quantization setup
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )

    # LoRA configuration
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer


def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }


def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()


def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds, all_labels = [], []
    total_loss = 0
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics


def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=9):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds, train_labels = [], []
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            loss.backward()
            if (step + 1) % 2 == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            torch.cuda.empty_cache()
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
        val_metrics = evaluate_model(model, val_loader, device)
        print(f"\nEpoch {epoch+1} Summary:")
        print_metrics = lambda m: [print(f"{k}: {v:.4f}") for k, v in m.items() if k != 'confusion_matrix']
        print("Training Metrics:")
        print_metrics(train_metrics)
        print("\nValidation Metrics:")
        print_metrics(val_metrics)
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        torch.cuda.empty_cache()
        gc.collect()
    return best_model_state, best_val_metrics


def main():
    login(token="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")
    device = setup_environment()
   
    model_name = 'Qwen/Qwen3-8B'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    train_df, val_df = train_test_split(
        emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42
    )
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 9
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    best_model_state, best_metrics = train_model(
        model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=num_epochs
    )
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_Qwen")
    os.makedirs(output_dir, exist_ok=True)
    # save PEFT-wrapped model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # save training config and metrics
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() for k, v in best_metrics.items()}
    }
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()


Using GPU: NVIDIA RTX A5000
GPU Memory: 23.68 GB


model.safetensors.index.json:   0%|          | 0.00/32.9k [00:00<?, ?B/s]

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not in

model-00004-of-00005.safetensors:   0%|          | 0.00/3.19G [00:00<?, ?B/s]

model-00005-of-00005.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

model-00003-of-00005.safetensors:   0%|          | 0.00/3.96G [00:00<?, ?B/s]

model-00002-of-00005.safetensors:   0%|          | 0.00/3.99G [00:00<?, ?B/s]

model-00001-of-00005.safetensors:   0%|          | 0.00/4.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Step 0: Loss = 2.0400
Epoch 1, Step 10: Loss = 1.5032
Epoch 1, Step 20: Loss = 0.9281
Epoch 1, Step 30: Loss = 1.2490
Epoch 1, Step 40: Loss = 0.7756
Epoch 1, Step 50: Loss = 1.3464
Epoch 1, Step 60: Loss = 0.5596
Epoch 1, Step 70: Loss = 1.7058
Epoch 1, Step 80: Loss = 0.7351
Epoch 1, Step 90: Loss = 1.9586
Epoch 1, Step 100: Loss = 1.6562
Epoch 1, Step 110: Loss = 1.1429
Epoch 1, Step 120: Loss = 1.5777
Epoch 1, Step 130: Loss = 1.8174
Epoch 1, Step 140: Loss = 0.2428
Epoch 1, Step 150: Loss = 1.7237
Epoch 1, Step 160: Loss = 1.1876
Epoch 1, Step 170: Loss = 0.9066
Epoch 1, Step 180: Loss = 1.5048
Epoch 1, Step 190: Loss = 1.2130
Epoch 1, Step 200: Loss = 0.6908
Epoch 1, Step 210: Loss = 1.5208
Epoch 1, Step 220: Loss = 2.0937
Epoch 1, Step 230: Loss = 1.3716
Epoch 1, Step 240: Loss = 1.7857
Epoch 1, Step 250: Loss = 0.9519
Epoch 1, Step 260: Loss = 1.4492
Epoch 1, Step 270: Loss = 0.9419
Epoch 1, Step 280: Loss = 0.6507
Epoch 1, Step 290: Loss = 1.1684
Epoch 1, Step 300: Lo




Epoch 1 Summary:
Training Metrics:
accuracy: 0.5267
precision: 0.5146
recall: 0.9379
f1: 0.6646
loss: 1.2231

Validation Metrics:
accuracy: 0.6100
precision: 0.5846
recall: 0.7600
f1: 0.6609
loss: 0.6945




Epoch 2, Step 0: Loss = 0.4935
Epoch 2, Step 10: Loss = 0.8578
Epoch 2, Step 20: Loss = 0.5711
Epoch 2, Step 30: Loss = 1.0760
Epoch 2, Step 40: Loss = 0.8506
Epoch 2, Step 50: Loss = 1.0648
Epoch 2, Step 60: Loss = 0.4281
Epoch 2, Step 70: Loss = 0.5850
Epoch 2, Step 80: Loss = 1.1257
Epoch 2, Step 90: Loss = 0.8209
Epoch 2, Step 100: Loss = 0.9596
Epoch 2, Step 110: Loss = 0.4166
Epoch 2, Step 120: Loss = 0.3897
Epoch 2, Step 130: Loss = 0.4037
Epoch 2, Step 140: Loss = 0.8420
Epoch 2, Step 150: Loss = 0.3076
Epoch 2, Step 160: Loss = 0.7378
Epoch 2, Step 170: Loss = 0.6178
Epoch 2, Step 180: Loss = 0.6433
Epoch 2, Step 190: Loss = 0.5357
Epoch 2, Step 200: Loss = 0.3213
Epoch 2, Step 210: Loss = 0.4607
Epoch 2, Step 220: Loss = 0.4637
Epoch 2, Step 230: Loss = 0.4731
Epoch 2, Step 240: Loss = 0.5359
Epoch 2, Step 250: Loss = 0.8210
Epoch 2, Step 260: Loss = 0.6590
Epoch 2, Step 270: Loss = 0.5159
Epoch 2, Step 280: Loss = 0.6984
Epoch 2, Step 290: Loss = 0.3334
Epoch 2, Step 300: Lo




Epoch 2 Summary:
Training Metrics:
accuracy: 0.7423
precision: 0.7182
recall: 0.7975
f1: 0.7558
loss: 0.5395

Validation Metrics:
accuracy: 0.8192
precision: 0.7888
recall: 0.8717
f1: 0.8282
loss: 0.4339




Epoch 3, Step 0: Loss = 0.5836
Epoch 3, Step 10: Loss = 0.5113
Epoch 3, Step 20: Loss = 0.3375
Epoch 3, Step 30: Loss = 0.4506
Epoch 3, Step 40: Loss = 0.7465
Epoch 3, Step 50: Loss = 0.5448
Epoch 3, Step 60: Loss = 0.4044
Epoch 3, Step 70: Loss = 0.2668
Epoch 3, Step 80: Loss = 0.3763
Epoch 3, Step 90: Loss = 0.4105
Epoch 3, Step 100: Loss = 0.2770
Epoch 3, Step 110: Loss = 0.3383
Epoch 3, Step 120: Loss = 0.3678
Epoch 3, Step 130: Loss = 0.4097
Epoch 3, Step 140: Loss = 0.3559
Epoch 3, Step 150: Loss = 0.5972
Epoch 3, Step 160: Loss = 0.5034
Epoch 3, Step 170: Loss = 0.2258
Epoch 3, Step 180: Loss = 0.4395
Epoch 3, Step 190: Loss = 0.5918
Epoch 3, Step 200: Loss = 0.5012
Epoch 3, Step 210: Loss = 0.5838
Epoch 3, Step 220: Loss = 0.3095
Epoch 3, Step 230: Loss = 0.3524
Epoch 3, Step 240: Loss = 0.3641
Epoch 3, Step 250: Loss = 0.2782
Epoch 3, Step 260: Loss = 0.2705
Epoch 3, Step 270: Loss = 0.4984
Epoch 3, Step 280: Loss = 0.1895
Epoch 3, Step 290: Loss = 1.0596
Epoch 3, Step 300: Lo




Epoch 3 Summary:
Training Metrics:
accuracy: 0.8433
precision: 0.8259
recall: 0.8700
f1: 0.8474
loss: 0.4007

Validation Metrics:
accuracy: 0.8650
precision: 0.8521
recall: 0.8833
f1: 0.8674
loss: 0.3610




Epoch 4, Step 0: Loss = 0.7704
Epoch 4, Step 10: Loss = 0.2577
Epoch 4, Step 20: Loss = 0.7176
Epoch 4, Step 30: Loss = 0.4735
Epoch 4, Step 40: Loss = 0.5380
Epoch 4, Step 50: Loss = 1.1122
Epoch 4, Step 60: Loss = 0.1949
Epoch 4, Step 70: Loss = 0.4304
Epoch 4, Step 80: Loss = 0.2104
Epoch 4, Step 90: Loss = 0.5541
Epoch 4, Step 100: Loss = 0.6340
Epoch 4, Step 110: Loss = 0.2387
Epoch 4, Step 120: Loss = 0.3883
Epoch 4, Step 130: Loss = 0.5401
Epoch 4, Step 140: Loss = 0.5307
Epoch 4, Step 150: Loss = 0.3679
Epoch 4, Step 160: Loss = 0.2383
Epoch 4, Step 170: Loss = 0.4136
Epoch 4, Step 180: Loss = 0.5416
Epoch 4, Step 190: Loss = 0.1973
Epoch 4, Step 200: Loss = 0.4288
Epoch 4, Step 210: Loss = 0.2718
Epoch 4, Step 220: Loss = 0.4971
Epoch 4, Step 230: Loss = 0.3559
Epoch 4, Step 240: Loss = 0.2384
Epoch 4, Step 250: Loss = 0.5520
Epoch 4, Step 260: Loss = 0.1441
Epoch 4, Step 270: Loss = 0.6296
Epoch 4, Step 280: Loss = 0.6485
Epoch 4, Step 290: Loss = 0.5729
Epoch 4, Step 300: Lo




Epoch 4 Summary:
Training Metrics:
accuracy: 0.8690
precision: 0.8575
recall: 0.8850
f1: 0.8710
loss: 0.3581

Validation Metrics:
accuracy: 0.8758
precision: 0.8667
recall: 0.8883
f1: 0.8774
loss: 0.3381




Epoch 5, Step 0: Loss = 0.3967
Epoch 5, Step 10: Loss = 0.1533
Epoch 5, Step 20: Loss = 0.3057
Epoch 5, Step 30: Loss = 0.2526
Epoch 5, Step 40: Loss = 0.3793
Epoch 5, Step 50: Loss = 0.2019
Epoch 5, Step 60: Loss = 0.1681
Epoch 5, Step 70: Loss = 0.5748
Epoch 5, Step 80: Loss = 0.6317
Epoch 5, Step 90: Loss = 0.4194
Epoch 5, Step 100: Loss = 0.2551
Epoch 5, Step 110: Loss = 0.2289
Epoch 5, Step 120: Loss = 0.2292
Epoch 5, Step 130: Loss = 0.3456
Epoch 5, Step 140: Loss = 0.6530
Epoch 5, Step 150: Loss = 0.4519
Epoch 5, Step 160: Loss = 0.2738
Epoch 5, Step 170: Loss = 0.3340
Epoch 5, Step 180: Loss = 0.2291
Epoch 5, Step 190: Loss = 0.2352
Epoch 5, Step 200: Loss = 0.2451
Epoch 5, Step 210: Loss = 0.3215
Epoch 5, Step 220: Loss = 0.2511
Epoch 5, Step 230: Loss = 0.2990
Epoch 5, Step 240: Loss = 0.1773
Epoch 5, Step 250: Loss = 0.2663
Epoch 5, Step 260: Loss = 0.1894
Epoch 5, Step 270: Loss = 0.2492
Epoch 5, Step 280: Loss = 0.4137
Epoch 5, Step 290: Loss = 0.3438
Epoch 5, Step 300: Lo




Epoch 5 Summary:
Training Metrics:
accuracy: 0.8779
precision: 0.8687
recall: 0.8904
f1: 0.8794
loss: 0.3415

Validation Metrics:
accuracy: 0.8808
precision: 0.8715
recall: 0.8933
f1: 0.8823
loss: 0.3270




Epoch 6, Step 0: Loss = 0.3083
Epoch 6, Step 10: Loss = 0.3131
Epoch 6, Step 20: Loss = 0.2576
Epoch 6, Step 30: Loss = 0.2864
Epoch 6, Step 40: Loss = 0.1823
Epoch 6, Step 50: Loss = 0.3457
Epoch 6, Step 60: Loss = 0.7559
Epoch 6, Step 70: Loss = 0.3130
Epoch 6, Step 80: Loss = 0.3231
Epoch 6, Step 90: Loss = 0.3391
Epoch 6, Step 100: Loss = 0.3090
Epoch 6, Step 110: Loss = 0.2850
Epoch 6, Step 120: Loss = 0.5416
Epoch 6, Step 130: Loss = 0.6859
Epoch 6, Step 140: Loss = 0.7074
Epoch 6, Step 150: Loss = 0.1691
Epoch 6, Step 160: Loss = 0.2883
Epoch 6, Step 170: Loss = 1.4343
Epoch 6, Step 180: Loss = 0.8671
Epoch 6, Step 190: Loss = 1.1141
Epoch 6, Step 200: Loss = 0.4129
Epoch 6, Step 210: Loss = 0.4518
Epoch 6, Step 220: Loss = 0.5931
Epoch 6, Step 230: Loss = 0.2152
Epoch 6, Step 240: Loss = 0.4375
Epoch 6, Step 250: Loss = 0.2649
Epoch 6, Step 260: Loss = 0.3738
Epoch 6, Step 270: Loss = 0.3496
Epoch 6, Step 280: Loss = 0.2314
Epoch 6, Step 290: Loss = 0.2393
Epoch 6, Step 300: Lo




Epoch 6 Summary:
Training Metrics:
accuracy: 0.8831
precision: 0.8770
recall: 0.8912
f1: 0.8841
loss: 0.3319

Validation Metrics:
accuracy: 0.8808
precision: 0.8703
recall: 0.8950
f1: 0.8825
loss: 0.3227




Epoch 7, Step 0: Loss = 0.2119
Epoch 7, Step 10: Loss = 0.2691
Epoch 7, Step 20: Loss = 0.1466
Epoch 7, Step 30: Loss = 0.1895
Epoch 7, Step 40: Loss = 0.5581
Epoch 7, Step 50: Loss = 0.4857
Epoch 7, Step 60: Loss = 0.3602
Epoch 7, Step 70: Loss = 0.2163
Epoch 7, Step 80: Loss = 0.3529
Epoch 7, Step 90: Loss = 0.3178
Epoch 7, Step 100: Loss = 0.1936
Epoch 7, Step 110: Loss = 0.2263
Epoch 7, Step 120: Loss = 0.7496
Epoch 7, Step 130: Loss = 0.2838
Epoch 7, Step 140: Loss = 0.1994
Epoch 7, Step 150: Loss = 0.3087
Epoch 7, Step 160: Loss = 0.4410
Epoch 7, Step 170: Loss = 0.1600
Epoch 7, Step 180: Loss = 0.1729
Epoch 7, Step 190: Loss = 0.3289
Epoch 7, Step 200: Loss = 0.4733
Epoch 7, Step 210: Loss = 0.3739
Epoch 7, Step 220: Loss = 0.2684
Epoch 7, Step 230: Loss = 0.2334
Epoch 7, Step 240: Loss = 0.7815
Epoch 7, Step 250: Loss = 0.3623
Epoch 7, Step 260: Loss = 0.1311
Epoch 7, Step 270: Loss = 0.3312
Epoch 7, Step 280: Loss = 0.1462
Epoch 7, Step 290: Loss = 0.2451
Epoch 7, Step 300: Lo




Epoch 7 Summary:
Training Metrics:
accuracy: 0.8862
precision: 0.8759
recall: 0.9000
f1: 0.8878
loss: 0.3289

Validation Metrics:
accuracy: 0.8825
precision: 0.8708
recall: 0.8983
f1: 0.8843
loss: 0.3201




Epoch 8, Step 0: Loss = 0.2203
Epoch 8, Step 10: Loss = 0.1895
Epoch 8, Step 20: Loss = 1.4238
Epoch 8, Step 30: Loss = 0.3891
Epoch 8, Step 40: Loss = 0.2578
Epoch 8, Step 50: Loss = 0.1392
Epoch 8, Step 60: Loss = 0.3378
Epoch 8, Step 70: Loss = 0.4827
Epoch 8, Step 80: Loss = 0.2176
Epoch 8, Step 90: Loss = 0.2140
Epoch 8, Step 100: Loss = 0.3228
Epoch 8, Step 110: Loss = 0.3936
Epoch 8, Step 120: Loss = 0.3117
Epoch 8, Step 130: Loss = 0.4519
Epoch 8, Step 140: Loss = 0.4622
Epoch 8, Step 150: Loss = 0.3693
Epoch 8, Step 160: Loss = 0.1556
Epoch 8, Step 170: Loss = 0.2538
Epoch 8, Step 180: Loss = 0.1868
Epoch 8, Step 190: Loss = 0.6475
Epoch 8, Step 200: Loss = 0.1653
Epoch 8, Step 210: Loss = 0.3542
Epoch 8, Step 220: Loss = 0.4491
Epoch 8, Step 230: Loss = 0.3337
Epoch 8, Step 240: Loss = 0.3051
Epoch 8, Step 250: Loss = 0.2622
Epoch 8, Step 260: Loss = 0.5999
Epoch 8, Step 270: Loss = 0.1852
Epoch 8, Step 280: Loss = 0.3570
Epoch 8, Step 290: Loss = 0.1599
Epoch 8, Step 300: Lo




Epoch 8 Summary:
Training Metrics:
accuracy: 0.8879
precision: 0.8803
recall: 0.8979
f1: 0.8890
loss: 0.3272

Validation Metrics:
accuracy: 0.8817
precision: 0.8718
recall: 0.8950
f1: 0.8832
loss: 0.3187




Epoch 9, Step 0: Loss = 0.3322
Epoch 9, Step 10: Loss = 0.3156
Epoch 9, Step 20: Loss = 0.2705
Epoch 9, Step 30: Loss = 0.1074
Epoch 9, Step 40: Loss = 0.1773
Epoch 9, Step 50: Loss = 0.7843
Epoch 9, Step 60: Loss = 0.3809
Epoch 9, Step 70: Loss = 0.4093
Epoch 9, Step 80: Loss = 0.3910
Epoch 9, Step 90: Loss = 0.3058
Epoch 9, Step 100: Loss = 0.3301
Epoch 9, Step 110: Loss = 0.2834
Epoch 9, Step 120: Loss = 0.2857
Epoch 9, Step 130: Loss = 0.2809
Epoch 9, Step 140: Loss = 0.3030
Epoch 9, Step 150: Loss = 0.6060
Epoch 9, Step 160: Loss = 0.3376
Epoch 9, Step 170: Loss = 0.7047
Epoch 9, Step 180: Loss = 0.1599
Epoch 9, Step 190: Loss = 0.3878
Epoch 9, Step 200: Loss = 0.2093
Epoch 9, Step 210: Loss = 0.1912
Epoch 9, Step 220: Loss = 0.7185
Epoch 9, Step 230: Loss = 0.2558
Epoch 9, Step 240: Loss = 0.4205
Epoch 9, Step 250: Loss = 0.2906
Epoch 9, Step 260: Loss = 0.4654
Epoch 9, Step 270: Loss = 0.5212
Epoch 9, Step 280: Loss = 0.2625
Epoch 9, Step 290: Loss = 0.1758
Epoch 9, Step 300: Lo




Epoch 9 Summary:
Training Metrics:
accuracy: 0.8900
precision: 0.8830
recall: 0.8992
f1: 0.8910
loss: 0.3262

Validation Metrics:
accuracy: 0.8825
precision: 0.8708
recall: 0.8983
f1: 0.8843
loss: 0.3180


In [None]:
Qwen 
Training Metrics:
accuracy: 0.8900
precision: 0.8830
recall: 0.8992
f1: 0.8910
loss: 0.3262

Validation Metrics:
accuracy: 0.8825
precision: 0.8708
recall: 0.8983
f1: 0.8843
loss: 0.3180
    
    
    
Mistral 
Epoch 9 Summary:
Training Metrics:
accuracy: 0.7898
precision: 0.7688
recall: 0.8287
f1: 0.7977
loss: 0.9757

Validation Metrics:
accuracy: 0.7900
precision: 0.7661
recall: 0.8350
f1: 0.7990
loss: 1.0674
    
    
    
    
    
Wizard
Epoch 9 Summary:
Training Metrics:
accuracy: 0.7037
precision: 0.7164
recall: 0.6746
f1: 0.6948
loss: 1.6658

Validation Metrics:
accuracy: 0.7092
precision: 0.7237
recall: 0.6767
f1: 0.6994
loss: 1.7930
    
    
    
    
    
BERT
Epoch 6 Summary:
Training Metrics:
accuracy: 0.9810
precision: 0.9849
recall: 0.9771
f1: 0.9810
loss: 0.0617

Validation Metrics:
accuracy: 0.9817
precision: 0.9785
recall: 0.9850
f1: 0.9817
loss: 0.0522
    
    
    
    
Llama 8B
Epoch 9 Summary:
Training Metrics:
accuracy: 0.8310
precision: 0.8312
recall: 0.8308
f1: 0.8310
loss: 0.4024

Validation Metrics:
accuracy: 0.8358
precision: 0.8309
recall: 0.8433
f1: 0.8371
loss: 0.4034
    
    
Llama 7B
Epoch 8 Summary:
Training Metrics:
accuracy: 0.7631
precision: 0.7497
recall: 0.7900
f1: 0.7693
loss: 0.5396

Validation Metrics:
accuracy: 0.7683
precision: 0.7439
recall: 0.8183
f1: 0.7794
loss: 0.5283

In [None]:
import os
import json
gc = None
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    AutoConfig,
    BitsAndBytesConfig,
    get_linear_schedule_with_warmup,
)
from peft import get_peft_model, LoraConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch.nn.functional as F
from huggingface_hub import login
import bitsandbytes as bnb
import re
import gc
import seaborn as sns
import matplotlib.pyplot as plt

def setup_environment():
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        print(f"Using GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
        os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    else:
        device = torch.device("cpu")
        print("Using CPU")
    return device

class EmailDataset(Dataset):
    def __init__(self, emails_df, tokenizer, max_length=512):
        self.emails_df = emails_df.copy()
        self.emails_df['sender'] = self.emails_df['sender'].apply(clean_text)
        self.emails_df['subject'] = self.emails_df['subject'].apply(clean_text)
        self.emails_df['body'] = self.emails_df['body'].apply(clean_text)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.emails_df)

    def __getitem__(self, idx):
        email = self.emails_df.iloc[idx]
        input_text = f"Sender: {email['sender']} [SEP] Subject: {email['subject']} [SEP] {email['body']}"
        encoding = self.tokenizer(
            input_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(),
            'attention_mask': encoding['attention_mask'].squeeze(),
            'label': torch.tensor(email['label'], dtype=torch.long)
        }

def clean_text(text):
    if not isinstance(text, str):
        text = ""
    text = re.sub(r'[^A-Za-z\s]', '', text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def setup_model_and_tokenizer(model_name, device):
    # initialize tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.padding_side = "right"
    tokenizer.pad_token = tokenizer.eos_token

    # model configuration
    model_config = AutoConfig.from_pretrained(
        model_name,
        num_labels=2,
        pad_token_id=tokenizer.pad_token_id,
        use_cache=False
    )

    # 8-bit quantization setup
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    base_model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        config=model_config,
        torch_dtype=torch.bfloat16,
        quantization_config=quantization_config
    )

    # LoRA configuration
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="SEQ_CLS"
    )
    model = get_peft_model(base_model, lora_config)
    model.gradient_checkpointing_enable()
    return model, tokenizer


def compute_metrics(preds, labels):
    accuracy = accuracy_score(labels, preds)
    precision = precision_score(labels, preds)
    recall = recall_score(labels, preds)
    f1 = f1_score(labels, preds)
    conf_matrix = confusion_matrix(labels, preds)
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': conf_matrix
    }


def plot_confusion_matrix(conf_matrix, output_dir):
    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.savefig(os.path.join(output_dir, 'confusion_matrix.png'))
    plt.close()


def evaluate_model(model, data_loader, device):
    model.eval()
    all_preds, all_labels = [], []
    total_loss = 0
    with torch.no_grad():
        for batch in data_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(batch['label'].cpu().numpy())
    metrics = compute_metrics(all_preds, all_labels)
    metrics['loss'] = total_loss / len(data_loader)
    return metrics


def train_model(model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=9):
    best_val_metrics = {'f1': 0}
    best_model_state = None
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        train_preds, train_labels = [], []
        for step, batch in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            with torch.autocast(device_type='cuda', dtype=torch.float16):
                outputs = model(
                    input_ids=batch['input_ids'],
                    attention_mask=batch['attention_mask'],
                    labels=batch['label']
                )
            loss = outputs.loss
            loss.backward()
            if (step + 1) % 2 == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
                scheduler.step()
                optimizer.zero_grad()
            total_loss += loss.item()
            preds = torch.argmax(outputs.logits, dim=-1)
            train_preds.extend(preds.cpu().numpy())
            train_labels.extend(batch['label'].cpu().numpy())
            if step % 10 == 0:
                print(f"Epoch {epoch+1}, Step {step}: Loss = {loss.item():.4f}")
            torch.cuda.empty_cache()
        train_metrics = compute_metrics(train_preds, train_labels)
        train_metrics['loss'] = total_loss / len(train_loader)
        val_metrics = evaluate_model(model, val_loader, device)
        print(f"\nEpoch {epoch+1} Summary:")
        print_metrics = lambda m: [print(f"{k}: {v:.4f}") for k, v in m.items() if k != 'confusion_matrix']
        print("Training Metrics:")
        print_metrics(train_metrics)
        print("\nValidation Metrics:")
        print_metrics(val_metrics)
        if val_metrics['f1'] > best_val_metrics['f1']:
            best_val_metrics = val_metrics
            best_model_state = {k: v.cpu() for k, v in model.state_dict().items()}
        torch.cuda.empty_cache()
        gc.collect()
    return best_model_state, best_val_metrics


def main():
    login(token="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")
    device = setup_environment()
   
    model_name = 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
    data_path = os.path.expanduser("~/Downloads/Tune/FineTune/newdata_cleaned.csv")
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"File not found: {data_path}")
    model, tokenizer = setup_model_and_tokenizer(model_name, device)
    emails_df = pd.read_csv(data_path)
    train_df, val_df = train_test_split(
        emails_df, test_size=0.2, stratify=emails_df['label'], random_state=42
    )
    train_dataset = EmailDataset(train_df, tokenizer, max_length=512)
    val_dataset = EmailDataset(val_df, tokenizer, max_length=512)
    train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=8, num_workers=2, pin_memory=True)
    optimizer = bnb.optim.AdamW8bit(model.parameters(), lr=2e-5)
    num_epochs = 9
    num_training_steps = len(train_loader) * num_epochs
    num_warmup_steps = num_training_steps // 20
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_training_steps
    )
    best_model_state, best_metrics = train_model(
        model, train_loader, val_loader, optimizer, scheduler, device, num_epochs=num_epochs
    )
    output_dir = os.path.expanduser("~/Downloads/Tune/FineTune/binary_classification_DistillDeepSeek")
    os.makedirs(output_dir, exist_ok=True)
    # save PEFT-wrapped model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)
    # save training config and metrics
    config = {
        "model_name": model_name,
        "num_epochs": num_epochs,
        "learning_rate": 2e-5,
        "batch_size": 8,
        "max_length": 512,
        "warmup_steps": num_warmup_steps,
        "total_steps": num_training_steps,
        "device": str(device),
        "best_metrics": {k: float(v) if k != 'confusion_matrix' else v.tolist() for k, v in best_metrics.items()}
    }
    with open(os.path.join(output_dir, "training_config.json"), "w") as f:
        json.dump(config, f, indent=2)

if __name__ == "__main__":
    main()
