In [54]:
# !pip install sentencepiece tensorboardX


In [129]:
# --- Common Utilities and Setup ---
import os
import json
import time
import torch
import transformers
import accelerate
import huggingface_hub
import peft
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification,AutoModelForCausalLM
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.preprocessing import LabelEncoder
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support, classification_report, accuracy_score
from sklearn.utils import resample
from collections import Counter
import time
import onnxruntime as ort
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import List
import pickle

print("peft:", peft.__version__)
print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("Accelerate:", accelerate.__version__)
print("Huggingface Hub:", huggingface_hub.__version__)

# Device Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

peft: 0.14.0
Torch: 2.2.2
Transformers: 4.49.0
Accelerate: 1.4.0
Huggingface Hub: 0.29.1


In [130]:
def update_model_dict(model_alias, MODEL_NAME):
    if not os.path.exists('model_dict.json'):
        model_dict = {}
    else:
        with open('model_dict.json', 'r') as file:
            model_dict = json.load(file)

    model_dict[model_alias] = MODEL_NAME

    with open('model_dict.json', 'w') as file:
        json.dump(model_dict, file)

In [131]:
def load_and_preprocess_data(filepath="./data/train-00000-of-00001-a5a7c6e4bb30b016.parquet"):
    """Loads and preprocesses the dataset."""
    df = pd.read_parquet(filepath)
    df = df[['conversation', 'issue_area']]
    print("Original distribution:\n", df['issue_area'].value_counts())
    label_encoder = LabelEncoder()
    df["labels"] = label_encoder.fit_transform(df["issue_area"])

    #saving Label-encoder
    label_encoder_path = f"model-metric/{model_alias}/label_encoder.pkl"
    os.makedirs(os.path.dirname(label_encoder_path), exist_ok=True)
    with open(label_encoder_path, "wb") as f:
        pickle.dump(label_encoder, f)
        
    return df, label_encoder

In [132]:
def balance_dataset(df, max_count=100, random_state=42):
    """Balances the dataset using oversampling."""
    balanced_df = pd.DataFrame()
    for issue in df['issue_area'].unique():
        subset = df[df['issue_area'] == issue]
        balanced_subset = resample(subset, replace=True, n_samples=max_count, random_state=random_state)
        balanced_df = pd.concat([balanced_df, balanced_subset])
    return balanced_df.sample(frac=1, random_state=random_state).reset_index(drop=True)


In [133]:
def preprocess_conversation(conversation):
    """Preprocesses a conversation."""
    if isinstance(conversation, list):
        return " ".join([turn.get('text', '') for turn in conversation if isinstance(turn, dict)])
    return str(conversation).lower()

In [134]:
# Define PyTorch Dataset
class CustomDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=256):
        self.data = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        row = self.data.iloc[index]
        encoding = self.tokenizer(
            row["conversation"], padding="max_length", truncation=True, max_length=self.max_length, return_tensors="pt"
        )
        label = torch.tensor(row["labels"], dtype=torch.long)
        return {key: val.squeeze(0) for key, val in encoding.items()}, label

In [135]:
def create_dataloaders(df, tokenizer, batch_size=8, train_ratio=0.75):
    """Creates train and test DataLoaders."""
    train_size = int(train_ratio * len(df))
    train_df, test_df = df[:train_size], df[train_size:]
    train_dataset = CustomDataset(train_df, tokenizer)
    test_dataset = CustomDataset(test_df, tokenizer)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader, test_df

In [136]:
class LoRAMobileLLM(nn.Module):
    def __init__(self, num_labels, lora_r=8, lora_alpha=16, lora_dropout=0.1):
        super(LoRAMobileLLM, self).__init__()
        # Load the base model with the correct number of labels
        self.bert = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME
            # num_labels=num_labels  # Ensure this matches the number of classes
        )
        
        # LoRA Configuration
        lora_config = LoraConfig(
            r=lora_r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            target_modules=["q_proj", "v_proj"]
        )
        self.model = get_peft_model(self.bert, lora_config)
        
        # Custom classifier for issue prediction
        self.classifier = nn.Linear(self.model.config.hidden_size, num_labels)
        self.classifier = nn.Linear(32000, num_labels)


    def forward(self, input_ids, attention_mask):
        # Get the model outputs
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        
        # For CausalLM models, we need to use the last hidden state of the last token
        # Get the last token for each sequence in the batch
        batch_size = input_ids.shape[0]
        last_token_indices = torch.sum(attention_mask, dim=1) - 1
        
        # Extract hidden states from the last layer
        hidden_states = outputs.logits  # Shape: [batch_size, seq_len, vocab_size]
        
        # Use the representation at the first token (CLS token) for classification
        cls_embedding = hidden_states[:, 0, :]
        # print("Hidden state shape before classifier:", cls_embedding.shape)
        
        # Pass through the classifier
        return self.classifier(cls_embedding)

In [137]:
# Function to compute class weights
def compute_class_weights(labels, num_classes):
    counter = Counter(labels)
    total_samples = len(labels)
    weights = [total_samples / (num_classes * counter[i]) for i in range(num_classes)]
    return torch.tensor(weights, dtype=torch.float)

In [138]:
def train_model(model, train_loader, model_alias, epochs=3, learning_rate=5e-5, class_weights=None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    epoch_losses = []

    # Create directory for model metrics if it doesn't exist
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)

    # TensorBoard writer inside the model directory
    writer = SummaryWriter(log_dir=model_dir)

    # Convert class weights to float and move to device
    if class_weights is not None:
        class_weights = class_weights.float().to(device)
    
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    # Dictionary to store epoch-wise KPIs
    kpi_results = []

    for epoch in range(epochs):
        start_time = time.time()
        total_loss = 0
        all_preds = []
        all_labels = []

        for batch_idx, (batch, label) in enumerate(train_loader):
            batch = {k: v.to(device) for k, v in batch.items()}
            label = label.to(device)
            
            optimizer.zero_grad()
            
            # Forward pass
            logits = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
            
            loss = criterion(logits, label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
            optimizer.step()
            
            total_loss += loss.item()
            
            # Store predictions and labels for evaluation
            preds = torch.argmax(logits, dim=1).cpu().tolist()
            labels = label.cpu().tolist()
            all_preds.extend(preds)
            all_labels.extend(labels)
            
            # Log batch-level loss
            if batch_idx % 10 == 0:
                writer.add_scalar("BatchLoss/train", loss.item(), epoch * len(train_loader) + batch_idx)

        # Compute epoch-level metrics
        avg_loss = total_loss / len(train_loader)
        epoch_losses.append(avg_loss)
        accuracy = accuracy_score(all_labels, all_preds)
        precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
        epoch_time = time.time() - start_time  # Time taken per epoch

        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Accuracy={accuracy:.4f}, Precision={precision:.4f}, Recall={recall:.4f}, F1-score={f1:.4f}, Time={epoch_time:.2f}s")

        # Log metrics to TensorBoard
        writer.add_scalar("Loss/train", avg_loss, epoch)
        writer.add_scalar("Accuracy/train", accuracy, epoch)
        writer.add_scalar("Precision/train", precision, epoch)
        writer.add_scalar("Recall/train", recall, epoch)
        writer.add_scalar("F1-score/train", f1, epoch)
        writer.add_scalar("EpochTime/train", epoch_time, epoch)

        # Store KPI results
        kpi_results.append({
            "epoch": epoch + 1,
            "loss": avg_loss,
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1_score": f1,
            "time_taken": epoch_time
        })

    # Save KPIs as a JSON file
    import json
    with open(f"{model_dir}/training_metrics.json", "w") as f:
        json.dump(kpi_results, f, indent=4)

    # Plot and save training loss curve
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, epochs+1), epoch_losses, marker='o', linestyle='-', color='b')
    plt.title(f'Training Loss Over Epochs ({model_alias})')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.grid(True)
    plt.savefig(f"{model_dir}/training_loss.png")
    
    # Add figure to TensorBoard
    img = plt.gcf()
    writer.add_figure("Training Loss", img, close=True)

    writer.flush()
    writer.close()


In [139]:
def evaluate_model(model, test_loader, model_alias, label_encoder):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()
    
    # Create directory for model evaluation if it doesn't exist
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)

    # TensorBoard writer inside the model directory
    writer = SummaryWriter(log_dir=model_dir)

    all_preds, all_labels = [], []
    
    with torch.no_grad():
        for batch, label in test_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            label = label.to(device)

            # Forward pass
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(label.cpu().tolist())

    # Get class names
    class_names = label_encoder.classes_

    # Compute per-class metrics
    precision, recall, f1, support = precision_recall_fscore_support(all_labels, all_preds, average=None)
    
    # Create DataFrame for per-class metrics
    class_metrics = pd.DataFrame({
        'Class': class_names,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Support': support
    })
    
    # Compute overall metrics
    overall_precision, overall_recall, overall_f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')
    
    # Print classification report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=class_names))

    # Compute confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    cm_df = pd.DataFrame(cm, index=class_names, columns=class_names)
    
    # Plot confusion matrix
    plt.figure(figsize=(12, 10))
    sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.tight_layout()
    
    # Save confusion matrix
    cm_path = os.path.join(model_dir, "confusion_matrix.png")
    plt.savefig(cm_path)

    # Log confusion matrix to TensorBoard
    writer.add_figure("Confusion Matrix", plt.gcf(), close=True)

    # Display per-class metrics
    print("\nPer-class Metrics:")
    print(class_metrics.to_string(index=False))

    print(f"\nOverall Metrics:")
    print(f"Precision: {overall_precision:.4f}, Recall: {overall_recall:.4f}, F1-score: {overall_f1:.4f}")

    # Log overall metrics to TensorBoard
    writer.add_scalar("Precision/test", overall_precision)
    writer.add_scalar("Recall/test", overall_recall)
    writer.add_scalar("F1-score/test", overall_f1)

    # Log per-class metrics to TensorBoard
    for i, class_name in enumerate(class_names):
        writer.add_scalar(f"Precision/{class_name}", precision[i])
        writer.add_scalar(f"Recall/{class_name}", recall[i])
        writer.add_scalar(f"F1-score/{class_name}", f1[i])

    # Save per-class metrics and confusion matrix as CSV
    class_metrics.to_csv(os.path.join(model_dir, "class_metrics.csv"), index=False)
    cm_df.to_csv(os.path.join(model_dir, "confusion_matrix.csv"))

    # Save model checkpoint
    model_path = os.path.join(model_dir, f"{model_alias}.pth")
    torch.save(model.state_dict(), model_path)

    writer.flush()
    writer.close()

    return class_metrics, cm_df

In [140]:
# def export_to_onnx(model, tokenizer, onnx_path="model.onnx"):
#     """Exports the model to ONNX format."""
#     model.eval().to("cpu")
#     sample_input = tokenizer("test", return_tensors="pt")
#     input_names = ["input_ids", "attention_mask"]
#     output_names = ["output"]
#     torch.onnx.export(model, (sample_input["input_ids"], sample_input["attention_mask"]), onnx_path, input_names=input_names, output_names=output_names, dynamic_axes={"input_ids": {0: "batch", 1: "sequence"}, "attention_mask": {0: "batch", 1: "sequence"}, "output": {0: "batch"}})

# def run_onnx_inference(onnx_path, input_ids, attention_mask):
#     """Runs inference using ONNX Runtime."""
#     ort_session = ort.InferenceSession(onnx_path)
#     ort_inputs = {"input_ids": input_ids.tolist(), "attention_mask": attention_mask.tolist()}
#     ort_outputs = ort_session.run(None, ort_inputs)
#     return torch.tensor(np.array(ort_outputs[0]), dtype=torch.float32)

In [147]:
def export_to_onnx(model, tokenizer, model_alias):
    """Exports the MobileLLM-125M model to ONNX format."""
    model.eval().to("cpu")
    sample_input = tokenizer("test", return_tensors="pt")
    input_names = ["input_ids", "attention_mask"]
    output_names = ["output"]
    
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)
    onnx_path = os.path.join(model_dir, f"{model_alias}.onnx")
    
    torch.onnx.export(
        model, 
        (sample_input["input_ids"], sample_input["attention_mask"]), 
        onnx_path, 
        input_names=input_names, 
        output_names=output_names, 
        dynamic_axes={
            "input_ids": {0: "batch", 1: "sequence"}, 
            "attention_mask": {0: "batch", 1: "sequence"}, 
            "output": {0: "batch"}
        }
    )
    print(f"ONNX model exported to {onnx_path}")
    return onnx_path

def run_onnx_inference(onnx_path, input_ids, attention_mask):
    """Runs inference using ONNX Runtime."""
    ort_session = ort.InferenceSession(onnx_path)
    ort_inputs = {"input_ids": input_ids.tolist(), "attention_mask": attention_mask.tolist()}
    ort_outputs = ort_session.run(None, ort_inputs)
    return torch.tensor(np.array(ort_outputs[0]), dtype=torch.float32)

In [141]:
def compare_inference_performance(model, tokenizer, test_df, label_encoder):
    """Compares inference performance between PyTorch and ONNX Runtime."""
    sample_batch = test_df.sample(50)
    test_dataset_batch = CustomDataset(sample_batch, tokenizer)
    test_loader_batch = DataLoader(test_dataset_batch, batch_size=len(sample_batch), shuffle=False)
    batch = next(iter(test_loader_batch))
    input_ids, attention_mask, labels = batch
    
    # PyTorch Inference
    model.eval().to(device)
    start_time_torch = time.time()
    with torch.no_grad():
        torch_outputs = model(input_ids.to(device), attention_mask.to(device)).cpu()
    end_time_torch = time.time()
    latency_torch = end_time_torch - start_time_torch
    throughput_torch = len(sample_batch) / latency_torch

    # ONNX Inference
    export_to_onnx(model, tokenizer)
    start_time_onnx = time.time()
    onnx_outputs = run_onnx_inference("model.onnx", input_ids, attention_mask)
    end_time_onnx = time.time()
    latency_onnx = end_time_onnx - start_time_onnx
    throughput_onnx = len(sample_batch) / latency_onnx

    print(f"PyTorch Inference - Latency: {latency_torch:.4f}s, Throughput: {throughput_torch:.2f} samples/s")
    print(f"ONNX Inference - Latency: {latency_onnx:.4f}s, Throughput: {throughput_onnx:.2f} samples/s")

    torch_preds = torch.argmax(torch_outputs, dim=1).tolist()
    onnx_preds = torch.argmax(onnx_outputs, dim=1).tolist()
    actual_labels = labels.tolist()
    
    #compare predictions
    print("Torch predictions vs Actual labels")
    print(classification_report(actual_labels, torch_preds, target_names = label_encoder.classes_))
    print("Onnx predictions vs Actual labels")
    print(classification_report(actual_labels, onnx_preds, target_names = label_encoder.classes_))

In [148]:
def compare_inference_performance(model, tokenizer, test_df, label_encoder, model_alias):
    """Compares inference performance between PyTorch and ONNX Runtime."""
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=model_dir)
    
    sample_batch = test_df.sample(50)
    test_dataset_batch = CustomDataset(sample_batch, tokenizer)
    test_loader_batch = DataLoader(test_dataset_batch, batch_size=len(sample_batch), shuffle=False)
    batch = next(iter(test_loader_batch))
    input_ids, attention_mask, labels = batch
    
    # PyTorch Inference
    model.eval().to(device)
    start_time_torch = time.time()
    with torch.no_grad():
        torch_outputs = model(input_ids.to(device), attention_mask.to(device)).cpu()
    latency_torch = time.time() - start_time_torch
    throughput_torch = len(sample_batch) / latency_torch
    
    # ONNX Inference
    onnx_path = export_to_onnx(model, tokenizer, model_alias)
    start_time_onnx = time.time()
    onnx_outputs = run_onnx_inference(onnx_path, input_ids, attention_mask)
    latency_onnx = time.time() - start_time_onnx
    throughput_onnx = len(sample_batch) / latency_onnx
    
    print(f"PyTorch Inference - Latency: {latency_torch:.4f}s, Throughput: {throughput_torch:.2f} samples/s")
    print(f"ONNX Inference - Latency: {latency_onnx:.4f}s, Throughput: {throughput_onnx:.2f} samples/s")
    
    torch_preds = torch.argmax(torch_outputs, dim=1).tolist()
    onnx_preds = torch.argmax(onnx_outputs, dim=1).tolist()
    actual_labels = labels.tolist()
    
    # Compare predictions
    torch_report = classification_report(actual_labels, torch_preds, target_names=label_encoder.classes_, output_dict=True)
    onnx_report = classification_report(actual_labels, onnx_preds, target_names=label_encoder.classes_, output_dict=True)
    
    torch_df = pd.DataFrame(torch_report).transpose()
    onnx_df = pd.DataFrame(onnx_report).transpose()
    
    torch_df.to_csv(os.path.join(model_dir, "torch_classification_report.csv"))
    onnx_df.to_csv(os.path.join(model_dir, "onnx_classification_report.csv"))
    
    # Log metrics to TensorBoard
    writer.add_scalar("Latency/PyTorch", latency_torch)
    writer.add_scalar("Throughput/PyTorch", throughput_torch)
    writer.add_scalar("Latency/ONNX", latency_onnx)
    writer.add_scalar("Throughput/ONNX", throughput_onnx)
    
    writer.flush()
    writer.close()
    
    return torch_df, onnx_df

In [150]:
import os
import time
import torch
import numpy as np
import onnxruntime as ort
import pandas as pd
from torch.utils.data import DataLoader
from sklearn.metrics import classification_report
from torch.utils.tensorboard import SummaryWriter

def export_to_onnx(model, tokenizer, model_alias):
    """Exports MobileLLM-125M model to ONNX format."""
    model.eval().to("cpu")
    sample_input = tokenizer("test", return_tensors="pt")
    input_names = ["input_ids", "attention_mask"]
    output_names = ["output"]
    
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)
    onnx_path = os.path.join(model_dir, f"{model_alias}.onnx")
    
    torch.onnx.export(
        model, 
        (sample_input["input_ids"], sample_input["attention_mask"]), 
        onnx_path, 
        input_names=input_names, 
        output_names=output_names, 
        dynamic_axes={
            "input_ids": {0: "batch", 1: "sequence"}, 
            "attention_mask": {0: "batch", 1: "sequence"}, 
            "output": {0: "batch"}
        }
    )
    print(f"ONNX model exported to {onnx_path}")
    return onnx_path

def run_onnx_inference(onnx_path, batch):
    """Runs inference using ONNX Runtime."""
    ort_session = ort.InferenceSession(onnx_path)
    ort_inputs = {"input_ids": batch["input_ids"].tolist(), "attention_mask": batch["attention_mask"].tolist()}
    ort_outputs = ort_session.run(None, ort_inputs)
    return torch.tensor(np.array(ort_outputs[0]), dtype=torch.float32)

def compare_inference_performance(model, tokenizer, test_df, label_encoder, model_alias):
    """Compares inference performance between PyTorch and ONNX Runtime."""
    model_dir = f"model-metric/{model_alias}"
    os.makedirs(model_dir, exist_ok=True)
    writer = SummaryWriter(log_dir=model_dir)
    
    sample_batch = test_df.sample(50)
    test_dataset_batch = CustomDataset(sample_batch, tokenizer)
    test_loader_batch = DataLoader(test_dataset_batch, batch_size=len(sample_batch), shuffle=False)
    batch, labels = next(iter(test_loader_batch))
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval().to(device)
    
    # PyTorch Inference
    start_time_torch = time.time()
    with torch.no_grad():
        torch_outputs = model(input_ids=batch["input_ids"].to(device), attention_mask=batch["attention_mask"].to(device)).cpu()
    latency_torch = time.time() - start_time_torch
    throughput_torch = len(sample_batch) / latency_torch
    
    # ONNX Inference
    onnx_path = export_to_onnx(model, tokenizer, model_alias)
    start_time_onnx = time.time()
    onnx_outputs = run_onnx_inference(onnx_path, batch)
    latency_onnx = time.time() - start_time_onnx
    throughput_onnx = len(sample_batch) / latency_onnx
    
    print(f"PyTorch Inference - Latency: {latency_torch:.4f}s, Throughput: {throughput_torch:.2f} samples/s")
    print(f"ONNX Inference - Latency: {latency_onnx:.4f}s, Throughput: {throughput_onnx:.2f} samples/s")
    
    torch_preds = torch.argmax(torch_outputs, dim=1).tolist()
    onnx_preds = torch.argmax(onnx_outputs, dim=1).tolist()
    actual_labels = labels.tolist()
    
    # Classification Reports
    torch_report = classification_report(actual_labels, torch_preds, target_names=label_encoder.classes_, output_dict=True)
    onnx_report = classification_report(actual_labels, onnx_preds, target_names=label_encoder.classes_, output_dict=True)
    
    torch_df = pd.DataFrame(torch_report).transpose()
    onnx_df = pd.DataFrame(onnx_report).transpose()
    
    torch_df.to_csv(os.path.join(model_dir, "torch_classification_report.csv"))
    onnx_df.to_csv(os.path.join(model_dir, "onnx_classification_report.csv"))
    
    # Log metrics to TensorBoard
    writer.add_scalar("Latency/PyTorch", latency_torch)
    writer.add_scalar("Throughput/PyTorch", throughput_torch)
    writer.add_scalar("Latency/ONNX", latency_onnx)
    writer.add_scalar("Throughput/ONNX", throughput_onnx)
    
    writer.flush()
    writer.close()
    
    return torch_df, onnx_df

In [145]:
update_model_dict(model_alias, MODEL_NAME)

In [142]:
df, label_encoder = load_and_preprocess_data()
balanced_df = balance_dataset(df)
balanced_df['conversation'] = balanced_df['conversation'].apply(preprocess_conversation)

Original distribution:
 issue_area
Cancellations and returns    286
Order                        270
Login and Account            151
Shopping                     116
Warranty                     105
Shipping                      72
Name: count, dtype: int64


In [143]:
# Load tokenizer for facebook/MobileLLM-125M
MODEL_NAME = "facebook/MobileLLM-125M"
model_alias = 'mobileLLM-125M'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
train_loader, test_loader, test_df = create_dataloaders(balanced_df, tokenizer)

In [144]:
# Model Initialization and Training
num_classes = len(label_encoder.classes_)
model = LoRAMobileLLM(num_labels=num_classes)
class_weights = compute_class_weights(balanced_df['labels'], num_classes)

print(f"class weights of each class is: {class_weights}")

The repository for facebook/MobileLLM-125M contains custom code which must be executed to correctly load the model. You can inspect the repository content at https://hf.co/facebook/MobileLLM-125M.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N]  y


Some weights of the model checkpoint at facebook/MobileLLM-125M were not used when initializing MobileLLMForCausalLM: {'lm_head.weight'}
- This IS expected if you are initializing MobileLLMForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MobileLLMForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


class weights of each class is: tensor([1., 1., 1., 1., 1., 1.])


In [146]:
train_model(model, train_loader, model_alias='mobileLLM-125M', epochs=10, learning_rate=5e-5, class_weights=class_weights)

Epoch 1: Loss=3.0575, Accuracy=0.1467, Precision=0.1447, Recall=0.1467, F1-score=0.1447, Time=145.54s
Epoch 2: Loss=2.1663, Accuracy=0.1578, Precision=0.1617, Recall=0.1578, F1-score=0.1575, Time=133.03s
Epoch 3: Loss=2.0549, Accuracy=0.1933, Precision=0.1870, Recall=0.1933, F1-score=0.1888, Time=124.46s
Epoch 4: Loss=2.0607, Accuracy=0.1556, Precision=0.1549, Recall=0.1556, F1-score=0.1535, Time=123.62s
Epoch 5: Loss=1.9634, Accuracy=0.1800, Precision=0.1691, Recall=0.1800, F1-score=0.1689, Time=1035.03s
Epoch 6: Loss=1.9844, Accuracy=0.1711, Precision=0.1750, Recall=0.1711, F1-score=0.1698, Time=570.61s
Epoch 7: Loss=2.0650, Accuracy=0.1733, Precision=0.1733, Recall=0.1733, F1-score=0.1654, Time=109.12s
Epoch 8: Loss=2.0550, Accuracy=0.1578, Precision=0.1500, Recall=0.1578, F1-score=0.1510, Time=110.38s
Epoch 9: Loss=1.9728, Accuracy=0.2244, Precision=0.2158, Recall=0.2244, F1-score=0.2076, Time=122.09s
Epoch 10: Loss=2.0942, Accuracy=0.1711, Precision=0.1664, Recall=0.1711, F1-score

In [113]:
# Model Evaluation
evaluate_model(model, test_loader, model_alias, label_encoder)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Classification Report:
                           precision    recall  f1-score   support

Cancellations and returns       0.13      1.00      0.24        20
        Login and Account       0.00      0.00      0.00        28
                    Order       0.00      0.00      0.00        30
                 Shipping       0.00      0.00      0.00        23
                 Shopping       0.00      0.00      0.00        22
                 Warranty       0.00      0.00      0.00        27

                 accuracy                           0.13       150
                macro avg       0.02      0.17      0.04       150
             weighted avg       0.02      0.13      0.03       150


Per-class Metrics:
                    Class  Precision  Recall  F1-Score  Support
Cancellations and returns   0.133333     1.0  0.235294       20
        Login and Account   0.000000     0.0  0.000000       28
                    Order   0.000000     0.0  0.000000       30
                 Shipping  

(                       Class  Precision  Recall  F1-Score  Support
 0  Cancellations and returns   0.133333     1.0  0.235294       20
 1          Login and Account   0.000000     0.0  0.000000       28
 2                      Order   0.000000     0.0  0.000000       30
 3                   Shipping   0.000000     0.0  0.000000       23
 4                   Shopping   0.000000     0.0  0.000000       22
 5                   Warranty   0.000000     0.0  0.000000       27,
                            Cancellations and returns  Login and Account  \
 Cancellations and returns                         20                  0   
 Login and Account                                 28                  0   
 Order                                             30                  0   
 Shipping                                          23                  0   
 Shopping                                          22                  0   
 Warranty                                          27              

In [114]:
tokenizer.save_pretrained(f"model-metric/{model_alias}/tokenizer/")

('model-metric/mobileLLM-125M/tokenizer/tokenizer_config.json',
 'model-metric/mobileLLM-125M/tokenizer/special_tokens_map.json',
 'model-metric/mobileLLM-125M/tokenizer/tokenizer.model',
 'model-metric/mobileLLM-125M/tokenizer/added_tokens.json')

In [151]:
compare_inference_performance(model, tokenizer, test_df, label_encoder, model_alias)

  if sequence_length != 1:


ONNX model exported to model-metric/mobileLLM-125M/mobileLLM-125M.onnx
PyTorch Inference - Latency: 6.3648s, Throughput: 7.86 samples/s
ONNX Inference - Latency: 9.3653s, Throughput: 5.34 samples/s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


(                           precision    recall  f1-score  support
 Cancellations and returns     0.0000  0.000000  0.000000     7.00
 Login and Account             0.0000  0.000000  0.000000     9.00
 Order                         0.0000  0.000000  0.000000    10.00
 Shipping                      0.0000  0.000000  0.000000     7.00
 Shopping                      0.1800  1.000000  0.305085     9.00
 Warranty                      0.0000  0.000000  0.000000     8.00
 accuracy                      0.1800  0.180000  0.180000     0.18
 macro avg                     0.0300  0.166667  0.050847    50.00
 weighted avg                  0.0324  0.180000  0.054915    50.00,
                            precision    recall  f1-score  support
 Cancellations and returns     0.0000  0.000000  0.000000     7.00
 Login and Account             0.0000  0.000000  0.000000     9.00
 Order                         0.0000  0.000000  0.000000    10.00
 Shipping                      0.0000  0.000000  0.000000    

In [None]:
# Save the fine-tuned model
torch.save(model.state_dict(), "./lora_distilbert_trained.pth")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Test with a sample input
sample_text = "Agent: Thank you for calling BrownBox Customer Support. My name is Tom. How may I assist you today?"
inputs = tokenizer(sample_text, return_tensors="pt", truncation=True, padding="max_length", max_length=512)

# Move input to same device as model
inputs = {key: value.to(device) for key, value in inputs.items()}

# Perform inference
model.eval()
with torch.no_grad():
    outputs = model(**inputs)

# Get predicted label
predicted_label = outputs.argmax().item()
print("Predicted issue area:", label_encoder.inverse_transform([predicted_label])[0])

# Close TensorBoard writer
writer.close()

In [None]:
# imporove model
# tensorboard
# training time vs gpu/cpu




# try to use accelearte