<a href="https://colab.research.google.com/github/Nanivadla95/Advanced-Predictive-Maintenance-Framework-for-Mechanical-Equipment-in-ML/blob/main/LLM_Fine_Tuning_Comparison_Study.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U transformers datasets evaluate -q





# Imports


In [None]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate
import numpy as np


# Choosing the Base LLM

In [None]:
# Hugging Face Hub checkpoint names of the base models to fine-tune and compare.
models_to_compare = [
    "bert-base-uncased",
    "roberta-base",
]


#  Task and Data Preparation



(Optional) If you want to authenticate with the Hugging Face Hub, create an access token at: https://huggingface.co/settings/tokens


In [None]:
from datasets import load_dataset

# Load every split of the "imdb" dataset.
dataset = load_dataset("imdb")

# Shuffle each split with a fixed seed (42) so the sample is reproducible,
# then keep only the first rows to keep fine-tuning fast:
# 5000 rows for training and 1000 rows for testing.
train_dataset = (
    dataset["train"]
    .shuffle(seed=42)
    .select(range(5000))
)
test_dataset = (
    dataset["test"]
    .shuffle(seed=42)
    .select(range(1000))
)


# Tokenization Function


In [None]:
def tokenize(batch):
    """Tokenize the ``"text"`` column of a batch.

    Uses the module-level ``tokenizer`` that is bound at call time, so the
    caller must assign ``tokenizer`` before mapping this function over a
    dataset.

    Args:
        batch: Mapping with a ``"text"`` entry holding the raw texts.

    Returns:
        Whatever ``tokenizer`` returns for the texts, padded to the maximum
        length and truncated.
    """
    texts = batch["text"]
    return tokenizer(texts, padding="max_length", truncation=True)


# Evaluation Metric

In [None]:
# Accuracy metric object shared by every evaluation run.
accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Turn raw model outputs into an accuracy score.

    Args:
        eval_pred: A ``(logits, labels)`` pair.

    Returns:
        The result of ``accuracy.compute`` for the arg-max predictions
        against the reference labels.
    """
    scores, gold_labels = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predicted_labels = np.argmax(scores, axis=-1)
    return accuracy.compute(
        predictions=predicted_labels,
        references=gold_labels,
    )


#  Fine-Tuning Process
We loop over both models: for each one we tokenize the data, fine-tune with the Trainer API, and evaluate.

In [None]:
# Maps each model name to the accuracy it reached on the evaluation set.
results = {}

for model_name in models_to_compare:
    print(f"\n🚀 Fine-tuning {model_name}...\n")

    # The tokenizer has to match the checkpoint being fine-tuned; `tokenize`
    # reads this module-level `tokenizer` when it is called by `.map`.
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Tokenize only the shuffled subsets prepared earlier. Mapping over the
    # whole `dataset` would tokenize every split in full, and slicing the
    # unshuffled train split afterwards would ignore the seeded shuffle, so
    # the sample would not be guaranteed to be representative.
    tokenized_train = train_dataset.map(tokenize, batched=True)
    tokenized_test = test_dataset.map(tokenize, batched=True)

    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    training_args = TrainingArguments(
        output_dir=f"./results/{model_name}",
        # `eval_strategy` is the current name of the former
        # `evaluation_strategy` argument; recent transformers releases (which
        # `pip install -U` pulls in) reject the old keyword.
        eval_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        save_strategy="no",  # no checkpoints are written
        report_to="none",  # no external experiment tracker
        logging_steps=10,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        # Evaluate on the held-out test subset rather than on a slice of the
        # training split.
        eval_dataset=tokenized_test,
        compute_metrics=compute_metrics,
    )

    trainer.train()
    eval_result = trainer.evaluate()
    # The Trainer reports the "accuracy" metric under the key "eval_accuracy".
    results[model_name] = eval_result["eval_accuracy"]


#  Evaluation and Performance

In [None]:
# Print one line per fine-tuned model with its accuracy to four decimals.
print("\n📊 LLM Fine-Tuning Results Comparison:")
# Loop variables are deliberately not called `model`, so the module-level
# `model` (the fine-tuned network from the training loop) is not overwritten
# by a plain string.
for name, score in results.items():
    print(f"{name}: Accuracy = {score:.4f}")


# Train the Model

In [None]:
# NOTE(review): if the cells are run top to bottom, `trainer` is the object
# left bound by the last iteration of the fine-tuning loop (the final entry of
# `models_to_compare`), so this call runs another training pass on that model
# only — confirm this extra pass is intended.
trainer.train()


In [None]:
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import numpy as np
import evaluate

# Load the "imdb" dataset and keep small, reproducibly shuffled subsets:
# 2000 rows for training and 500 rows for evaluation.
raw_dataset = load_dataset("imdb")
train_dataset = raw_dataset["train"].shuffle(seed=42).select(range(2000))
test_dataset = raw_dataset["test"].shuffle(seed=42).select(range(500))

# Load evaluation metric
accuracy = evaluate.load("accuracy")

# List of base models to compare
models_to_compare = ["bert-base-uncased", "roberta-base"]


def compute_metrics(eval_pred):
    """Compute accuracy from a ``(logits, labels)`` pair.

    Defined once, outside the loop, because it does not depend on which
    model is being fine-tuned.
    """
    logits, labels = eval_pred
    # The predicted class is the index of the largest logit on the last axis.
    predictions = np.argmax(logits, axis=-1)
    return accuracy.compute(predictions=predictions, references=labels)


for model_name in models_to_compare:
    print(f"\n🚀 Fine-tuning {model_name}...\n")

    # Load the tokenizer that belongs to this checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Defined inside the loop so it uses the tokenizer loaded just above
    def tokenize(batch):
        """Tokenize the ``"text"`` column, padded to max length and truncated."""
        return tokenizer(batch["text"], padding="max_length", truncation=True)

    # Tokenize the train and test subsets
    tokenized_train = train_dataset.map(tokenize, batched=True)
    tokenized_test = test_dataset.map(tokenize, batched=True)

    # Load the model with a two-class classification head
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    # Set training arguments
    training_args = TrainingArguments(
        output_dir=f"./results/{model_name}",
        # `eval_strategy` is the current name of the former
        # `evaluation_strategy` argument, which recent transformers releases
        # no longer accept.
        eval_strategy="epoch",
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        save_strategy="no",
        report_to="none",
    )

    # Set up Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_test,
        compute_metrics=compute_metrics,
    )

    # Fine-tune, then evaluate on the test subset
    trainer.train()
    eval_result = trainer.evaluate()
    print(f"📊 Accuracy for {model_name}: {eval_result['eval_accuracy']:.4f}")
