<a href="https://colab.research.google.com/github/LUCID1010/Autorepair_of_DNN/blob/main/automatic_repair_of_dnn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import random
import string
import torch
import numpy as np
import evaluate
from datasets import Dataset, load_dataset  # To load SNLI
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
from peft import LoraConfig, get_peft_model

# %%
# ---------- Config ----------
# Checkpoint name and run-wide hyperparameters.
MODEL_NAME = "roberta-base"
REPAIR_BATCH_SIZE = 16  # per-device batch size, used for both training and evaluation
EPOCHS = 10
LR = 2e-4
MAX_LEN = 256  # token cap applied when tokenizing premise/hypothesis pairs
SEED = 42
OUTPUT_DIR = "./hybrid_autorepair_snli"

# Use the GPU whenever PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Checkpoints, the adapter and the tokenizer are all written below this folder.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Seed the stdlib, NumPy and PyTorch generators with the same value.
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

print(f"Using device: {DEVICE}")

# %%
# ---------- Perturbation utilities ----------
def add_typo(s: str, p=0.05) -> str:
    """Randomly replace letters of ``s`` to imitate typing mistakes.

    One random draw is made for every character. When the draw falls below
    ``p`` and the character is alphabetic, the character is replaced by a
    random lowercase ASCII letter. Non-alphabetic characters are never
    touched, so the result always has the same length as the input.

    Args:
        s: Text to corrupt. ``None`` (which can appear in dataset fields)
            is treated as empty text.
        p: Per-character replacement probability.

    Returns:
        The corrupted text, or ``""`` when ``s`` is ``None``.
    """
    if s is None:
        return ""
    noisy = []
    for ch in s:
        # Draw before the isalpha() check so the RNG advances exactly once
        # per character, whatever the character is.
        if random.random() < p and ch.isalpha():
            ch = random.choice(string.ascii_lowercase)
        noisy.append(ch)
    return "".join(noisy)

def swap_adjacent(s: str, p=0.02) -> str:
    """Randomly transpose neighbouring letters of ``s``.

    The text is scanned left to right. At each visited position one random
    draw is made; when it falls below ``p`` and both the current character
    and its right neighbour are alphabetic, the two are exchanged and the
    scan jumps past the pair, so a character is moved at most once.

    Args:
        s: Text to corrupt. ``None`` is treated as empty text.
        p: Probability of attempting a swap at each visited position.

    Returns:
        The text with some adjacent letter pairs transposed, or ``""`` when
        ``s`` is ``None``.
    """
    if s is None:
        return ""
    letters = list(s)
    just_swapped = False
    for left in range(len(letters) - 1):
        if just_swapped:
            # This index is the right half of the pair swapped on the
            # previous step; skip it without consuming a random draw.
            just_swapped = False
            continue
        right = left + 1
        if random.random() < p and letters[left].isalpha() and letters[right].isalpha():
            letters[left], letters[right] = letters[right], letters[left]
            just_swapped = True
    return "".join(letters)

# %%
# ---------- Load a REAL NLI dataset ----------
print("Loading SNLI dataset...")
# Small slices keep the run short: the first 2000 training rows and the
# first 500 validation rows.
snli_dataset = load_dataset("snli", split="train[:2000]")
val_dataset_raw = load_dataset("snli", split="validation[:500]")


def _has_gold_label(example):
    """Return True unless the row carries the -1 placeholder label."""
    return example["label"] != -1


# Per the SNLI dataset card, -1 marks rows without a gold label; they are
# dropped so only real class ids remain. The "premise" column is then renamed
# to "context", the name the perturbation and tokenization steps read.
train_dataset = snli_dataset.filter(_has_gold_label).rename_column("premise", "context")
val_dataset = val_dataset_raw.filter(_has_gold_label).rename_column("premise", "context")

print(f"Training examples: {len(train_dataset)}")
print(f"Validation examples: {len(val_dataset)}")

# %%
# ---------- Create Perturbed Training Set and Clean Validation Set ----------
# Apply perturbations to the training data
def perturb_function(examples):
    """Inject character-level noise into the text fields of ``examples``.

    The "context" field and then the "hypothesis" field are each passed
    through ``add_typo`` followed by ``swap_adjacent``. The mapping is
    updated in place and also returned.

    Args:
        examples: Mapping with "context" and "hypothesis" entries, each a
            single string (or ``None``), as the two text helpers expect.

    Returns:
        The same mapping, with both text fields replaced by noisy versions.
    """
    for field in ("context", "hypothesis"):
        with_typos = add_typo(examples[field])
        examples[field] = swap_adjacent(with_typos)
    return examples

print("Perturbing training data...")
# map() is called without batched=True, so perturb_function receives one
# example at a time and the text helpers see a single string per field.
# Only the training split is perturbed; val_dataset is left as loaded.
perturbed_train_dataset = train_dataset.map(perturb_function)

# %%
# ---------- Tokenizer & Model ----------
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Load the checkpoint with a 3-class sequence-classification head; the run
# log reports the classifier weights as newly initialized.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=3, ignore_mismatched_sizes=True
)
model = model.to(DEVICE)

# Tokenization function
def tokenize_function(examples):
    """Tokenize context/hypothesis pairs with the module-level tokenizer.

    "context" is passed as the first sequence and "hypothesis" as the
    second. Inputs are truncated to ``MAX_LEN`` tokens and no padding is
    applied at this stage.

    Args:
        examples: Mapping with "context" and "hypothesis" entries.

    Returns:
        Whatever the tokenizer call returns for the pair(s).
    """
    first_segments = examples["context"]
    second_segments = examples["hypothesis"]
    encoding_options = {"truncation": True, "max_length": MAX_LEN, "padding": False}
    return tokenizer(first_segments, second_segments, **encoding_options)

print("Tokenizing datasets...")
# Batched tokenization of both splits, in order: the perturbed training data
# first, then the unperturbed validation data.
tokenized_train, tokenized_val = [
    split.map(tokenize_function, batched=True)
    for split in (perturbed_train_dataset, val_dataset)
]

# %%
# ---------- LoRA repair ----------
# Adapter settings for a sequence-classification task: rank 8, alpha 32,
# 5% LoRA dropout, no bias training. target_modules lists the module names
# that receive adapters. The run log reports roughly 1.5% of all parameters
# as trainable after wrapping.
lora_config = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=["query", "value", "key", "dense"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the base model with the adapters and report the trainable share.
model_peft = get_peft_model(model, lora_config)
model_peft.to(DEVICE)
model_peft.print_trainable_parameters()

# %%
# ---------- Metrics Calculation ----------
accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Score evaluation output with the module-level accuracy metric.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. The predictions hold one
            score per class along axis 1.

    Returns:
        The result of ``accuracy_metric.compute`` for the arg-max class ids
        compared against the labels.
    """
    class_scores, gold_labels = eval_pred
    predicted_ids = np.argmax(class_scores, axis=1)
    return accuracy_metric.compute(references=gold_labels, predictions=predicted_ids)

# %%
# ---------- Trainer Setup ----------
# Evaluate and checkpoint once per epoch, then reload the best checkpoint when
# training ends.
#
# NOTE: with load_best_model_at_end=True and no metric_for_best_model, the
# Trainer ranks checkpoints by eval loss. In the recorded run that restored
# epoch 3 (lowest validation loss, 0.7469 accuracy) even though epoch 10
# reached 0.8082 accuracy. Since accuracy is the headline number reported by
# this script, checkpoints are ranked by the "accuracy" value returned from
# compute_metrics instead.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=REPAIR_BATCH_SIZE,
    per_device_eval_batch_size=REPAIR_BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    eval_strategy="epoch",
    save_strategy="epoch",  # must match eval_strategy for best-model tracking
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,  # higher accuracy is better
    weight_decay=0.01,
    fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
    logging_steps=50,
    seed=SEED,  # keep the Trainer's seed tied to the script-wide SEED
)

# Tokenization ran with padding=False, so padding is left to the collator
# when batches are assembled.
data_collator = DataCollatorWithPadding(tokenizer)

# Train on the perturbed split and evaluate on the clean validation split.
trainer = Trainer(
    model=model_peft,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    # `tokenizer=` is deprecated on Trainer (the recorded run emitted a
    # warning on this call); `processing_class=` is its replacement and
    # requires transformers >= 4.46.
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

# %%
# Train / repair
print("Starting training...")
trainer.train()
print("Training complete.")

# %%
# --- Final Evaluation ---
# Re-score whatever model the trainer holds after training on the clean
# (unperturbed) validation split.
print("\n--- Final Evaluation on Clean Validation Set ---")
eval_results = trainer.evaluate(eval_dataset=tokenized_val)

print("\nEvaluation Results:")
final_accuracy = eval_results["eval_accuracy"]
print(f"Accuracy: {final_accuracy:.4f}")
final_loss = eval_results["eval_loss"]
print(f"Loss: {final_loss:.4f}")

# %%
# Save adapter & tokenizer
# Both go into their own sub-folders of OUTPUT_DIR.
adapter_dir = os.path.join(OUTPUT_DIR, "lora_adapter")
tokenizer_dir = os.path.join(OUTPUT_DIR, "tokenizer")
model_peft.save_pretrained(adapter_dir)
tokenizer.save_pretrained(tokenizer_dir)

print(f"\nTraining complete. Adapter saved at: {adapter_dir}")

Using device: cuda
Loading SNLI dataset...
Training examples: 1997
Validation examples: 490
Perturbing training data...


Map:   0%|          | 0/1997 [00:00<?, ? examples/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Tokenizing datasets...


Map:   0%|          | 0/1997 [00:00<?, ? examples/s]

Map:   0%|          | 0/490 [00:00<?, ? examples/s]

trainable params: 1,920,003 || all params: 126,567,942 || trainable%: 1.5170


  trainer = Trainer(


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy
1,1.1068,1.128636,0.389796
2,0.9919,0.773078,0.661224
3,0.7946,0.670716,0.746939
4,0.7395,0.671466,0.734694
5,0.61,0.699146,0.765306
6,0.5228,0.773286,0.761224
7,0.4634,0.722397,0.793878
8,0.3683,0.797104,0.804082
9,0.2911,0.874498,0.797959
10,0.2941,0.876603,0.808163


Training complete.

--- Final Evaluation on Clean Validation Set ---



Evaluation Results:
Accuracy: 0.7469
Loss: 0.6707

Training complete. Adapter saved at: ./hybrid_autorepair_snli/lora_adapter


In [None]:
# `evaluate` is imported by the training cell above; on a fresh runtime,
# run this install before executing that cell.
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6
