In [2]:
import sys
import os
from logging import getLogger, ERROR

# Resolve the directory one level above the notebook's working directory;
# presumably this is where the local `model` package imported later lives.
notebook_dir = os.getcwd()
parent_dir = os.path.split(notebook_dir)[0]

# Make that directory importable, without appending a duplicate on re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Raise the threshold of the `transformers.modeling_utils` logger so that
# only records at ERROR level or above are emitted.
modeling_utils_logger = getLogger("transformers.modeling_utils")
modeling_utils_logger.setLevel(ERROR)

In [3]:
from model.qgpt2_models import QGPT2ClassificationModel
from pandas import read_csv, DataFrame
from datasets import Dataset, load_metric
from sklearn.metrics import f1_score,precision_recall_fscore_support

from transformers import (
    GPT2ForSequenceClassification,
    GPT2Tokenizer,
    Trainer,
    TrainingArguments,
)

# Tokenizer and 3-label sequence-classification model for the GPT-2 teacher.
# The two statements are independent of each other.
tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
gpt2_model = GPT2ForSequenceClassification.from_pretrained("gpt2", num_labels=3)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def tokenize_function(examples, padding="max_length"):
    """Tokenize the "text" entry of `examples` with the module-level `tokenizer`.

    The `tokenizer` name is looked up at call time, so whichever tokenizer is
    bound to it when this function runs is the one used.

    Args:
        examples: Mapping with a "text" entry holding the raw input(s).
        padding: Padding strategy forwarded unchanged to the tokenizer.

    Returns:
        The tokenizer's return value; the call requests truncation with
        ``max_length=128``.
    """
    texts = examples["text"]
    encoded = tokenizer(texts, max_length=128, truncation=True, padding=padding)
    return encoded


# Reuse the end-of-sequence token as the padding token, both in the model
# config (as an id) and in the tokenizer (as a token).
gpt2_model.config.pad_token_id = gpt2_model.config.eos_token_id
tokenizer.pad_token = tokenizer.eos_token

In [5]:
# Sentiment string -> integer class id used as the classification label.
SENTIMENT_TO_ID = {"negative": 0, "neutral": 1, "positive": 2}

df = read_csv("../data/Tweets.csv")

# Use an explicit `Series.map` instead of `Series.replace`: replacing strings
# with ints relies on implicit object -> int downcasting, which pandas has
# deprecated (it emits a FutureWarning).
sentiment_ids = df["airline_sentiment"].map(SENTIMENT_TO_ID)
if sentiment_ids.isna().any():
    # `map` yields NaN for anything outside the mapping; fail loudly here
    # rather than letting a non-integer label reach training.
    unknown = sorted(
        df.loc[sentiment_ids.isna(), "airline_sentiment"].astype(str).unique()
    )
    raise ValueError(f"Unexpected airline_sentiment values: {unknown}")
df["airline_sentiment"] = sentiment_ids.astype("int64")

# Keep only the tweet text and its class id, exposed as "text" / "label".
dataset = Dataset.from_pandas(df)
dataset = dataset.select_columns(["text", "airline_sentiment"])
dataset = dataset.rename_column("airline_sentiment", "label")

  df["airline_sentiment"] = df["airline_sentiment"].replace(


In [6]:
# Hold out 10% of the examples for evaluation; the seed fixes the split.
train_and_eval = dataset.train_test_split(test_size=0.1, seed=42)
train_ds, eval_ds = train_and_eval["train"], train_and_eval["test"]

# Tokenize both splits (train first, then eval) in batched mode.
gpt2_train_ds, gpt2_eval_ds = (
    split.map(tokenize_function, batched=True) for split in (train_ds, eval_ds)
)

# Have both tokenized splits return torch-formatted values for the three
# listed columns.
for tokenized_split in (gpt2_train_ds, gpt2_eval_ds):
    tokenized_split.set_format(
        type="torch", columns=["input_ids", "attention_mask", "label"]
    )

Map: 100%|██████████| 13176/13176 [00:02<00:00, 5892.40 examples/s]
Map: 100%|██████████| 1464/1464 [00:00<00:00, 5192.25 examples/s]


In [7]:
def compute_metrics(pred):
    """Compute macro F1 and per-class F1 for the 3-class sentiment task.

    Args:
        pred: Object exposing `label_ids` (true class ids) and `predictions`
            (per-class scores; the arg-max over the last axis is taken as the
            predicted class id).

    Returns:
        dict mapping "Macro F1" and one "F1 for <class>" key per sentiment
        class to a score rounded to four decimals.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    macro_f1 = f1_score(labels, preds, average="macro")

    # Pin the class order to 0=negative, 1=neutral, 2=positive. Without an
    # explicit `labels=` the result only covers classes that actually occur,
    # so a missing class would shift the indices below (wrong score reported
    # under a class name) or raise IndexError.
    _, _, per_class_f1, _ = precision_recall_fscore_support(
        labels, preds, average=None, labels=[0, 1, 2]
    )

    return {
        "Macro F1": round(float(macro_f1), 4),
        "F1 for negative": round(float(per_class_f1[0]), 4),
        "F1 for neutral": round(float(per_class_f1[1]), 4),
        # NOTE: the key spelling ("postive") is kept as-is so metric names
        # stay compatible with previously recorded results.
        "F1 for postive": round(float(per_class_f1[2]), 4),
    }

In [8]:
# Fine-tuning configuration for the GPT-2 classifier.
training_args = TrainingArguments(
    output_dir="/data/bz620/model_outputs",
    report_to="none",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
)

# Wire the model, the tokenized splits and the F1 metrics into one Trainer.
trainer = Trainer(
    args=training_args,
    model=gpt2_model,
    compute_metrics=compute_metrics,
    train_dataset=gpt2_train_ds,
    eval_dataset=gpt2_eval_ds,
)

# Last expression of the cell: its return value is the cell output.
trainer.train()

Step,Training Loss
500,0.5628
1000,0.4316
1500,0.3708
2000,0.3233


KeyboardInterrupt: 

In [15]:
def predict_labels(model, df):
    """Return the arg-max class index for every example in `df`.

    Args:
        model: Object whose `predict(df)` returns something exposing a
            `predictions` array (the notebook passes a `Trainer`).
        df: Input handed unchanged to `model.predict`.

    Returns:
        `predictions.argmax(-1)`, i.e. the index of the highest score along
        the last axis.
    """
    prediction_output = model.predict(df)
    scores = prediction_output.predictions
    return scores.argmax(-1)


# Predicted class ids of the fine-tuned GPT-2 model on the training split.
gpt2_preds = predict_labels(trainer, gpt2_train_ds)

# One row per training example: raw text, true label and GPT-2 prediction.
train_texts = gpt2_train_ds["text"]
train_labels = gpt2_train_ds["label"]
teacher_results = DataFrame(
    dict(text=train_texts, true_label=train_labels, gpt2_preds=gpt2_preds)
)

  f1 = load_metric("f1").compute(
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [9]:
# Persist the GPT-2 classifier under ./saved_model so that it can be
# re-loaded from that directory later.
gpt2_model.save_pretrained("./saved_model")

In [8]:
# Instantiate the project's QGPT2ClassificationModel from the weights stored
# in ./saved_model (n_bits is presumably the quantization bit-width; confirm
# against model.qgpt2_models).
fhe_model = QGPT2ClassificationModel.from_pretrained(
    "./saved_model", num_labels=3, use_cache=False, n_bits=8
)
# Reuse the end-of-sequence id as the padding id.
fhe_model.config.pad_token_id = fhe_model.config.eos_token_id

In [17]:
# Same hyper-parameters as the GPT-2 run, with `use_cpu=True` added.
fhe_training_args = TrainingArguments(
    output_dir="/data/bz620/model_outputs",
    report_to="none",
    use_cpu=True,
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=2e-5,
    weight_decay=0.01,
)

# This Trainer is only used below for prediction; `train()` is not called
# on it in this cell.
fhe_trainer = Trainer(
    args=fhe_training_args,
    model=fhe_model,
    compute_metrics=compute_metrics,
    train_dataset=gpt2_train_ds,
    eval_dataset=gpt2_eval_ds,
)

# Add the FHE-compliant model's predictions on the training split as a new
# column next to the GPT-2 predictions.
fhe_preds = predict_labels(fhe_trainer, gpt2_train_ds)
teacher_results["fhe_preds"] = fhe_preds

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


In [18]:
# Persist the teacher predictions. `index=False` keeps the positional index
# out of the file; otherwise it is written as an unnamed first column that
# shows up as a spurious "Unnamed: 0" column when the CSV is read back.
teacher_results.to_csv("teacher_results.csv", sep=",", encoding="utf-8", index=False)

# Macro-averaged F1 of each model's predictions against the true labels.
gpt2_f1 = f1_score(
    teacher_results["true_label"], teacher_results["gpt2_preds"], average="macro"
)
fhe_f1 = f1_score(
    teacher_results["true_label"], teacher_results["fhe_preds"], average="macro"
)

In [21]:
# Report both macro-F1 scores, one per line, with four decimals.
print(
    f"We achieved an f1 score of {gpt2_f1:.4f} on the vanilla model",
    f"We achieved an f1 score of {fhe_f1:.4f} on the FHE compliant model",
    sep="\n",
)

We achieved an f1 score of 0.8792 on the vanilla model
We achieved an f1 score of 0.8516 on the FHE compliant model


### Training the student model with teacher labels

In [14]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Load the student model and tokenizer.
student_model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=3
)
# NOTE: rebinding the module-level `tokenizer` is deliberate. `tokenize_function`
# looks the name up at call time, so from here on it tokenizes with DistilBERT.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Take the padding id from the student's own tokenizer. The earlier
# `config.pad_token_id = config.eos_token_id` line was the GPT-2 idiom (reuse
# EOS as PAD); DistilBERT does not define an EOS token, so that assignment
# replaced the model's padding id with a missing value.
student_model.config.pad_token_id = tokenizer.pad_token_id

# Use the teacher (FHE model) predictions as the training labels and keep only
# the columns the student needs. Selecting by name also discards the
# true-label / GPT-2 columns (the previous `drop(...)` result was never
# assigned, so it had no effect) and any index column stored in the CSV.
teacher_results = read_csv("teacher_results.csv")
teacher_results["label"] = teacher_results["fhe_preds"]
teacher_results = teacher_results[["text", "label"]]

# Tokenize the inputs.
student_train = Dataset.from_pandas(teacher_results)
student_train = student_train.map(tokenize_function, batched=True)
student_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Evaluation uses the held-out split with its true labels, re-tokenized with
# the DistilBERT tokenizer.
eval_ds = train_and_eval["test"]
eval_ds = eval_ds.map(tokenize_function, batched=True)
eval_ds.set_format("torch", columns=["input_ids", "attention_mask", "label"])


student_training_args = TrainingArguments(
    report_to="none",
    output_dir="/data/bz620/model_outputs",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
)

student_trainer = Trainer(
    model=student_model,
    args=student_training_args,
    train_dataset=student_train,
    eval_dataset=eval_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


student_trainer.train()

Map: 100%|██████████| 13176/13176 [00:04<00:00, 3079.43 examples/s]
Map: 100%|██████████| 1464/1464 [00:00<00:00, 2956.81 examples/s]


Step,Training Loss
500,0.4176
1000,0.2586
1500,0.1801
2000,0.1152


TrainOutput(global_step=2472, training_loss=0.21345024047160227, metrics={'train_runtime': 185.3752, 'train_samples_per_second': 213.232, 'train_steps_per_second': 13.335, 'total_flos': 1309066178439168.0, 'train_loss': 0.21345024047160227, 'epoch': 3.0})

In [15]:
# Evaluate the current `student_trainer` on its evaluation dataset; the
# returned metrics dict is displayed as the cell output.
student_trainer.evaluate()

{'eval_loss': 0.7473267912864685,
 'eval_Macro F1': 0.799,
 'eval_F1 for negative': 0.909,
 'eval_F1 for neutral': 0.6736,
 'eval_F1 for postive': 0.8145,
 'eval_runtime': 2.065,
 'eval_samples_per_second': 708.967,
 'eval_steps_per_second': 44.553,
 'epoch': 3.0}

### Training the student model with true labels

In [16]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer

# Load a fresh student model and tokenizer for the true-label baseline.
student_model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=3
)
# NOTE: rebinding the module-level `tokenizer` is deliberate. `tokenize_function`
# looks the name up at call time, so it tokenizes with DistilBERT below.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Take the padding id from the student's own tokenizer. The earlier
# `config.pad_token_id = config.eos_token_id` line was the GPT-2 idiom (reuse
# EOS as PAD); DistilBERT does not define an EOS token, so that assignment
# replaced the model's padding id with a missing value.
student_model.config.pad_token_id = tokenizer.pad_token_id

# Train on the original training split with its true labels.
student_train = train_and_eval["train"]
student_train = student_train.map(tokenize_function, batched=True)
student_train.set_format("torch", columns=["input_ids", "attention_mask", "label"])

# Evaluate on the held-out split, re-tokenized with the DistilBERT tokenizer.
eval_ds = train_and_eval["test"]
eval_ds = eval_ds.map(tokenize_function, batched=True)
eval_ds.set_format("torch", columns=["input_ids", "attention_mask", "label"])


student_training_args = TrainingArguments(
    report_to="none",
    output_dir="/data/bz620/model_outputs",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_strategy="epoch",
)

student_trainer = Trainer(
    model=student_model,
    args=student_training_args,
    train_dataset=student_train,
    eval_dataset=eval_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

student_trainer.train()

Map: 100%|██████████| 13176/13176 [00:04<00:00, 2971.06 examples/s]
Map: 100%|██████████| 1464/1464 [00:00<00:00, 2802.27 examples/s]


Step,Training Loss
500,0.5598
1000,0.4116
1500,0.3261
2000,0.2635


TrainOutput(global_step=2472, training_loss=0.36185037125275743, metrics={'train_runtime': 199.829, 'train_samples_per_second': 197.809, 'train_steps_per_second': 12.371, 'total_flos': 1309066178439168.0, 'train_loss': 0.36185037125275743, 'epoch': 3.0})

In [18]:
# Evaluate the current `student_trainer` on its evaluation dataset; the
# returned metrics dict is displayed as the cell output.
student_trainer.evaluate()

{'eval_loss': 0.4384877681732178,
 'eval_Macro F1': 0.8005,
 'eval_F1 for negative': 0.9135,
 'eval_F1 for neutral': 0.6908,
 'eval_F1 for postive': 0.7972,
 'eval_runtime': 2.223,
 'eval_samples_per_second': 658.581,
 'eval_steps_per_second': 41.386,
 'epoch': 3.0}