# MODEL PRUNING

In [3]:
#!pip install "urllib3<2" # Warning
from transformers import AutoTokenizer

#main_model = "google-bert/bert-base-uncased"
main_model = "textattack/bert-base-uncased-SST-2"

tokenizer = AutoTokenizer.from_pretrained(main_model) # We use the same tokenizer throughout all models. It becomes more important for distilliation

dataset_id="glue"
dataset_config="sst2"

from datasets import load_dataset

dataset = load_dataset(dataset_id,dataset_config)
#dataset = load_dataset("SetFit/sst2")
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 872
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1821
    })
})

In [4]:
def process(examples):
    tokenized_inputs = tokenizer(
        examples["sentence"], truncation=True, max_length=512
        #examples["text"], truncation=True, max_length=512
    )
    return tokenized_inputs

tokenized_datasets = dataset.map(process, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label","labels")

tokenized_datasets["test"].features

{'sentence': Value(dtype='string', id=None),
 'labels': ClassLabel(names=['negative', 'positive'], id=None),
 'idx': Value(dtype='int32', id=None),
 'input_ids': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None),
 'token_type_ids': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None),
 'attention_mask': Sequence(feature=Value(dtype='int8', id=None), length=-1, id=None)}

In [7]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false" # Warning

from transformers import AutoModelForSequenceClassification, DataCollatorWithPadding
from huggingface_hub import HfFolder


# create label2id, id2label dicts for nice outputs for the model
labels = tokenized_datasets["train"].features["labels"].names
#labels = ["negative", "positive"]
num_labels = len(labels)
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

pruned_model = AutoModelForSequenceClassification.from_pretrained(
    main_model,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

In [8]:
#!pip install torch-pruning

from transformers import AutoTokenizer, BertModel, BertForSequenceClassification
import torch
from transformers.models.bert.modeling_bert import BertSelfAttention, BertPooler
import torch_pruning as tp

#tokenizer = AutoTokenizer.from_pretrained(main_model)
#model = BertModel.from_pretrained(main_model)

hf_inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
example_inputs = {'input_ids': hf_inputs['input_ids'], 'token_type_ids': hf_inputs['token_type_ids'], 'attention_mask': hf_inputs['attention_mask']}

#outputs = model(**example_inputs)
#last_hidden_states = outputs.last_hidden_state

imp = tp.importance.MagnitudeImportance(p=2, group_reduction="mean")
base_macs, base_params = tp.utils.count_ops_and_params(pruned_model, example_inputs)
num_heads = {}

# All heads should be pruned simultaneously, so we group channels by head.
for m in pruned_model.modules():
    if isinstance(m, BertSelfAttention):
        num_heads[m.query] = m.num_attention_heads
        num_heads[m.key] = m.num_attention_heads
        num_heads[m.value] = m.num_attention_heads

pruner = tp.pruner.MetaPruner(
    pruned_model, 
    example_inputs, 
    global_pruning=False, # If False, a uniform pruning ratio will be assigned to different layers.
    importance=imp, # importance criterion for parameter selection
    iterative_steps=1, # the number of iterations to achieve target pruning ratio
    pruning_ratio=0.5,
    num_heads=num_heads,
    prune_head_dims=False,
    prune_num_heads=True,
    head_pruning_ratio=0.5,
    output_transform=lambda out: out.logits.sum(),
    ignored_layers=[pruned_model.bert.pooler],
)

for g in pruner.step(interactive=True):
    #print(g)
    g.prune()

# Modify the attention head size and all head size after pruning
for m in pruned_model.modules():
    if isinstance(m, BertSelfAttention):
        #print("Num heads: %d, head size: %d =>"%(m.num_attention_heads, m.attention_head_size))
        m.num_attention_heads = pruner.num_heads[m.query]
        m.attention_head_size = m.query.out_features // m.num_attention_heads
        m.all_head_size = m.query.out_features
        #print("Num heads: %d, head size: %d"%(m.num_attention_heads, m.attention_head_size))
        #print()
        
#for m in pruned_model.modules():
#    if isinstance(m, BertForSequenceClassification):
#        m.classifier = torch.nn.Linear(m.classifier.in_features, 2)  # Set out_features = 2 for binary classification
#        print(f"Modified classifier output to {m.classifier.out_features}")

for m in pruned_model.modules():
    if isinstance(m, BertPooler):
        m.dense = torch.nn.Linear(384, 384)  # Modify the pooler layer to use 384 input/output features
        print(f"Modified pooler dense layer to in_features=384, out_features=384")

for m in pruned_model.modules():
    if isinstance(m, BertForSequenceClassification):
        m.classifier = torch.nn.Linear(384, 2)  # Modify classifier to match the new pooler output size
        print(f"Modified classifier output to {m.classifier.out_features}")



print(pruned_model)
test_output = pruned_model(**example_inputs)
pruned_macs, pruned_params = tp.utils.count_ops_and_params(pruned_model, example_inputs)
print("Base MACs: %f M, Pruned MACs: %f M"%(base_macs/1e6, pruned_macs/1e6))
print("Base Params: %f M, Pruned Params: %f M"%(base_params/1e6, pruned_params/1e6))

# Calculate the difference in parameters before and after pruning
params_diff = base_params - pruned_params

# Print the comparison
print("Difference in Params: %f M"%(params_diff/1e6))



Modified pooler dense layer to in_features=384, out_features=384
Modified classifier output to 2
BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 384, padding_idx=0)
      (position_embeddings): Embedding(512, 384)
      (token_type_embeddings): Embedding(2, 384)
      (LayerNorm): LayerNorm((384,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(

In [16]:
# Pruned model path without training
pruned_model_path = "./pruned_model"

In [None]:
# Save pruned_model without training
prune_trainer.save_model(pruned_model_path)

# Prune Basic Training example

In [27]:
# Evalutation METRICS
#%pip install evaluate
#%pip install scikit-learn
from transformers import AutoTokenizer, BertModel, BertForSequenceClassification, EarlyStoppingCallback
import torch
from transformers.models.bert.modeling_bert import BertSelfAttention, BertPooler
import torch_pruning as tp
from evaluate import load
import numpy as np
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Load the accuracy metric
accuracy_metric = load("accuracy")
precision_metric = load("precision")
recall_metric = load("recall")
f1_metric = load("f1")


# Define metrics function
#def compute_metrics(eval_pred):
#    predictions, labels = eval_pred
#    predictions = np.argmax(predictions, axis=1)
#    acc = accuracy_metric.compute(predictions=predictions, references=labels)
#    return {
#        "accuracy": acc["accuracy"],
#    }


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    acc = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, average="macro", zero_division=0)
    recall = recall_score(labels, predictions, average="macro", zero_division=0)
    f1 = f1_score(labels, predictions, average="macro", zero_division=0)
    return {
        "accuracy": acc,#["accuracy"],
        "precision": precision,#["precision"],
        "recall": recall,#["recall"],
        "f1": f1,#["f1"]
    }


if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
#device = torch.device("mps")
print(f"Using device: {device}")

#%pip show torch


training_args = TrainingArguments(
    output_dir="bla",
    num_train_epochs=10,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    fp16=False,
    learning_rate=3e-5,
    seed=33,
    # logging & evaluation strategies
    logging_dir=f"bla/logs",
    logging_strategy="epoch", # to get more information to TB
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    #metric_for_best_model="accuracy",
    metric_for_best_model="eval_loss",  # Monitor validation loss
    greater_is_better=False, # For validation loss, less is better
    #weight_decay=0.5
)

# define data_collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

early_stopping = EarlyStoppingCallback(early_stopping_patience=2)

prune_trainer = Trainer (
    pruned_model,
    training_args,
    train_dataset=tokenized_datasets["train"].shuffle(seed=42).select(range(1000)),
    eval_dataset=tokenized_datasets["validation"],
    #train_dataset=tokenized_datasets["validation"].shuffle(seed=42),
    #eval_dataset=tokenized_datasets["train"],#.shuffle(seed=42).select(range(1000)),
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    callbacks=[early_stopping]
)

Using device: mps


  prune_trainer = Trainer (


In [None]:
prune_trainer.train()

In [17]:
# Pruned model path with training
pruned_model_trained_path = "./pruned_model_trained"

In [None]:
# Save pruned model with training
prune_trainer.save_model(pruned_model_trained_path)

In [19]:
# Load pruned trained and untrained models
from transformers import AutoModelForSequenceClassification, BertConfig

# Specify the local directory where the model is saved
pruned_model_untrained_config = BertConfig.from_pretrained(f"{pruned_model_path}/config.json")
pruned_model_untrained_config.hidden_size = 384 # Due to the model saving, this is not set correctly; overrides wrong value from config
pruned_model_untrained_config.intermediate_size = 1536 # Due to the model saving, this is not set correctly; overrides wrong value from config

pruned_model_trained_config = BertConfig.from_pretrained(f"{pruned_model_trained_path}/config.json")
pruned_model_trained_config.hidden_size = 384
pruned_model_trained_config.intermediate_size = 1536

# Load models
pruned_model_untrained = AutoModelForSequenceClassification.from_pretrained(pruned_model_path, config=pruned_model_untrained_config, local_files_only=True, ignore_mismatched_sizes=True)
pruned_model_trained = AutoModelForSequenceClassification.from_pretrained(pruned_model_trained_path, config=pruned_model_trained_config, local_files_only=True, ignore_mismatched_sizes=True)

# Test Model Performance

In [24]:
from evaluate import evaluator
from transformers import pipeline
task_evaluator = evaluator("text-classification")
data = load_dataset("glue","sst2", split="validation").shuffle(seed=42)
main_pipeline = pipeline("text-classification", model=main_model,device=-1) #set device to cpu to make it comparable

eval_results_pruned_model_untrained = task_evaluator.compute(
    model_or_pipeline=pruned_model_untrained,
    tokenizer=tokenizer,
    data=data,
    input_column="sentence",
    label_column="label",
    label_mapping={"negative": 0, "positive": 1}
)

eval_results_pruned_model_trained = task_evaluator.compute(
    model_or_pipeline=pruned_model_trained,
    tokenizer=tokenizer,
    data=data,
    input_column="sentence",
    label_column="label",
    label_mapping={"negative": 0, "positive": 1}
)

eval_results_main_model = task_evaluator.compute(
    model_or_pipeline=main_pipeline,
    tokenizer=tokenizer,
    data=data,
    input_column="sentence",
    label_column="label",
    label_mapping={"LABEL_0": 0, "LABEL_1": 1}
)

print(eval_results_pruned_model_untrained)
print(eval_results_pruned_model_trained)
print(eval_results_main_model)

Device set to use cpu
Device set to use cpu
Device set to use cpu


{'accuracy': 0.4908256880733945, 'total_time_in_seconds': 4.8765705409998645, 'samples_per_second': 178.81418768961558, 'latency_in_seconds': 0.005592397409403514}
{'accuracy': 0.6995412844036697, 'total_time_in_seconds': 5.378949250000005, 'samples_per_second': 162.11344622743914, 'latency_in_seconds': 0.006168519782110098}
{'accuracy': 0.9243119266055045, 'total_time_in_seconds': 16.315414833999967, 'samples_per_second': 53.44638851491686, 'latency_in_seconds': 0.01871033811238528}


# START KNOWLEDGE DISTILLATION

## Installation

In [28]:
#%pip install "pytorch==1.10.1"
#%pip install transformers datasets tensorboard --upgrade
#!sudo apt-get install git-lfs
#!brew install git-lfs


In [29]:
from huggingface_hub import login

login()  # displays a widget in a notebook, a prompt in terminal otherwise
#

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [30]:
student_id = "google/bert_uncased_L-2_H-128_A-2"
teacher_id = main_model # We use the same model as in the begining

# name for our repository on the hub
repo_name = "tiny-bert-sst2-distilled_combined"

## Distilling the model using `PyTorch` and `DistillationTrainer`


Now that our `dataset` is processed, we can distill it. Normally, when fine-tuning a transformer model using PyTorch you should go with the `Trainer-API`. The [Trainer](https://huggingface.co/docs/transformers/v4.16.1/en/main_classes/trainer#transformers.Trainer) class provides an API for feature-complete training in PyTorch for most standard use cases.

In our example we cannot use the `Trainer` out-of-the-box, since we need to pass in two models, the `Teacher` and the `Student` and compute the loss for both. But we can subclass the `Trainer` to create a `DistillationTrainer` which will take care of it and only overwrite the [compute_loss](https://github.com/huggingface/transformers/blob/c4ad38e5ac69e6d96116f39df789a2369dd33c21/src/transformers/trainer.py#L1962) method as well as the `init` method. In addition to this we also need to subclass the `TrainingArguments` to include the our distillation hyperparameters.


In [36]:
%%writefile custom_args.py
# Write class to external class to work
from transformers import TrainingArguments

class DistillationTrainingArguments(TrainingArguments):
    def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
        super().__init__(*args, **kwargs)
        self.alpha = alpha
        self.temperature = temperature

Overwriting custom_args.py


In [38]:
#%pip install torch torchvision torchaudio
import importlib
import custom_args
importlib.reload(custom_args)
from custom_args import DistillationTrainingArguments
from transformers import TrainingArguments, Trainer
import torch
import torch.nn as nn
import torch.nn.functional as F

#class DistillationTrainingArguments(TrainingArguments):
#    def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
#        super().__init__(*args, **kwargs)
#
#        self.alpha = alpha
#        self.temperature = temperature

class DistillationTrainer(Trainer):
    def __init__(self, *args, teacher_model=None, accelerator=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher = teacher_model

        self.accelerator = accelerator if accelerator else Accelerator()
        # Place teacher model on the correct device using the accelerator
        self.teacher = self.accelerator.prepare(self.teacher)
        
        # place teacher on same device as student
        #self._move_model_to_device(self.teacher,self.model.device)
        self.teacher.eval()

def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
    # compute student output
    outputs_student = model(**inputs)
    student_loss = outputs_student.loss

    # compute teacher output
    with torch.no_grad():
        outputs_teacher = self.teacher(**inputs)

    # assert size
    assert outputs_student.logits.size() == outputs_teacher.logits.size()

    # Soften probabilities and compute distillation loss
    loss_function = nn.KLDivLoss(reduction="batchmean")
    loss_logits = (loss_function(
        F.log_softmax(outputs_student.logits / self.args.temperature, dim=-1),
        F.softmax(outputs_teacher.logits / self.args.temperature, dim=-1)) * (self.args.temperature ** 2))

    # Return weighted student loss
    loss = self.args.alpha * student_loss + (1. - self.args.alpha) * loss_logits
    return (loss, outputs_student) if return_outputs else loss


### Hyperparameter Definition, Model Loading

In [39]:
#%pip install transformers[torch]
#%pip install "accelerate==1.2.1"

from transformers import AutoModelForSequenceClassification, DataCollatorWithPadding
from huggingface_hub import HfFolder

# create label2id, id2label dicts for nice outputs for the model
labels = tokenized_datasets["train"].features["labels"].names
num_labels = len(labels)
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

# define training args
training_args = DistillationTrainingArguments(
    output_dir=repo_name,
    num_train_epochs=2,
    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,
    fp16=False,
    learning_rate=6e-5,
    seed=33,
    # logging & evaluation strategies
    logging_dir=f"{repo_name}/logs",
    logging_strategy="epoch", # to get more information to TB
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    report_to="tensorboard",
    # push to hub parameters
    push_to_hub=True,
    hub_strategy="every_save",
    hub_model_id=repo_name,
    hub_token=HfFolder.get_token(),
    # distilation parameters
    alpha=0.5,
    temperature=4.0
    )


# define data_collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# define model
teacher_model = AutoModelForSequenceClassification.from_pretrained(
    teacher_id,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# define student model
#student_model = AutoModelForSequenceClassification.from_pretrained(
#    student_id,
#    num_labels=num_labels,
#    id2label=id2label,
#    label2id=label2id,
#)



## Training

Start training with calling `trainer.train`

In [41]:
#if torch.backends.mps.is_available():
#    device = torch.device("mps")
#else:
#    device = torch.device("cpu")
device = torch.device("cpu")
print(f"Using device: {device}")

#%pip show torch


Using device: cpu


In [42]:
from accelerate import Accelerator
accelerator = Accelerator(cpu=True)
trainer = DistillationTrainer(
    pruned_model,
    training_args,
    teacher_model=teacher_model,
    train_dataset=tokenized_datasets["train"].select(range(10000)),
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    accelerator=accelerator
)

  super().__init__(*args, **kwargs)


start training using the `DistillationTrainer`.

In [43]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.655,0.564061,0.693807,0.72389,0.690315,0.68065
2,0.4572,0.516435,0.733945,0.746817,0.73175,0.729152


TrainOutput(global_step=158, training_loss=0.5561002900328817, metrics={'train_runtime': 128.6277, 'train_samples_per_second': 155.487, 'train_steps_per_second': 1.228, 'total_flos': 127708696981824.0, 'train_loss': 0.5561002900328817, 'epoch': 2.0})

## Hyperparameter Search for Distillation parameter `alpha` & `temperature` with optuna

The parameter `alpha` & `temparature` in the `DistillationTrainer` can also be used when doing Hyperparamter search to maxizime our "knowledge extraction". As Hyperparamter Optimization framework are we using [Optuna](https://optuna.org/), which has a integration into the `Trainer-API`. Since we the `DistillationTrainer` is a sublcass of the `Trainer` we can use the `hyperparameter_search` without any code changes.


In [None]:
#%pip install optuna

To do Hyperparameter Optimization using `optuna` we need to define our hyperparameter space. In this example we are trying to optimize/maximize the `num_train_epochs`, `learning_rate`, `alpha` & `temperature` for our `student_model`.

In [44]:
def hp_space(trial):
    return {
      "num_train_epochs": trial.suggest_int("num_train_epochs", 2, 2),
      "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-3 ,log=True),
      "alpha": trial.suggest_float("alpha", 0, 1),
      "temperature": trial.suggest_int("temperature", 2, 30),
      }

To start our Hyperparmeter search we just need to call `hyperparameter_search` provide our `hp_space` and number of trials to run.

In [None]:
def student_init():
    return pruned_model
#    return AutoModelForSequenceClassification.from_pretrained(
#        student_id,
#        num_labels=num_labels,
#        id2label=id2label,
#        label2id=label2id
#    )

trainer = DistillationTrainer(
    model_init=student_init,
    args=training_args,
    teacher_model=teacher_model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    accelerator=accelerator
)
best_run = trainer.hyperparameter_search(
    n_trials=2,
    direction="maximize",
    hp_space=hp_space
)

print(best_run)

Since optuna is just finding the best hyperparameters we need to fine-tune our model again using the best hyperparamters from the `best_run`.

In [None]:
# overwrite initial hyperparameters with from the best_run
for k,v in best_run.hyperparameters.items():
    setattr(training_args, k, v)

# Define a new repository to store our distilled model
best_model_ckpt = "tiny-bert-best"
training_args.output_dir = best_model_ckpt

We have overwritten the default Hyperparameters with the one from our `best_run` and can start the training now.

In [None]:
# Create a new Trainer with optimal parameters
optimal_trainer = DistillationTrainer(
    pruned_model,
    training_args,
    teacher_model=teacher_model,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

optimal_trainer.train()


# save best model, metrics and create model card
trainer.create_model_card(model_name=training_args.hub_model_id)
trainer.push_to_hub()

In [None]:
from huggingface_hub import HfApi

whoami = HfApi().whoami()
username = whoami['name']

print(f"https://huggingface.co/{username}/{repo_name}")

In [None]:
print(pruned_model.config.id2label)


In [45]:
from transformers import pipeline
import torch

# Assuming teacher_model and tokenizer are already loaded

# Initialize the classifier pipeline with CPU device
device = torch.device("cpu")
classifier = pipeline("text-classification", model=teacher_model, tokenizer=tokenizer, device=-1)  # -1 means CPU

# Ensure the model's id2label mapping is correct
teacher_model.config.id2label = {0: 'negative', 1: 'positive'}
teacher_model.config.label2id = {'negative': 0, 'positive': 1}

# Example prompts
prompts = [
    "This movie was amazing!",
    "I hated the ending.",
    "The acting was mediocre.",
    "It was an okay film.",
    "A truly captivating experience!"
]

# Process each prompt and print results
for prompt in prompts:
    # Run classification
    result = classifier(prompt)[0]  # Get the result for the first prompt
    
    # Check the output result
    print(f"Prompt: {prompt}")
    print(f"Raw Result: {result}")
    
    # Ensure the label is correctly mapped
    label = result['label']
    
    # Convert to the correct label if needed (this step may be redundant)
    if isinstance(label, str):  # If it's a string, print it directly
        print(f"Label: {label}, Score: {result['score']}\n")
    else:  # If it's an index, use id2label to convert
        label = teacher_model.config.id2label[label]
        print(f"Label: {label}, Score: {result['score']}\n")


Device set to use mps:0


Prompt: This movie was amazing!
Raw Result: {'label': 'positive', 'score': 0.9997144341468811}
Label: positive, Score: 0.9997144341468811

Prompt: I hated the ending.
Raw Result: {'label': 'negative', 'score': 0.9988741278648376}
Label: negative, Score: 0.9988741278648376

Prompt: The acting was mediocre.
Raw Result: {'label': 'negative', 'score': 0.99798583984375}
Label: negative, Score: 0.99798583984375

Prompt: It was an okay film.
Raw Result: {'label': 'positive', 'score': 0.9993498921394348}
Label: positive, Score: 0.9993498921394348

Prompt: A truly captivating experience!
Raw Result: {'label': 'positive', 'score': 0.9997534155845642}
Label: positive, Score: 0.9997534155845642



In [46]:
from transformers import pipeline
import torch

# Assuming student_model and tokenizer are already loaded

# Initialize the classifier pipeline with CPU device
device = torch.device("cpu")
classifier = pipeline("text-classification", model=pruned_model, tokenizer=tokenizer, device=-1)  # -1 means CPU

# Ensure the model's id2label mapping is correct
pruned_model.config.id2label = {0: 'negative', 1: 'positive'}
pruned_model.config.label2id = {'negative': 0, 'positive': 1}

# Example prompts
prompts = [
    "This movie was amazing!",
    "I hated the ending.",
    "The acting was mediocre.",
    "It was an okay film.",
    "A truly captivating experience!"
]

# Process each prompt and print results
for prompt in prompts:
    # Run classification
    result = classifier(prompt)[0]  # Get the result for the first prompt
    
    # Check the output result
    print(f"Prompt: {prompt}")
    print(f"Raw Result: {result}")
    
    # Ensure the label is correctly mapped
    label = result['label']
    
    # Convert to the correct label if needed (this step may be redundant)
    if isinstance(label, str):  # If it's a string, print it directly
        print(f"Label: {label}, Score: {result['score']}\n")
    else:  # If it's an index, use id2label to convert
        label = pruned_model.config.id2label[label]
        print(f"Label: {label}, Score: {result['score']}\n")


Device set to use mps:0


Prompt: This movie was amazing!
Raw Result: {'label': 'positive', 'score': 0.9758805632591248}
Label: positive, Score: 0.9758805632591248

Prompt: I hated the ending.
Raw Result: {'label': 'negative', 'score': 0.839915931224823}
Label: negative, Score: 0.839915931224823

Prompt: The acting was mediocre.
Raw Result: {'label': 'negative', 'score': 0.8013845086097717}
Label: negative, Score: 0.8013845086097717

Prompt: It was an okay film.
Raw Result: {'label': 'positive', 'score': 0.9175781607627869}
Label: positive, Score: 0.9175781607627869

Prompt: A truly captivating experience!
Raw Result: {'label': 'positive', 'score': 0.9678720235824585}
Label: positive, Score: 0.9678720235824585

