In [1]:
%pip install -U transformers 
%pip install -U datasets 
%pip install -U accelerate 
%pip install -U peft 
%pip install -U trl 
%pip install -U bitsandbytes

Collecting transformers
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.46.3-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m61.6 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.45.1
    Uninstalling transformers-4.45.1:
      Successfully uninstalled transformers-4.45.1
Successfully installed transformers-4.46.3
Note: you may need to restart the kernel to use updated packages.
Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m

## Necessary imports

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
    
    )
    
from peft import (LoraConfig,
                 PeftModel,
                 prepare_model_for_kbit_training,
                 get_peft_model)

import os 
import torch
import wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

## Huggingface login

In [3]:
# insert token

!huggingface-cli login --token=

  pid, fd = os.forkpty()


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Install evaluate library

In [4]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


## SST2 dataset

In [5]:
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    TrainingArguments, 
    Trainer
)
from datasets import load_dataset
import numpy as np
import evaluate

dataset = load_dataset("glue", "sst2")

dataset

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 872
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 1821
    })
})

In [12]:
from datasets import concatenate_datasets

combined_dataset = concatenate_datasets([dataset["train"], dataset["validation"], dataset["test"]])

combined_dataset

Dataset({
    features: ['sentence', 'label', 'idx'],
    num_rows: 70042
})

In [13]:
dataset = combined_dataset.train_test_split(test_size=0.2, seed=1)

In [14]:
dataset

DatasetDict({
    train: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 56033
    })
    test: Dataset({
        features: ['sentence', 'label', 'idx'],
        num_rows: 14009
    })
})

# Analyzing the base model

In [15]:
base_model_llama = "meta-llama/Llama-3.2-1B"
tokenizer_llama = AutoTokenizer.from_pretrained(base_model_llama, trust_remote_code=True)
tokenizer_llama.pad_token = tokenizer_llama.eos_token

## Calculating number of parameters

In [16]:
def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        # if not parameter.requires_grad:
        #     continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f"Total Params: {total_params}")
    return total_params

### Base model parameters

In [18]:
from prettytable import PrettyTable
count_parameters(AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B"))

+-------------------------------------------------+------------+
|                     Modules                     | Parameters |
+-------------------------------------------------+------------+
|            model.embed_tokens.weight            | 262668288  |
|      model.layers.0.self_attn.q_proj.weight     |  4194304   |
|      model.layers.0.self_attn.k_proj.weight     |  1048576   |
|      model.layers.0.self_attn.v_proj.weight     |  1048576   |
|      model.layers.0.self_attn.o_proj.weight     |  4194304   |
|       model.layers.0.mlp.gate_proj.weight       |  16777216  |
|        model.layers.0.mlp.up_proj.weight        |  16777216  |
|       model.layers.0.mlp.down_proj.weight       |  16777216  |
|      model.layers.0.input_layernorm.weight      |    2048    |
|  model.layers.0.post_attention_layernorm.weight |    2048    |
|      model.layers.1.self_attn.q_proj.weight     |  4194304   |
|      model.layers.1.self_attn.k_proj.weight     |  1048576   |
|      model.layers.1.sel

1235814400

### AutoModelForSequenceClassification model parameters

In [20]:
model_llama = AutoModelForSequenceClassification.from_pretrained(base_model_llama, num_labels=2)

count_parameters(model_llama)

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


+-------------------------------------------------+------------+
|                     Modules                     | Parameters |
+-------------------------------------------------+------------+
|            model.embed_tokens.weight            | 262668288  |
|      model.layers.0.self_attn.q_proj.weight     |  4194304   |
|      model.layers.0.self_attn.k_proj.weight     |  1048576   |
|      model.layers.0.self_attn.v_proj.weight     |  1048576   |
|      model.layers.0.self_attn.o_proj.weight     |  4194304   |
|       model.layers.0.mlp.gate_proj.weight       |  16777216  |
|        model.layers.0.mlp.up_proj.weight        |  16777216  |
|       model.layers.0.mlp.down_proj.weight       |  16777216  |
|      model.layers.0.input_layernorm.weight      |    2048    |
|  model.layers.0.post_attention_layernorm.weight |    2048    |
|      model.layers.1.self_attn.q_proj.weight     |  4194304   |
|      model.layers.1.self_attn.k_proj.weight     |  1048576   |
|      model.layers.1.sel

1235818496

## Fine Tuning the score layer

In [5]:
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    TrainingArguments, 
    Trainer
)
from datasets import load_dataset
import numpy as np
import evaluate

dataset = load_dataset("glue", "sst2")

base_model = "meta-llama/Llama-3.2-1B"  # Replace with the correct model name
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # Use EOS token as pad token if missing

def preprocess_data(examples):
    tokens = tokenizer(
        examples["sentence"],
        padding="max_length",
        truncation=False,  # Truncate sequences exceeding max_length
        return_overflowing_tokens=True,
        max_length=128,  # Adjust based on the model's capacity
        stride=64
    )
    tokens["labels"] = examples["label"]
    return tokens

tokenized_datasets = dataset.map(preprocess_data, batched=True)

tokenized_datasets = tokenized_datasets.remove_columns(["sentence", "idx"])
tokenized_datasets.set_format("torch")

model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=2)

for name, param in model.named_parameters():
    if "score" not in name:  # The classifier layer has "score" in its name
        param.requires_grad = False

metric = evaluate.load("accuracy")

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average="binary")
    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

from transformers import TrainerCallback

class CustomLoggingCallback(TrainerCallback):
    def __init__(self, log_interval=300):
        super().__init__()
        self.log_interval = log_interval

    def on_step_end(self, args, state, control, **kwargs):
        if state.global_step % self.log_interval == 0:
            print(f"Logging metrics at step {state.global_step}:")
            print(f"Loss: {state.log_history[-1]['loss']}")
            print(f"Learning rate: {state.log_history[-1].get('learning_rate', 'N/A')}")
            print(f"Metrics: {state.log_history[-1]}")

from transformers import Trainer, DataCollatorForLanguageModeling, TrainingArguments, TrainerCallback

class ModelSaveCallback(TrainerCallback):
    def __init__(self, save_interval=0.25):
        super().__init__()
        self.save_interval = save_interval  # Interval to save model (fraction of epoch)
        self.last_saved_step = 0

    def on_step_end(self, args, state, control, **kwargs):
        current_epoch_fraction = state.global_step / state.max_steps * args.num_train_epochs

        if (current_epoch_fraction - self.last_saved_step) >= self.save_interval:
            self.last_saved_step = current_epoch_fraction  # Update the last saved step

            model_save_path = f"{wandb.run.dir}/model_epoch_{current_epoch_fraction:.2f}"
            model.save_pretrained(model_save_path)

            wandb.save(f"{model_save_path}/*")  # Save all files in the directory to W&B
            print(f"Model saved at {current_epoch_fraction:.2f} epochs")

training_args = TrainingArguments(
    output_dir="./sst2_llama_model2",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,  # Adjust based on available resources
    per_device_eval_batch_size=16,
    # gradient_accumulation_steps=4,  # Accumulate gradients over 4 steps
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    fp16=True,  # Enable mixed precision training
    save_total_limit=2,
    load_best_model_at_end=True,
    report_to=None  # Disable W&B for simplicity; use "wandb" if enabled
)

model.config.pad_token_id = tokenizer.pad_token_id

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[ModelSaveCallback(save_interval=0.25), CustomLoggingCallback(log_interval=100)],
)

trainer.train()

eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")

model.save_pretrained("./sst2_llama_model2")
tokenizer.save_pretrained("./sst2_llama_model2")

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/50.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/301 [00:00<?, ?B/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/843 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

  trainer = Trainer(
[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114478766666656, max=1.0…

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4273,0.419555,0.811927,0.816742,0.813063,0.814898
2,0.3672,0.381401,0.838303,0.846682,0.833333,0.839955
3,0.3761,0.372939,0.845183,0.85034,0.844595,0.847458


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Evaluation Results: {'eval_loss': 0.372938871383667, 'eval_accuracy': 0.8451834862385321, 'eval_precision': 0.8503401360544217, 'eval_recall': 0.8445945945945946, 'eval_f1': 0.847457627118644, 'eval_runtime': 48.3783, 'eval_samples_per_second': 18.025, 'eval_steps_per_second': 0.579, 'epoch': 3.0}


('./sst2_llama_model2/tokenizer_config.json',
 './sst2_llama_model2/special_tokens_map.json',
 './sst2_llama_model2/tokenizer.json')

In [65]:
model.config

LlamaConfig {
  "_name_or_path": "meta-llama/Llama-3.2-1B",
  "architectures": [
    "LlamaForSequenceClassification"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 64,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 131072,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 16,
  "num_key_value_heads": 8,
  "pad_token_id": 128001,
  "pretraining_tp": 1,
  "problem_type": "single_label_classification",
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "factor": 32.0,
    "high_freq_factor": 4.0,
    "low_freq_factor": 1.0,
    "original_max_position_embeddings": 8192,
    "rope_type": "llama3"
  },
  "rope_theta": 500000.0,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.46.3",
  "use_cache": true,
  "vocab_size": 128256
}

In [66]:
model.eval()

LlamaForSequenceClassification(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 2048)
    (layers): ModuleList(
      (0-15): 16 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=512, bias=False)
          (v_proj): Linear(in_features=2048, out_features=512, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (up_proj): Linear(in_features=2048, out_features=8192, bias=False)
          (down_proj): Linear(in_features=8192, out_features=2048, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-05)
      )
   

## Upload the model to huggingface

### Create a new repo

In [34]:
from huggingface_hub import create_repo

repo_id = "SumeetSawale/nlp_a3_sst_working"  # Update with your desired repository name
create_repo(repo_id, repo_type="model", exist_ok=True)  # Set exist_ok=True to avoid errors if it already exists

print(f"Repository '{repo_id}' created successfully.")

Repository 'SumeetSawale/nlp_a3_sst_working' created successfully.


### Push files to the repo

In [35]:
from huggingface_hub import upload_folder

folder_path = "/kaggle/working"

upload_folder(
    folder_path=folder_path,
    repo_id=repo_id,
    repo_type="model",
    commit_message="Upload full working directory"
)

print(f"Model uploaded successfully to {repo_id}.")

Upload 17 LFS files:   0%|          | 0/17 [00:00<?, ?it/s]

events.out.tfevents.1732084343.4e81e9dea07a.30.0:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

events.out.tfevents.1732098146.4e81e9dea07a.30.1:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/34.9k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.24k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

optimizer.pt:   0%|          | 0.00/34.9k [00:00<?, ?B/s]

rng_state.pth:   0%|          | 0.00/14.2k [00:00<?, ?B/s]

scheduler.pt:   0%|          | 0.00/1.06k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.24k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

run-psm9kk1n.wandb:   0%|          | 0.00/7.80M [00:00<?, ?B/s]

Model uploaded successfully to SumeetSawale/nlp_a3_sst_working.


## Analyze number of parameters in fine-tuned model

In [17]:
!pip install prettytable

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [18]:
from prettytable import PrettyTable

### Check trainable params

In [19]:
def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad:
            continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params

In [20]:
count_parameters(model)

+--------------+------------+
|   Modules    | Parameters |
+--------------+------------+
| score.weight |    4096    |
+--------------+------------+
Total Trainable Params: 4096


4096

### Check all params

In [22]:
def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        # if not parameter.requires_grad:
        #     continue
        params = parameter.numel()
        table.add_row([name, params])
        total_params += params
    print(table)
    print(f"Total Params: {total_params}")
    return total_params

In [23]:
count_parameters(model)

+-------------------------------------------------+------------+
|                     Modules                     | Parameters |
+-------------------------------------------------+------------+
|            model.embed_tokens.weight            | 262668288  |
|      model.layers.0.self_attn.q_proj.weight     |  4194304   |
|      model.layers.0.self_attn.k_proj.weight     |  1048576   |
|      model.layers.0.self_attn.v_proj.weight     |  1048576   |
|      model.layers.0.self_attn.o_proj.weight     |  4194304   |
|       model.layers.0.mlp.gate_proj.weight       |  16777216  |
|        model.layers.0.mlp.up_proj.weight        |  16777216  |
|       model.layers.0.mlp.down_proj.weight       |  16777216  |
|      model.layers.0.input_layernorm.weight      |    2048    |
|  model.layers.0.post_attention_layernorm.weight |    2048    |
|      model.layers.1.self_attn.q_proj.weight     |  4194304   |
|      model.layers.1.self_attn.k_proj.weight     |  1048576   |
|      model.layers.1.sel

1235818496

## Analyze model predictions

In [25]:
predictions = trainer.predict(tokenized_datasets["validation"])

logits = predictions.predictions
predicted_labels = np.argmax(logits, axis=-1)  # Get the class with the highest probability

true_labels = predictions.label_ids

label_map = {0: "Negative", 1: "Positive"}  # Adjust according to your dataset
predicted_classes = [label_map[label] for label in predicted_labels]
true_classes = [label_map[label] for label in true_labels]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


In [27]:
sentences = dataset["validation"]["sentence"]

for i in range(20):
    print(f"Sentence {i+1}: {sentences[i]}")
    print(f"Predicted: {predicted_classes[i]} ({predicted_labels[i]})")
    print(f"Actual:    {true_classes[i]} ({true_labels[i]})\n")

Sentence 1: it 's a charming and often affecting journey . 
Predicted: Positive (1)
Actual:    Positive (1)

Sentence 2: unflinchingly bleak and desperate 
Predicted: Positive (1)
Actual:    Negative (0)

Sentence 3: allows us to hope that nolan is poised to embark a major career as a commercial yet inventive filmmaker . 
Predicted: Positive (1)
Actual:    Positive (1)

Sentence 4: the acting , costumes , music , cinematography and sound are all astounding given the production 's austere locales . 
Predicted: Positive (1)
Actual:    Positive (1)

Sentence 5: it 's slow -- very , very slow . 
Predicted: Negative (0)
Actual:    Negative (0)

Sentence 6: although laced with humor and a few fanciful touches , the film is a refreshingly serious look at young women . 
Predicted: Positive (1)
Actual:    Positive (1)

Sentence 7: a sometimes tedious film . 
Predicted: Negative (0)
Actual:    Negative (0)

Sentence 8: or doing last year 's taxes with your ex-wife . 
Predicted: Negative (0)
Actu

## Custom inputs

In [54]:
custom_dataset = ["I love this!", "This was the worst experience ever."]

custom_tokens = tokenizer(
    custom_dataset, 
    padding=True, 
    truncation=True, 
    return_tensors="pt", 
    max_length=128
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

custom_tokens = {key: value.to(device) for key, value in custom_tokens.items()}

with torch.no_grad():
    custom_outputs = model(**custom_tokens)

custom_logits = custom_outputs.logits
custom_predicted_labels = torch.argmax(custom_logits, dim=-1).tolist()

label_map = ["Negative", "Positive"]
custom_predicted_classes = [label_map[label] for label in custom_predicted_labels]

for sentence, prediction in zip(custom_dataset, custom_predicted_classes):
    print(f"Sentence: {sentence}\nPrediction: {prediction}\n")

Sentence: I love this!
Prediction: Positive

Sentence: This was the worst experience ever.
Prediction: Negative

