# This is a sample Jupyter Notebook

Below is an example of a code cell. 
Put your cursor into the cell and press Shift+Enter to execute it and select the next one, or click 'Run Cell' button.

Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.

To learn more about Jupyter Notebooks in PyCharm, see [help](https://www.jetbrains.com/help/pycharm/ipython-notebook-support.html).
For an overview of PyCharm, go to Help -> Learn IDE features or refer to [our documentation](https://www.jetbrains.com/help/pycharm/getting-started.html).

In [1]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
NVIDIA GeForce RTX 2060


In [2]:
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding,
)
from datasets import load_dataset
from evaluate import load
import wandb

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
wandb.login()

wandb: Currently logged in as: kostic-stojan23 (kostic-stojan23-university-of-belgrade). Use `wandb login --relogin` to force relogin


True

In [4]:
raw_datasets = load_dataset("imdb")
metric = load("accuracy")

tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, padding=False)

tokenized_datasets = raw_datasets.map(preprocess_function, batched=True)

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

In [5]:
train_dataset = tokenized_datasets["train"]
test_dataset = tokenized_datasets["test"]

model = DistilBertForSequenceClassification.from_pretrained(
    "distilbert-base-uncased", num_labels=2
)
model.to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [7]:
data_collator = DataCollatorWithPadding(tokenizer)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), axis=1)
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    logging_dir="./logs",
    logging_steps=50,
    report_to="wandb",
    run_name="NLP_MAML_PROJECT",
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)


In [8]:
trainer.train()

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2284,0.244889,0.90124
2,0.1402,0.237675,0.92836
3,0.06,0.305039,0.93148


TrainOutput(global_step=4689, training_loss=0.16076723696402992, metrics={'train_runtime': 4921.0181, 'train_samples_per_second': 15.241, 'train_steps_per_second': 0.953, 'total_flos': 9834539051060448.0, 'train_loss': 0.16076723696402992, 'epoch': 3.0})

In [9]:
results = trainer.evaluate()
print(f"Results: {results}")

Results: {'eval_loss': 0.2376745641231537, 'eval_accuracy': 0.92836, 'eval_runtime': 391.4413, 'eval_samples_per_second': 63.867, 'eval_steps_per_second': 3.993, 'epoch': 3.0}


In [13]:
save_path = "./NLP_ver1"  # Path where you want to save the model
trainer.save_model(save_path)

# Save the tokenizer as well
tokenizer.save_pretrained(save_path)

('./NLP_ver1\\tokenizer_config.json',
 './NLP_ver1\\special_tokens_map.json',
 './NLP_ver1\\vocab.txt',
 './NLP_ver1\\added_tokens.json',
 './NLP_ver1\\tokenizer.json')

In [15]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import torch

save_path = "./NLP_ver1"
model = DistilBertForSequenceClassification.from_pretrained(save_path)
tokenizer = DistilBertTokenizer.from_pretrained(save_path)

test_texts = [
    "I love this movie, it's fantastic!",
    "This movie was terrible, I hated it.",
    "It was an okay movie, not bad but not great either."
]

# Tokenize the inputs
inputs = tokenizer(test_texts, padding=True, truncation=True, return_tensors="pt")

# Make predictions (ensure model is in evaluation mode)
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

predictions = torch.argmax(logits, dim=-1)

predicted_labels = ['positive' if label == 1 else 'negative' for label in predictions]

# Print the results
for text, label in zip(test_texts, predicted_labels):
    print(f"Text: {text} -> Predicted Sentiment: {label}")

Text: I love this movie, it's fantastic! -> Predicted Sentiment: positive
Text: This movie was terrible, I hated it. -> Predicted Sentiment: negative
Text: It was an okay movie, not bad but not great either. -> Predicted Sentiment: negative
