In [1]:
import pathlib

import evaluate
import numpy as np
import torch
import torch.nn as nn
from datasets import Dataset
from sklearn.metrics import f1_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForSequenceClassification, BertForSequenceClassification
from transformers import Trainer, TrainingArguments


# Checkpoint directories; everything is loaded with local_files_only=True,
# so nothing is fetched from the Hugging Face hub.
SENTIMENT_MODEL_DIR = "./sentiment_model"
STARS_MODEL_DIR = "./stars_model"

# A single tokenizer (stored with the sentiment checkpoint) is used for both models.
# NOTE(review): assumes the stars checkpoint was trained with the same vocabulary — confirm.
tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_DIR, local_files_only=True)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_DIR, local_files_only=True)
stars_model = AutoModelForSequenceClassification.from_pretrained(STARS_MODEL_DIR, local_files_only=True)


def preprocess_imdb(dataset_dir):
    """Load one IMDB split from disk into three parallel lists.

    ``dataset_dir`` must contain ``pos`` and ``neg`` sub-directories with one
    review per ``.txt`` file. The star rating is parsed from the file name,
    which is expected to look like ``<id>_<stars>.txt``.

    Args:
        dataset_dir: Path (string or path-like) of the split directory,
            e.g. ``'aclImdb/train'``.

    Returns:
        A ``(texts, sentiments, stars)`` tuple of equally long lists:
        the review text, the sentiment label (1 for ``pos``, 0 for ``neg``)
        and the star rating shifted to be zero-based (``stars - 1``).
        Reviews are ordered ``pos`` first, then ``neg``, each sorted by path
        so the result is identical across runs and file systems.

    Raises:
        FileNotFoundError: If the ``pos`` or ``neg`` directory is missing.
        IndexError, ValueError: If a ``.txt`` file name does not follow the
            ``<id>_<stars>.txt`` pattern.
    """
    cur_dir = pathlib.Path(dataset_dir)
    texts = []
    sentiments = []
    stars = []
    for sentiment_dir in ['pos', 'neg']:
        sentiment_label = 1 if sentiment_dir == 'pos' else 0
        # iterdir() yields entries in arbitrary, OS-dependent order; sort them
        # for reproducibility and skip anything that is not a review file.
        review_files = sorted(
            entry for entry in (cur_dir / sentiment_dir).iterdir()
            if entry.is_file() and entry.suffix == '.txt'
        )
        for text_file in review_files:
            texts.append(text_file.read_text(encoding='utf-8'))
            sentiments.append(sentiment_label)
            # File stem is "<id>_<stars>"; the rating is the second field.
            star_count = int(text_file.stem.split('_')[1])
            stars.append(star_count - 1)

    return texts, sentiments, stars


# Read both splits; each result is a (texts, sentiments, stars) triple of
# parallel lists as returned by preprocess_imdb.
train_split = preprocess_imdb('aclImdb/train')
test_split = preprocess_imdb('aclImdb/test')

texts_train, sentiments_train, stars_train = train_split
texts_test, sentiments_test, stars_test = test_split

In [2]:
# Tokenizer options shared by both splits: every review is truncated or padded
# to exactly 256 tokens and returned as PyTorch tensors.
tokenizer_kwargs = dict(truncation=True, padding="max_length", return_tensors="pt", max_length=256)

train_encodings = tokenizer(texts_train, **tokenizer_kwargs)
test_encodings = tokenizer(texts_test, **tokenizer_kwargs)


def _build_dataset(encodings, labels):
    """Wrap tokenizer output and integer labels in a torch-formatted Dataset.

    Args:
        encodings: Tokenizer output providing "input_ids" and "attention_mask".
        labels: Sequence of integer class labels, one per encoded example.

    Returns:
        A ``datasets.Dataset`` with columns "input_ids", "attention_mask" and
        "labels", whose output format is set to PyTorch tensors.
    """
    dataset = Dataset.from_dict({
        "input_ids": encodings["input_ids"],
        "attention_mask": encodings["attention_mask"],
        "labels": torch.tensor(labels),
    })
    dataset.set_format("pt")
    return dataset


# Training data carries the binary sentiment labels.
train_dataset = _build_dataset(train_encodings, sentiments_train)

# The two test datasets share the same encodings and differ only in labels.
sentiments_test_dataset = _build_dataset(test_encodings, sentiments_test)
stars_test_dataset = _build_dataset(test_encodings, stars_test)

In [12]:
# Sanity check: (number of training reviews, padded sequence length).
train_encodings["input_ids"].size()

torch.Size([25000, 256])

In [14]:
# Run configuration reused by every Trainer in this notebook. In the cells
# shown here the Trainers are only used for `predict`, so the evaluation
# batch size is the setting that matters.
training_args = TrainingArguments(
    output_dir='tmp',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
)

# Trainer wrapping the binary sentiment classifier.
sentiment_trainer = Trainer(
    model=sentiment_model,
    args=training_args,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
)

# Run inference over the whole test split; the result bundles the raw logits,
# the gold label ids and loss/runtime metrics.
sentiment_res = sentiment_trainer.predict(sentiments_test_dataset)



PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
***** Running Prediction *****
  Num examples = 25000
  Batch size = 32


PredictionOutput(predictions=array([[-2.1877115 ,  2.2571092 ],
       [-1.7224199 ,  1.685449  ],
       [-1.9839073 ,  1.8819491 ],
       ...,
       [ 1.4794309 , -1.9533433 ],
       [ 1.8856187 , -1.955772  ],
       [ 0.31323573, -1.0763208 ]], dtype=float32), label_ids=array([1, 1, 1, ..., 0, 0, 0], dtype=int64), metrics={'test_loss': 0.21519549190998077, 'test_runtime': 158.1381, 'test_samples_per_second': 158.09, 'test_steps_per_second': 4.945})


In [20]:
# Convert the per-class logits into hard predictions: the predicted class is
# the index of the largest logit along the last axis.
sentiment_preds = np.argmax(sentiment_res.predictions, axis=-1)

# Accuracy is the fraction of exact matches; np.argmax already returns an
# ndarray, so only the Python list of gold labels needs converting.
sentiment_accuracy = (sentiment_preds == np.array(sentiments_test)).mean()
# Binary F1 with the positive class (label 1) as the target, sklearn's default.
sentiment_f1 = f1_score(sentiments_test, sentiment_preds)

print("Sentiment Model - Test Accuracy:", sentiment_accuracy)
print("Sentiment Model - Test F1:", sentiment_f1)


Sentiment Model - Test Accuracy: 0.91824
Sentiment Model - Test F1: 0.9185462660396907


In [22]:
# Evaluation-only Trainer for the star-rating classifier. No train_dataset is
# attached on purpose: `train_dataset` carries binary sentiment labels, which
# do not match the zero-based star labels this model is evaluated on, so
# leaving it attached would make an accidental `.train()` call silently wrong.
stars_trainer = Trainer(
    model=stars_model,
    args=training_args,
    tokenizer=tokenizer
)

# Raw logits, gold label ids and metrics for the star-rating test set.
res = stars_trainer.predict(stars_test_dataset)


***** Running Prediction *****
  Num examples = 25000
  Batch size = 32


In [48]:
# Predicted star class for each review = index of its largest logit.
stars_preds = np.argmax(res.predictions, axis=-1)
stars_true = np.array(stars_test)

# Exact-match accuracy over the zero-based star labels.
stars_accuracy = np.mean(stars_preds == stars_true)
# Micro-averaged F1 on single-label multiclass predictions coincides with
# accuracy, which is why the two printed values are identical.
stars_f1 = f1_score(stars_test, stars_preds, average='micro')
# Mean absolute distance, in stars, between predicted and true rating.
stars_mae = mean_absolute_error(stars_test, stars_preds)

print("Stars Model - Test Accuracy:", stars_accuracy)
print("Stars Model - Test F1:", stars_f1)
print("Stars Model - Test MAE:", stars_mae)

Stars Model - Test Accuracy: 0.46212
Stars Model - Test F1: 0.46212
Stars Model - Test MAE: 1.15804


In [41]:
# "One-off" accuracy: fraction of reviews whose predicted star class is at
# most one class away from the true label. Uses `stars_preds` (the notebook
# never defines a bare `preds`, so the old name failed on a fresh kernel).
stars_one_off_accuracy = (np.abs(np.array(stars_preds) - np.array(stars_test)) <= 1).mean()
print(stars_one_off_accuracy)

0.70704
