In [3]:
from datasets import load_dataset

dataset = load_dataset("Sp1786/multiclass-sentiment-analysis-dataset")
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 31232
    })
    validation: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 5205
    })
    test: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 5206
    })
})

In [21]:
from datasets import DatasetDict

# Take 100 rows from each split using .select
small_train = dataset["train"].select(range(100))
small_val   = dataset["validation"].select(range(100))
small_test  = dataset["test"].select(range(100))

# Build new DatasetDict
small_dataset = DatasetDict({
    "train": small_train,
    "validation": small_val,
    "test": small_test
})

print(small_dataset)


DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 100
    })
    validation: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'text', 'label', 'sentiment'],
        num_rows: 100
    })
})


In [10]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [22]:

def preprocess(batch):
    return tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128)

tokenized_dataset = small_dataset.map(preprocess, batched=True)

tokenized_dataset

Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 1876.49 examples/s]
Map: 100%|██████████████████████████| 100/100 [00:00<00:00, 15484.56 examples/s]
Map: 100%|██████████████████████████| 100/100 [00:00<00:00, 17252.70 examples/s]


DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'label', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
    validation: Dataset({
        features: ['id', 'text', 'label', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'text', 'label', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
})

In [23]:
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")


In [24]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'labels', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
    validation: Dataset({
        features: ['id', 'text', 'labels', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'text', 'labels', 'sentiment', 'input_ids', 'attention_mask'],
        num_rows: 100
    })
})

# Baseline

In [25]:
import evaluate
import numpy as np
from transformers import Trainer

from evaluate import load
accuracy = load("accuracy")
f1 = load("f1")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    acc = accuracy.compute(predictions=preds, references=labels)
    f1_macro = f1.compute(predictions=preds, references=labels, average="macro")
    return {"accuracy": acc["accuracy"], "f1_macro": f1_macro["f1"]}

trainer = Trainer(
    model=model,   # your untrained model
    eval_dataset=tokenized_dataset["test"],  # or validation split
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


Downloading builder script: 6.79kB [00:00, 2.51MB/s]
  trainer = Trainer(


In [26]:
results = trainer.evaluate()
results



{'eval_loss': 1.0947623252868652,
 'eval_model_preparation_time': 0.0021,
 'eval_accuracy': 0.35,
 'eval_f1_macro': 0.26352201257861635,
 'eval_runtime': 3.289,
 'eval_samples_per_second': 30.404,
 'eval_steps_per_second': 3.953}

# Train

In [32]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()


  trainer = Trainer(


Step,Training Loss


TrainOutput(global_step=70, training_loss=0.5171008518763951, metrics={'train_runtime': 32.9484, 'train_samples_per_second': 30.35, 'train_steps_per_second': 2.125, 'total_flos': 33117440256000.0, 'train_loss': 0.5171008518763951, 'epoch': 10.0})

In [33]:
results = trainer.evaluate(tokenized_dataset["test"])
results



{'eval_loss': 0.9336875677108765,
 'eval_accuracy': 0.59,
 'eval_f1_macro': 0.5838734090127279,
 'eval_runtime': 0.7282,
 'eval_samples_per_second': 137.323,
 'eval_steps_per_second': 9.613,
 'epoch': 10.0}

In [34]:
results = trainer.evaluate(tokenized_dataset["validation"])
results



{'eval_loss': 0.9807398915290833,
 'eval_accuracy': 0.55,
 'eval_f1_macro': 0.531547619047619,
 'eval_runtime': 0.5768,
 'eval_samples_per_second': 173.365,
 'eval_steps_per_second': 12.136,
 'epoch': 10.0}

# After Finetune

In [35]:
from transformers import pipeline

sentiment_pipeline = pipeline(
    "text-classification",
    model=trainer.model,
    tokenizer=tokenizer,
    return_all_scores=True   # get probabilities for all classes
)

# Example texts
examples = [
    "I loved this movie, it was amazing!",
    "The film was okay, nothing special.",
    "This was terrible, I hated it."
]

for text in examples:
    print(text, "->", sentiment_pipeline(text))


Device set to use mps:0


I loved this movie, it was amazing! -> [[{'label': 'LABEL_0', 'score': 0.0899367555975914}, {'label': 'LABEL_1', 'score': 0.08619707822799683}, {'label': 'LABEL_2', 'score': 0.8238661885261536}]]
The film was okay, nothing special. -> [[{'label': 'LABEL_0', 'score': 0.5856354832649231}, {'label': 'LABEL_1', 'score': 0.19563674926757812}, {'label': 'LABEL_2', 'score': 0.21872778236865997}]]
This was terrible, I hated it. -> [[{'label': 'LABEL_0', 'score': 0.7305793762207031}, {'label': 'LABEL_1', 'score': 0.12306223064661026}, {'label': 'LABEL_2', 'score': 0.14635835587978363}]]


# Save model

In [42]:
# Add label mapping
model.config.id2label = {0: "negative", 1: "neutral", 2: "positive"}
model.config.label2id = {"negative": 0, "neutral": 1, "positive": 2}

# Save model + tokenizer
model.save_pretrained("./sentiment_model")
tokenizer.save_pretrained("./sentiment_model")


('./sentiment_model/tokenizer_config.json',
 './sentiment_model/special_tokens_map.json',
 './sentiment_model/vocab.txt',
 './sentiment_model/added_tokens.json',
 './sentiment_model/tokenizer.json')

In [43]:
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("./sentiment_model")
tokenizer = AutoTokenizer.from_pretrained("./sentiment_model")

sentiment_pipeline = pipeline("text-classification", model=model, tokenizer=tokenizer, return_all_scores=True)

print(sentiment_pipeline("The movie was wonderful!"))


Device set to use mps:0


[[{'label': 'negative', 'score': 0.258513480424881}, {'label': 'neutral', 'score': 0.1351109892129898}, {'label': 'positive', 'score': 0.606375515460968}]]


In [44]:
text = "I really loved this movie, it was amazing!"

outputs = sentiment_pipeline(text, return_all_scores=True)[0]
best = max(outputs, key=lambda x: x['score'])

print(f"Text: {text}")
print(f"Predicted sentiment: {best['label']} ({best['score']:.2f})")


Text: I really loved this movie, it was amazing!
Predicted sentiment: positive (0.82)


In [47]:
texts = [
    "I really loved this movie, it was amazing!",
    "any plans to go",
    "This was terrible, I hated it."
]

for t in texts:
    outputs = sentiment_pipeline(t, return_all_scores=True)[0]
    best = max(outputs, key=lambda x: x['score'])
    print(f"{t} -> {best['label']} ({best['score']:.2f})")


I really loved this movie, it was amazing! -> positive (0.82)
any plans to go -> neutral (0.41)
This was terrible, I hated it. -> negative (0.73)
