In [1]:
!pip install transformers datasets torch scikit-learn nltk tqdm spacy



## Import Dataset

In [2]:
from datasets import load_dataset

dataset = load_dataset("ilos-vigil/steam-review-aspect-dataset")
dataset

README.md:   0%|          | 0.00/9.68k [00:00<?, ?B/s]

data-00000-of-00001.arrow:   0%|          | 0.00/2.59M [00:00<?, ?B/s]

data-00000-of-00001.arrow:   0%|          | 0.00/529k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/900 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/200 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['appid', 'review', 'cleaned_review', 'labels'],
        num_rows: 900
    })
    test: Dataset({
        features: ['appid', 'review', 'cleaned_review', 'labels'],
        num_rows: 200
    })
})

In [3]:
import numpy as np

# Extract labels from the dataset
train_labels = np.array(dataset["train"]["labels"])  # Convert to NumPy array

# Calculate the frequency for each aspect
label_frequencies = np.sum(train_labels, axis=0)

# Display the frequencies
for i, freq in enumerate(label_frequencies):
    print(f"Aspect {i}: {freq} occurrences")

Aspect 0: 667.0 occurrences
Aspect 1: 400.0 occurrences
Aspect 2: 693.0 occurrences
Aspect 3: 391.0 occurrences
Aspect 4: 227.0 occurrences
Aspect 5: 259.0 occurrences
Aspect 6: 213.0 occurrences
Aspect 7: 97.0 occurrences


## Load Model

In [4]:
from transformers import XLNetForSequenceClassification, XLNetTokenizer

model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=8)
tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]



## Fine-tune Model

In [5]:
from transformers import Trainer, TrainingArguments
import torch
from nltk.tokenize import sent_tokenize
import os

os.environ["WANDB_DISABLED"] = "true"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

def preprocess_data(examples):
    inputs = tokenizer(examples["review"], truncation=True, padding="max_length", max_length=128)
    inputs["labels"] = examples["labels"]  # Dataset has multi-labels for each review
    return inputs

tokenized_datasets = dataset.map(preprocess_data, batched=True)

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
)

trainer.train()

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Epoch,Training Loss,Validation Loss
1,0.5563,0.543317
2,0.5226,0.484895
3,0.4618,0.462478
4,0.3992,0.441454
5,0.3829,0.429864
6,0.3607,0.420869
7,0.3316,0.418582
8,0.3101,0.420658
9,0.2876,0.417443
10,0.2924,0.414777


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(devic

TrainOutput(global_step=290, training_loss=0.3960369381411322, metrics={'train_runtime': 254.8609, 'train_samples_per_second': 35.313, 'train_steps_per_second': 1.138, 'total_flos': 641012520960000.0, 'train_loss': 0.3960369381411322, 'epoch': 10.0})

## Evaluate

In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
import torch
import numpy as np

def relaxed_accuracy(y_true, y_pred):
    per_sample_accuracies = [
        sum(y_t == y_p for y_t, y_p in zip(true, pred)) / len(true)
        for true, pred in zip(y_true, y_pred)
    ]
    return sum(per_sample_accuracies) / len(per_sample_accuracies)

# Define aspects and threshold
aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
threshold = 0.6

# Prepare ground truth and predictions
ground_truth = []
predictions = []

# Iterate through the dataset to evaluate
for example in dataset["test"]:  # Replace "test" with the relevant split
    # Ground truth for this sample
    ground_truth.append(example["labels"])  # Assuming multi-hot encoding for ground truth labels
    
    # Model prediction for this sample
    inputs = tokenizer(example["review"], return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()

    # Convert probabilities to binary predictions
    binary_predictions = (probs > threshold).astype(int)
    predictions.append(binary_predictions)

# Convert to numpy arrays for evaluation
ground_truth = np.array(ground_truth)
predictions = np.array(predictions)

# Compute metrics
relaxed_accuracy_score = relaxed_accuracy(ground_truth, predictions)
precision = precision_score(ground_truth, predictions, average="micro")
recall = recall_score(ground_truth, predictions, average="micro")
f1 = f1_score(ground_truth, predictions, average="micro")
hamming = hamming_loss(ground_truth, predictions)

print(f"Accuracy: {relaxed_accuracy_score:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Hamming Loss: {hamming:.4f}")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Accuracy: 0.8544
Precision: 0.8611
Recall: 0.7676
F1-Score: 0.8116
Hamming Loss: 0.1456


## Aspect Extraction

In [None]:
review = """The best game ever made by Valve. Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends. You made a great work, Valve, and thank You for this absolute masterpiece. 10/10."""
sentences = sent_tokenize(review)

aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
aspect_sentences = {aspect: [] for aspect in aspect_labels}

for sentence in sentences:
    inputs = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    
    probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
    for idx, prob in enumerate(probs):
        if prob > 0.6:  # Threshold for relevance
            aspect_sentences[aspect_labels[idx]].append((sentence, prob))

print(aspect_sentences)