In [None]:
!pip install torch transformers accelerator datasets evaluate tqdm pandas nltk scikit-learn

Collecting accelerator
  Downloading accelerator-2024.9.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting bottle<0.13,>=0.12.7 (from accelerator)
  Downloading bottle-0.12.25-py3-none-any.whl.metadata (1.8 kB)
Collecting waitress>=1.0 (from accelerator)
  Downloading waitress-3.0.2-py3-none-any.whl.metadata (5.8 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading accel

## Import Aspect Classification Dataset

In [None]:
from datasets import load_dataset

# Fetch the Steam review aspect dataset from the Hugging Face Hub.
# The bare name on the last line renders the split / column summary as the
# cell output.
dataset = load_dataset(path="ilos-vigil/steam-review-aspect-dataset")
dataset

README.md:   0%|          | 0.00/9.68k [00:00<?, ?B/s]

data-00000-of-00001.arrow:   0%|          | 0.00/2.59M [00:00<?, ?B/s]

data-00000-of-00001.arrow:   0%|          | 0.00/529k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/900 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/200 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['appid', 'review', 'cleaned_review', 'labels'],
        num_rows: 900
    })
    test: Dataset({
        features: ['appid', 'review', 'cleaned_review', 'labels'],
        num_rows: 200
    })
})

## Load Aspect Classification Model

In [None]:
from transformers import XLNetForSequenceClassification, XLNetTokenizer

# Pretrained XLNet encoder with a freshly initialised 8-output classification
# head (one output per entry of the aspect label list used later on).
# `problem_type` is stated explicitly so the model always applies a per-label
# sigmoid / BCE loss to the multi-hot targets instead of leaving the choice of
# loss to Transformers' inference from the label dtype.
# NOTE(review): BCE expects float targets — confirm the dtype of the dataset's
# `labels` column.
aspect_model = XLNetForSequenceClassification.from_pretrained(
    "xlnet-base-cased",
    num_labels=8,
    problem_type="multi_label_classification",
)
aspect_tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Fine-tune Model

In [None]:
from transformers import Trainer, TrainingArguments
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
import torch
from nltk.tokenize import sent_tokenize
import os

# Switch off the Weights & Biases integration before any Trainer is built.
os.environ["WANDB_DISABLED"] = "true"

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU, and
# place the aspect classifier on that device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
aspect_model = aspect_model.to(device)

def preprocess_data(examples):
    """Tokenize a batch of raw reviews and attach their label vectors.

    Args:
        examples: A batch from `dataset.map(..., batched=True)`; the "review"
            and "labels" columns are read.

    Returns:
        The tokenizer output (every review padded / truncated to 128 tokens)
        with the batch's "labels" added under the same key.
    """
    encoded = aspect_tokenizer(
        examples["review"],
        max_length=128,
        padding="max_length",
        truncation=True,
    )
    # Dataset has multi-labels for each review
    encoded["labels"] = examples["labels"]
    return encoded


# Apply the tokenization to every split of the DatasetDict in batches.
tokenized_datasets = dataset.map(preprocess_data, batched=True)

def compute_metrics(pred):
    """Micro-averaged precision / recall / F1 for the multi-label aspect model.

    Args:
        pred: The `(logits, labels)` pair the Trainer passes to its metric
            callback.

    Returns:
        A dict with the keys "precision", "recall" and "f1".
    """
    logits, labels = pred
    # The model emits raw logits, not probabilities. A probability cut-off of
    # 0.5 is sigmoid(logit) > 0.5, which is exactly logit > 0. Comparing the
    # raw logits against 0.5 (as before) silently used a stricter ~0.62
    # probability threshold.
    predictions = (logits > 0).astype(int)

    # Compute metrics
    precision = precision_score(labels, predictions, average="micro")
    recall = recall_score(labels, predictions, average="micro")
    f1 = f1_score(labels, predictions, average="micro")

    return {
        "precision": precision,
        "recall": recall,
        "f1": f1
    }

# Hyper-parameters for fine-tuning the aspect classifier.
# `report_to="none"` turns off every logging integration (W&B included); this is
# the replacement the deprecation warning for WANDB_DISABLED asks for.
# NOTE(review): `save_steps=10` together with `save_total_limit=2` rewrites a
# full checkpoint every 10 optimizer steps — confirm that frequency is wanted.
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",
)

# The test split is evaluated at the end of every epoch (eval_strategy="epoch").
trainer = Trainer(
    model=aspect_model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=aspect_tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Precision,Recall,F1
1,0.5566,0.548726,0.758794,0.461774,0.574144
2,0.4945,0.503667,0.766798,0.593272,0.668966
3,0.4565,0.477402,0.783582,0.642202,0.705882
4,0.4173,0.453651,0.801869,0.655963,0.721615
5,0.387,0.438875,0.837953,0.600917,0.699911
6,0.3552,0.432018,0.828516,0.657492,0.733163
7,0.348,0.425044,0.820702,0.678899,0.743096
8,0.3237,0.418754,0.854,0.652905,0.740035
9,0.282,0.418429,0.839015,0.67737,0.749577
10,0.307,0.41782,0.838095,0.672783,0.746395


TrainOutput(global_step=570, training_loss=0.3985462230548524, metrics={'train_runtime': 1032.7867, 'train_samples_per_second': 8.714, 'train_steps_per_second': 0.552, 'total_flos': 592031766528000.0, 'train_loss': 0.3985462230548524, 'epoch': 10.0})

## Evaluate Aspect Classification Model

In [None]:
import torch
import numpy as np

def relaxed_accuracy(y_true, y_pred):
    """Mean per-sample fraction of label positions predicted correctly.

    Args:
        y_true: Sequence of ground-truth label vectors.
        y_pred: Sequence of predicted label vectors, aligned with `y_true`.

    Returns:
        The average, over all samples, of (matching positions / vector length).
    """
    sample_scores = []
    for true_row, pred_row in zip(y_true, y_pred):
        agreeing = 0
        for expected, predicted in zip(true_row, pred_row):
            agreeing = agreeing + (expected == predicted)
        sample_scores.append(agreeing / len(true_row))
    return sum(sample_scores) / len(sample_scores)

# Aspect names (index i corresponds to model output i) and the probability
# cut-off above which an aspect counts as present.
aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
threshold = 0.6
# Token budget used by `preprocess_data` during fine-tuning. Without an explicit
# max_length, `truncation=True` has nothing to truncate to, so long reviews
# would be scored at a length the model never saw in training.
eval_max_length = 128

# Prepare ground truth and predictions
ground_truth = []
predictions = []

# Make sure dropout is disabled while scoring.
aspect_model.eval()

# Score the held-out split one review at a time.
for example in dataset["test"]:
    # Multi-hot label vector for this review
    ground_truth.append(example["labels"])

    # Model prediction for this sample
    inputs = aspect_tokenizer(
        example["review"],
        return_tensors="pt",
        truncation=True,
        max_length=eval_max_length,
        padding=True,
    ).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)
    # Drop only the batch axis (batch size is 1 here).
    probs = torch.sigmoid(outputs.logits).squeeze(0).cpu().numpy()

    # Convert probabilities to binary predictions
    binary_predictions = (probs > threshold).astype(int)
    predictions.append(binary_predictions)

# Convert to numpy arrays for evaluation
ground_truth = np.array(ground_truth)
predictions = np.array(predictions)

# Compute metrics
relaxed_accuracy_score = relaxed_accuracy(ground_truth, predictions)
precision = precision_score(ground_truth, predictions, average="micro")
recall = recall_score(ground_truth, predictions, average="micro")
f1 = f1_score(ground_truth, predictions, average="micro")
hamming = hamming_loss(ground_truth, predictions)

print(f"Accuracy: {relaxed_accuracy_score:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Hamming Loss: {hamming:.4f}")

Accuracy: 0.8356
Precision: 0.8138
Recall: 0.7752
F1-Score: 0.7940
Hamming Loss: 0.1644


## Aspect Extraction

In [None]:
import nltk

# `sent_tokenize` needs NLTK's `punkt_tab` resource, which this notebook only
# downloads in a later cell; fetch it here so the cell also runs on a fresh
# kernel (the call does not download again when the data is already current).
nltk.download("punkt_tab", quiet=True)

review = """The best game ever made by Valve. Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends. You made a great work, Valve, and thank You for this absolute masterpiece. 10/10."""
sentences = sent_tokenize(review)

aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
# Maps every aspect to the (sentence, probability) pairs assigned to it.
aspect_sentences = {aspect: [] for aspect in aspect_labels}

# Make sure dropout is disabled while scoring.
aspect_model.eval()

for sentence in sentences:
    inputs = aspect_tokenizer(sentence, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)

    # One probability per aspect for this sentence.
    probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
    for idx, prob in enumerate(probs):
        if prob > 0.6:  # Threshold for relevance
            aspect_sentences[aspect_labels[idx]].append((sentence, prob))

print(aspect_sentences)

{'Recommended': [('The best game ever made by Valve.', 0.8886448), ('Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.', 0.9544032), ('You made a great work, Valve, and thank You for this absolute masterpiece.', 0.94264966), ('10/10.', 0.85539865)], 'Story': [('Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.', 0.8038868)], 'Gameplay': [('Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.', 0.884463)], 'Visual': [('Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.', 0.6208334)], 'Audio': [], 'Technical': [], 'Price': [], 'Suggestions': []}


## Formatting Output

In [None]:
def concat_aspect_sentences(aspect_sentences):
    """Join the sentences collected for each aspect into a single string.

    Args:
        aspect_sentences: Mapping of aspect name to a list of tuples whose
            first element is the sentence text (e.g. `(sentence, probability)`).

    Returns:
        A dict holding only the aspects with at least one sentence; each value
        is that aspect's sentences in their original order, separated by single
        spaces and without leading or trailing padding.
    """
    return {
        aspect: " ".join(match[0] for match in matches)
        for aspect, matches in aspect_sentences.items()
        if matches  # skip aspects with no sentences
    }

In [None]:
from datasets import load_dataset
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw review CSV and wrap it in the working frame that the following
# cells reshape; the bare name on the last line displays the frame.
# NOTE(review): the path is an absolute Colab location — adjust when running elsewhere.
review_data = pd.read_csv(filepath_or_buffer="/content/reviews.csv")
review_df = pd.DataFrame(data=review_data)
review_df

Unnamed: 0,ID,Game,Website,Review,Score
0,1,The Legend of Zelda: Ocarina of Time,Nintendorks,"The biggest, most heartpounding and awe-inspir...",100
1,2,The Legend of Zelda: Ocarina of Time,Weekly Famitsu,10 / 10 / 10 / 10 - platinum [first ever perfe...,100
2,3,The Legend of Zelda: Ocarina of Time,Gaming Maxx,"Nothing but pure 100% fun, challenging, and le...",100
3,4,The Legend of Zelda: Ocarina of Time,IGN,"Call us crazy, but when the final version of Z...",100
4,5,The Legend of Zelda: Ocarina of Time,Nintendojo,[Review Score = 110] What's significant about...,100
...,...,...,...,...,...
321243,322648,Family Party: 30 Great Games Obstacle Arcade,Nintendo Life,"As clunky and poorly considered as its title, ...",10
321244,322649,Family Party: 30 Great Games Obstacle Arcade,Game Revolution,"Family Party is terrible. Not only is it bad, ...",0
321245,322650,Disney Dreamlight Valley,God is a Geek,Disney Dreamlight Valley is brimming with Disn...,90
321246,322651,Disney Dreamlight Valley,Noisy Pixel,"If the change had happened much sooner, with e...",75


## Reshape Data

In [None]:
import nltk
from nltk.stem import WordNetLemmatizer
# Download the NLTK resources requested by this notebook, in the original
# order, then build the lemmatizer instance used by the text clean-up below.
for nltk_resource in ('wordnet', 'punkt_tab'):
    nltk.download(nltk_resource)
lemmatizer = WordNetLemmatizer()

#!unzip /usr/share/nltk_data/corpora/wordnet.zip -d /usr/share/nltk_data/corpora/

def score_to_sentiment(score):
    """Bucket a review score into a sentiment class id.

    Args:
        score: Numeric review score.

    Returns:
        2 when the score is at least 67, 1 when it is at least 34, else 0.
    """
    # Each satisfied lower bound raises the class by one step.
    return int(score >= 34) + int(score >= 67)

def preprocess_data(text):
    """Lower-case a review and pass it through the WordNet lemmatizer.

    Note that this definition reuses the name of the batch tokenization
    function defined earlier in the notebook and replaces it from here on.

    Args:
        text: The review text.

    Returns:
        The lower-cased result of `lemmatizer.lemmatize` on the lower-cased text.
    """
    lowered = text.lower()
    # NOTE(review): lemmatize() receives the whole string in a single call —
    # confirm whether per-word lemmatization was intended.
    lemmatized = lemmatizer.lemmatize(lowered)
    return lemmatized.lower()


# Seed for every shuffle in this cell. Without it the `.sample(frac=1)` calls
# pick different rows on each run, which defeats the fixed `random_state`
# passed to `train_test_split`.
SAMPLE_SEED = 42


def _sample_and_split(frame, label, n_rows):
    """Shuffle one sentiment class, keep up to `n_rows` rows and split them 80/20.

    Returns:
        `(subset, train_rows, test_rows)` for the requested class.
    """
    subset = frame[frame["label"] == label].sample(frac=1, random_state=SAMPLE_SEED)[:n_rows]
    train_rows, test_rows = train_test_split(subset, test_size=0.2, random_state=42)
    return subset, train_rows, test_rows


# Derive the working frame from the untouched raw frame so the cell can be
# re-run safely: drop irrelevant columns, drop incomplete rows, rename columns.
review_df = (
    review_data
    .drop(columns=["Website", "ID", "Game"])
    .dropna()
    .rename(columns={"Score": "label", "Review": "text"})
)

# Change values: scores become sentiment class ids, text is normalised.
review_df["label"] = review_df["label"].apply(score_to_sentiment)
review_df["text"] = review_df["text"].apply(preprocess_data)

# Per-class subsets (2 = positive, 1 = neutral, 0 = negative) and their splits.
positive_df, positive_df_train, positive_df_test = _sample_and_split(review_df, 2, 500)
neutral_df, neutral_df_train, neutral_df_test = _sample_and_split(review_df, 1, 400)
negative_df, negative_df_train, negative_df_test = _sample_and_split(review_df, 0, 350)

# Recombine the classes and shuffle so they are interleaved.
review_df_train = pd.concat(
    [positive_df_train, neutral_df_train, negative_df_train], axis=0
).sample(frac=1, random_state=SAMPLE_SEED)
review_df_test = pd.concat(
    [positive_df_test, neutral_df_test, negative_df_test], axis=0
).sample(frac=1, random_state=SAMPLE_SEED)

review_df_train

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Unnamed: 0,text,label
235018,the game's a serious hand-eye workout with a l...,2
303402,"a few of the mini-games are fun, but none are ...",0
107258,to tackle the more inventive operations dreamt...,1
111122,it’s nice to see ea actually release a game th...,2
305565,invizimals: the lost kingdom is not only a bad...,0
...,...,...
314494,hatred is a game that basks in controversy for...,0
47872,gears of war 4 does everything required to bri...,2
320202,here's the bottom line: toy shop is an atrocio...,0
269952,one of the most pathetic jobs of porting a pc ...,0


## Initialize Sentiment Classification Model and Tokenizer

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification, XLNetForSequenceClassification, XLNetTokenizer

# Tokenizer and model for the sentiment task, loaded from the same XLNet
# checkpoint; the classification head has 3 outputs, matching the three class
# ids produced by `score_to_sentiment`.
sentiment_tokenizer = XLNetTokenizer.from_pretrained("xlnet-base-cased")
sentiment_model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=3)

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
from transformers import AutoTokenizer, BertTokenizer
from datasets import Dataset

# Tokenize the dataset
def tokenize_data(examples, max_length=512):
    """Tokenize a batch of review texts for the sentiment model.

    `padding="max_length"` on its own gave the tokenizer no length to pad to
    (no `max_length` was passed), and nothing was truncated. Truncation is now
    explicit with a generous cap; padding is left to the Trainer, which pads
    each batch dynamically when it is given the tokenizer.

    Args:
        examples: A batch from `Dataset.map(..., batched=True)`; the "text"
            column is read.
        max_length: Maximum number of tokens kept per review.

    Returns:
        The tokenizer output for the batch.
    """
    return sentiment_tokenizer(examples["text"], truncation=True, max_length=max_length)

# Convert the pandas splits to Hugging Face datasets.
review_dataset_train = Dataset.from_pandas(review_df_train)
review_dataset_test = Dataset.from_pandas(review_df_test)

tokenized_review_train = review_dataset_train.map(tokenize_data, batched=True)
tokenized_review_test = review_dataset_test.map(tokenize_data, batched=True)

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/250 [00:00<?, ? examples/s]

## Fine-tune Model for Sentiment Classification

In [None]:
from transformers import Trainer, TrainingArguments
import torch
from nltk.tokenize import sent_tokenize
import os

os.environ["WANDB_DISABLED"] = "true"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sentiment_model = sentiment_model.to(device)

# Hyper-parameters for fine-tuning the sentiment classifier.
# The output and logging directories are specific to this run: the aspect run
# above writes to "./results" and "./logs", and sharing them would mix both
# models' checkpoints under one `save_total_limit` rotation.
# `report_to="none"` is the replacement the WANDB_DISABLED deprecation warning
# asks for.
training_args = TrainingArguments(
    output_dir="./results_sentiment",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs_sentiment",
    logging_steps=10,
    report_to="none",
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    """Accuracy and weighted precision / recall / F1 for the sentiment model.

    This definition replaces the multi-label `compute_metrics` defined earlier
    in the notebook.

    Args:
        pred: The `(logits, labels)` pair the Trainer passes to its metric
            callback.

    Returns:
        A dict with the keys "accuracy", "precision", "recall" and "f1".
    """
    logits, labels = pred
    # The predicted class is the index of the largest logit in each row.
    predictions = np.argmax(logits, axis=1)

    scores = {"accuracy": accuracy_score(labels, predictions)}
    weighted_metrics = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for metric_name, metric_fn in weighted_metrics:
        scores[metric_name] = metric_fn(labels, predictions, average="weighted")
    return scores

# Wire the sentiment model, its tokenized splits and the metric callback into a
# Trainer, then run the fine-tuning loop.
trainer = Trainer(
    model=sentiment_model,
    tokenizer=sentiment_tokenizer,
    args=training_args,
    train_dataset=tokenized_review_train,
    eval_dataset=tokenized_review_test,
    compute_metrics=compute_metrics,
)

trainer.train()

  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.7789,0.789327,0.712,0.721263,0.712,0.711118
2,0.4405,0.976349,0.66,0.664625,0.66,0.647308
3,0.2011,1.079898,0.712,0.708293,0.712,0.707513
4,0.214,1.219771,0.704,0.697393,0.704,0.694837
5,0.0783,1.524849,0.708,0.703608,0.708,0.704787
6,0.1105,1.771701,0.68,0.684707,0.68,0.675977
7,0.0801,1.842358,0.704,0.700536,0.704,0.701181
8,0.0144,1.958473,0.724,0.720869,0.724,0.721795
9,0.0012,1.890516,0.732,0.731493,0.732,0.731731
10,0.0045,1.929743,0.728,0.727011,0.728,0.727443


TrainOutput(global_step=630, training_loss=0.242319909976204, metrics={'train_runtime': 1541.7162, 'train_samples_per_second': 6.486, 'train_steps_per_second': 0.409, 'total_flos': 632791117073520.0, 'train_loss': 0.242319909976204, 'epoch': 10.0})

## Evaluate Sentiment Classification Model

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss

# Prepare ground truth and predictions
ground_truth = []
predictions = []

# Make sure dropout is disabled while scoring.
sentiment_model.eval()

# Score the held-out reviews one at a time.
for index, row in review_df_test.iterrows():
    # Ground truth for this sample: a single class id (0, 1 or 2)
    ground_truth.append(row["label"])

    # Model prediction for this sample
    inputs = sentiment_tokenizer(row["text"], return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)
    # The predicted class is the index of the largest logit.
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item()
    # Append prediction
    predictions.append(predicted_class)

# Convert to numpy arrays for evaluation
ground_truth = np.array(ground_truth)
predictions = np.array(predictions)

# Compute metrics
accuracy = accuracy_score(ground_truth, predictions)
precision = precision_score(ground_truth, predictions, average="weighted")
recall = recall_score(ground_truth, predictions, average="weighted")
f1 = f1_score(ground_truth, predictions, average="weighted")
hamming = hamming_loss(ground_truth, predictions)

# Output metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Hamming Loss: {hamming}")

Accuracy: 0.728
Precision: 0.7270105580693816
Recall: 0.728
F1 Score: 0.7274425366587292
Hamming Loss: 0.272


In [None]:
# Quick manual check of the sentiment model on a single short review.
review = "it's very dbad"
inputs = sentiment_tokenizer(review, return_tensors="pt").to(device)
with torch.no_grad():
    outputs = sentiment_model(**inputs)

logits = outputs.logits
# The predicted class is the index of the largest logit.
predicted_class = torch.argmax(logits, dim=-1).item()

# Class id -> display name; every id other than 2 and 1 is reported as negative.
sentiment_names = {2: "Positive", 1: "Neutral"}
print(sentiment_names.get(predicted_class, "Negative"))

Neutral


## Test Full Pipeline

In [None]:
review = """The best game ever made by Valve. Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends. You made a great work, Valve, and thank You for this absolute masterpiece. 10/10."""
sentences = sent_tokenize(review)

aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
aspect_sentences = {aspect: [] for aspect in aspect_labels}

# Stage 1: assign each sentence to every aspect whose probability exceeds 0.6.
for sentence in sentences:
    inputs = aspect_tokenizer(sentence, return_tensors="pt", truncation=True, padding=True).to(device)
    with torch.no_grad():
        outputs = aspect_model(**inputs)

    probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
    for idx in range(len(probs)):
        prob = probs[idx]
        if prob > 0.6:  # Threshold for relevance
            aspect_sentences[aspect_labels[idx]].append((sentence, prob))

# Stage 2: merge the sentences per aspect and classify each merged text.
review_aspect = concat_aspect_sentences(aspect_sentences)

# Class id -> display name; every id other than 2 and 1 is reported as negative.
sentiment_names = {2: "Positive", 1: "Neutral"}
for aspect, aspect_text in review_aspect.items():
    inputs = sentiment_tokenizer(aspect_text, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = sentiment_model(**inputs)

    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=-1).item()

    print(aspect, ":", aspect_text)
    print(sentiment_names.get(predicted_class, "Negative"))

Recommended :  The best game ever made by Valve. Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends. You made a great work, Valve, and thank You for this absolute masterpiece. 10/10.
Positive
Story :  Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.
Positive
Gameplay :  Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.
Positive
Visual :  Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends.
Positive


## Save Model

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the save cells below can write to it.
# NOTE(review): the recorded run of this cell raised
# "ValueError: Mountpoint must not already contain files" — presumably something
# had already been written under /content/drive before the mount. Confirm, and
# run this cell before any cell that saves to that path.
drive.mount('/content/drive')

ValueError: Mountpoint must not already contain files

In [None]:
# Target folder for the fine-tuned sentiment classifier.
save_directory = "/content/drive/MyDrive/NLP"

# Write the model weights and config.
sentiment_model.save_pretrained(save_directory=save_directory)

# Write the tokenizer files next to the model so both reload from one path.
# This is the last expression, so the list of written files is shown as output.
sentiment_tokenizer.save_pretrained(save_directory=save_directory)

('/content/drive/MyDrive/NLP/tokenizer_config.json',
 '/content/drive/MyDrive/NLP/special_tokens_map.json',
 '/content/drive/MyDrive/NLP/spiece.model',
 '/content/drive/MyDrive/NLP/added_tokens.json')

In [None]:
# Target folder for the fine-tuned aspect classifier.
aspect_model_save_dir = "/content/drive/MyDrive/NLP/AspectModel"

# Save the model
aspect_model.save_pretrained(aspect_model_save_dir)

# Save the tokenizer as well. The model was previously saved a second time
# here, so the folder never received the tokenizer files needed to reload it.
aspect_tokenizer.save_pretrained(aspect_model_save_dir)

In [None]:
# Reload the saved sentiment classifier and its tokenizer from `save_directory`
# and place the model on the GPU when available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
loaded_model = XLNetForSequenceClassification.from_pretrained(save_directory).to(device)
loaded_tokenizer = XLNetTokenizer.from_pretrained(save_directory)

In [None]:
review = """The best game ever made by Valve. Great story, beautiful dialogues with funny jokes, interesting puzzles, perfect atmosphere and a lot of fun in co-op with friends. You made a great work, Valve, and thank You for this absolute masterpiece. 10/10."""

def absa(review):
    """Print the detected aspects of a review together with their sentiment.

    The review is split into sentences, each sentence is scored by the aspect
    classifier, and the sentences gathered for an aspect are merged and
    classified by the reloaded sentiment model. For every aspect with at least
    one sentence the function prints the aspect with its text, followed by
    "Positive", "Neutral" or "Negative".

    Args:
        review: The review text to analyse.
    """
    aspect_labels = ["Recommended", "Story", "Gameplay", "Visual", "Audio", "Technical", "Price", "Suggestions"]
    aspect_sentences = {aspect: [] for aspect in aspect_labels}

    # Stage 1: assign each sentence to every aspect whose probability exceeds 0.6.
    for sentence in sent_tokenize(review):
        inputs = aspect_tokenizer(sentence, return_tensors="pt", truncation=True, padding=True).to(device)
        with torch.no_grad():
            outputs = aspect_model(**inputs)

        probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
        for idx in range(len(probs)):
            prob = probs[idx]
            if prob > 0.6:  # Threshold for relevance
                aspect_sentences[aspect_labels[idx]].append((sentence, prob))

    # Stage 2: classify the merged text of every detected aspect.
    # Class id -> display name; every id other than 2 and 1 is reported as negative.
    sentiment_names = {2: "Positive", 1: "Neutral"}
    for aspect, aspect_text in concat_aspect_sentences(aspect_sentences).items():
        inputs = loaded_tokenizer(aspect_text, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = loaded_model(**inputs)

        predicted_class = torch.argmax(outputs.logits, dim=-1).item()

        print(aspect, ":", aspect_text)
        print(sentiment_names.get(predicted_class, "Negative"))

In [None]:
# Ask for a review interactively and run the aspect / sentiment pipeline on it.
review = input("Insert your review: ")

absa(review=review)

Insert your review: It's a bad game!
Recommended :  It's a bad game!
Negative
