In [1]:
from pathlib import Path

import pandas as pd
import torch
from transformers import DistilBertForSequenceClassification, Trainer, TrainingArguments, DistilBertTokenizer

In [2]:
# Read the English comments dataset from the processed CSV into a DataFrame
file_path = 'C:/Boardgames_ABSA/data/processed/english_boardgames_comments.csv'
english_comments_df = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# The classifier and its tokenizer are both loaded from the same saved directory
model_dir_2 = 'C:/Boardgames_ABSA/models/distilbert_model'
model_2 = DistilBertForSequenceClassification.from_pretrained(model_dir_2)
tokenizer_2 = DistilBertTokenizer.from_pretrained(model_dir_2)

In [5]:
# Put the model into evaluation mode before inference; the call's return value
# is the last expression of the cell, so the module structure is echoed below.
model_2.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.3, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.3, inplace=False)
 

In [6]:
# the sentiment analysis function with scores
def analyze_sentiment_model_2(text):
    """Classify ``text`` with the notebook-level ``model_2`` / ``tokenizer_2``.

    The text is tokenized (truncated to at most 512 tokens), moved to the GPU
    when CUDA is available (CPU otherwise) together with the model, and run
    through a gradient-free forward pass.

    Returns:
        tuple: ``(label, score)`` where ``label`` is ``'negative'`` for class 0
        or ``'positive'`` for class 1, and ``score`` is the largest softmax
        probability over the logits.

    Note:
        ``.item()`` is called on the argmax result, so this presumably expects
        a single text per call rather than a batch — confirm before passing lists.
    """
    id_to_sentiment = {0: 'negative', 1: 'positive'}

    encoded = tokenizer_2(text, padding=True, truncation=True, max_length=512, return_tensors='pt')

    # Choose the execution device and place both model and inputs on it
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_2.to(target)
    on_device = {}
    for name, tensor in encoded.items():
        on_device[name] = tensor.to(target)

    # Forward pass without gradient tracking
    with torch.no_grad():
        logits = model_2(**on_device).logits
        class_idx = logits.argmax(dim=1).item()
        confidence = logits.softmax(dim=1).max().item()  # top class probability

    return id_to_sentiment[class_idx], confidence

In [7]:
# Score every comment, then expand the (label, score) tuples with a single
# DataFrame construction. Wrapping each result in its own pd.Series inside
# .apply() allocates one Series per row and is much slower on large frames.
sentiment_results = english_comments_df['value'].apply(analyze_sentiment_model_2).tolist()
english_comments_df[['sentiment_model_2', 'score_model_2']] = pd.DataFrame(
    sentiment_results,
    index=english_comments_df.index,  # keep row alignment with the source frame
    columns=['sentiment_model_2', 'score_model_2'],
)

In [8]:
# Save the results for Model 2 to a new CSV file
sentiment_analysis_model_2_file = 'C:/Boardgames_ABSA/results/boardgames_sentiment_analysis_model_2.csv'
# Create the results directory first: if it is missing, to_csv would raise and
# the scored DataFrame from the preceding inference pass would not be written.
Path(sentiment_analysis_model_2_file).parent.mkdir(parents=True, exist_ok=True)
english_comments_df.to_csv(sentiment_analysis_model_2_file, index=False)

print(f"Sentiment analysis using Model 2 completed and results saved to {sentiment_analysis_model_2_file}")

Sentiment analysis using Model 2 completed and results saved to C:/Boardgames_ABSA/results/boardgames_sentiment_analysis_model_2.csv


In [9]:
# Show the first ten rows of the identifier, comment text and Model 2 outputs
sample_columns = ['boardgame_id', 'value', 'sentiment_model_2', 'score_model_2']
print("\nSample of the results:")
print(english_comments_df[sample_columns].head(10))


Sample of the results:
   boardgame_id                                              value  \
0        224517  SLEEVED[IMG]https://cf.geekdo-static.com/mbs/m...   
1        224517  Great game, full controllo of your strategy th...   
2        224517                                      Location: MSK   
3        224517  Very clever game, enjoyable overall.  Plus poi...   
4        224517  Brilliant!  Fits right into my wheelhouse all ...   
5        224517  Absolutely brilliant!  I never played the orig...   
6        224517  I prefer old school Brass or AoI. I do like th...   
7        224517  The game itself is not interesting enough to l...   
8        224517                               "You can't do that."   
9        224517  This is a near-perfect board game because...  ...   

  sentiment_model_2  score_model_2  
0          negative       0.518042  
1          positive       0.997975  
2          positive       0.565053  
3          positive       0.993208  
4          positive 