# Import Libraries

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
from transformers import pipeline
from wordcloud import WordCloud

# Data Loading

In [2]:
# Load your latest saved file
df_ori = pd.read_csv("AC_Shadows_sentiment_final.csv")

# Keep only non-empty review texts
df = df_ori[df_ori["review_text"].notnull()]

# (Optional) Filter to English
df = df[df["language"] == "english"]
print(f"Reviews after filtering: {len(df)}")


Reviews after filtering: 9172


In [3]:
df.head()

Unnamed: 0,review_id,review_text,votes_up,votes_funny,comment_count,author_steamid,author_playtime_forever,author_playtime_last_2weeks,language,timestamp_created,timestamp_updated,review_score,written_during_early_access
0,190981496,Micro transactions in a single-player game wit...,3817,100,0,76561198138838965,111,111,english,1742731944,1742731944,0.878522,False
1,190725656,I can't play this singleplayer game because of...,9534,229,310,76561198154468788,3093,3093,english,1742492415,1742492415,0.871569,False
2,190939988,13 hours into the game i deeply regret buying ...,4400,102,0,76561198294868408,1816,1816,english,1742688758,1742688860,0.867577,False
3,190677868,Microtransaction in single player game never c...,15366,376,0,76561198273179269,2485,2485,english,1742440803,1742440803,0.866033,False
4,190871217,There is NO reason a single player offline exp...,4024,77,0,76561198289521474,9,9,english,1742631374,1742696462,0.81839,False


# Data Labeling

## BERT

In [None]:

# Load BERT sentiment model
classifier = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment")

Device set to use cpu


In [9]:
# Optional: filter to English
df = df[df["language"] == "english"]
df = df.drop_duplicates(subset=["review_text"])
df = df[df["review_text"].notnull()]
df = df[df["review_text"].str.len() > 20]

In [10]:
def get_bert_sentiment(text):
    try:
        result = classifier(text[:512])[0]  # Truncate to 512 tokens (BERT limit)
        return result["label"]
    except:
        return "ERROR"

df["bert_sentiment"] = df["review_text"].apply(get_bert_sentiment)

In [11]:
print(df["bert_sentiment"].value_counts())


bert_sentiment
LABEL_2    4875
LABEL_0    1681
LABEL_1     785
ERROR        29
Name: count, dtype: int64


In [12]:
label_map = {
    "LABEL_0": "Negative",
    "LABEL_1": "Neutral",
    "LABEL_2": "Positive"
}

df["bert_sentiment"] = df["bert_sentiment"].map(label_map).fillna("ERROR")

In [13]:
df[df["bert_sentiment"] == "ERROR"][["review_text"]].head()

Unnamed: 0,review_text
521,"ㅆ1발 게임좀 하고 싶다, 지금 수십가지의 게임을 하고 있는데\n유일하게 섀도우스만..."
710,"Итого, за последние 10 лет мы получили 2 лучши..."
826,如果说「幻景」误信了一些老登的话，有点想找老色鬼三部曲的感觉，那么「影」毋庸置疑又坚决的换回...
853,ส่วนตัวมองว่าเกมไม่ได้แย่ ถ้าเป็นชื่ออื่น ที่ไ...
985,7/10 เป็นเกมที่สนุกดี แต่ก็ตาม style ubisoft o...


In [14]:
# Optionally drop rows with errors
df = df[df["bert_sentiment"] != "ERROR"]


In [15]:
df.head()

Unnamed: 0,review_id,review_text,votes_up,votes_funny,comment_count,author_steamid,author_playtime_forever,author_playtime_last_2weeks,language,timestamp_created,timestamp_updated,review_score,written_during_early_access,sentiment_score,sentiment_label,bert_sentiment
0,190981496,Micro transactions in a single-player game wit...,3817,100,0,76561198138838965,111,111,english,1742731944,1742731944,0.878522,False,0.0,Neutral,Neutral
1,190725656,I can't play this singleplayer game because of...,9534,229,310,76561198154468788,3093,3093,english,1742492415,1742492415,0.871569,False,-0.211,Negative,Negative
2,190939988,13 hours into the game i deeply regret buying ...,4400,102,0,76561198294868408,1816,1816,english,1742688758,1742688860,0.867577,False,-0.9054,Negative,Negative
3,190677868,Microtransaction in single player game never c...,15366,376,0,76561198273179269,2485,2485,english,1742440803,1742440803,0.866033,False,0.2584,Positive,Negative
4,190871217,There is NO reason a single player offline exp...,4024,77,0,76561198289521474,9,9,english,1742631374,1742696462,0.81839,False,-0.9067,Negative,Negative


In [None]:
df.to_csv('AC_Shadows_Bert.csv')