In [1]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import nltk
from nltk.corpus import stopwords
from unidecode import unidecode
import re

In [2]:
nltk.download('stopwords')
ENGLISH_STOP_WORDS = list(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/ricardorr/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [3]:
extracted = pd.read_csv("steam_reviews.csv")
extracted.head(5)

Unnamed: 0,Recommendation,Review,Helpful
0,Not Recommended,TL:DR - This game is a psychologically manipul...,799
1,Not Recommended,"Worst Fifa ever, terrible passing, shooting, l...",710
2,Not Recommended,"gameplay, servers, full of bugs and problems. ...",292
3,Not Recommended,Year after year EA finds ways to make the game...,457
4,Not Recommended,I purchased this game 2 weeks ago and I still ...,436


In [4]:
def is_valid_word(word):
    # remove repeated letters, short words
    if len(word) <= 3:
        return False
    if re.search(r'(.)\1{2,}', word):
        return False
    if not re.match(r'^[a-zA-Z]+$', word):  # only letters
        return False
    return True

def clean_text(text):
    words = text.lower().split()
    return ' '.join([word for word in words if is_valid_word(word)])

In [5]:
# First cleaning step: translate non-ASCII characters to ASCII
extracted['CleanedReview'] = extracted['Review'].apply(unidecode)
# Remove numbers and punctuation
extracted['CleanedReview'] = extracted['CleanedReview'].str.replace(r'[^a-zA-Z\s]', '', regex=True)
extracted['CleanedReview'] = extracted['CleanedReview'].apply(clean_text)

extracted.head(5)

Unnamed: 0,Recommendation,Review,Helpful,CleanedReview
0,Not Recommended,TL:DR - This game is a psychologically manipul...,799,tldr this game psychologically manipulative sk...
1,Not Recommended,"Worst Fifa ever, terrible passing, shooting, l...",710,worst fifa ever terrible passing shooting inpu...
2,Not Recommended,"gameplay, servers, full of bugs and problems. ...",292,gameplay servers full bugs problems buying thi...
3,Not Recommended,Year after year EA finds ways to make the game...,457,year after year finds ways make game worse esp...
4,Not Recommended,I purchased this game 2 weeks ago and I still ...,436,purchased this game weeks still able play sing...


In [6]:
satisfied = extracted[extracted["Recommendation"] == 'Recommended']
not_satisfied = extracted[extracted['Recommendation'] == 'Not Recommended']
len(satisfied), len(not_satisfied)

(83, 317)

## Model used - Count vectorizer + logistic regression

Predicts how much each word (or ngram) relates to being the target variable, assigning weights to each of the tokens, those weights can than be used to measure how negative (or positive) a word is.

In [7]:
X = extracted["CleanedReview"].to_numpy()
Y = extracted["Recommendation"].to_numpy()
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.15)

classifier = Pipeline(
    [
        ('vectorizer', CountVectorizer(stop_words=ENGLISH_STOP_WORDS, binary=True, ngram_range=(1, 2))),
        # ('vectorizer', TfidfVectorizer(stop_words=ENGLISH_STOP_WORDS, binary=True, ngram_range=(1, 2))),
        ('log_regressor', LogisticRegression(penalty='l2', solver='saga', max_iter=10_000))
    ]
)

classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
acc = accuracy_score(y_pred, y_test)
f"Training accuracy: {acc} - {len(X_train)} samples, {len(X_test)} test samples"

'Training accuracy: 0.85 - 340 samples, 60 test samples'

In [8]:
classifier.classes_
# 0 = Not Recommended; 1 = Recommended

array(['Not Recommended', 'Recommended'], dtype=object)

In [9]:
def get_word_weights_for_class(classifier):
    """
    Get the word weights for a specific class in the classifier.
    :param classifier: The trained classifier pipeline.
    :param target_class: The target
    """
    vocabulary = classifier["vectorizer"].vocabulary_
    weights = classifier["log_regressor"].coef_
    print(weights.shape)

    word_weights = []
    for word in vocabulary.keys():
        # if number or too short, ignore
        if word.isnumeric() or len(word) < 3:
            continue
        j = vocabulary[word]
        word_weight_in_class = weights[0, j]
        word_weights.append((word_weight_in_class, word))
    
    return word_weights


In [10]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# In this case (binary regressor) -> the coefficients that lead to dissatisfaction the most are the ones with the most negative value.
# And the ones that move away from dissatisfaction the most are the ones with the most positive value.
word_weights = get_word_weights_for_class(classifier)
target_str = classifier.classes_[0]

sorted_word_weights = sorted(word_weights, reverse=False, key=lambda x: x[0])
word_weights_sorted = [t[0] for t in sorted_word_weights]
words_sorted = [t[1] for t in sorted_word_weights]

# Separate top N negative and top N positive words
N_WORDS_TO_VISUALIZE = 30
negative_weights = word_weights_sorted[:N_WORDS_TO_VISUALIZE]
negative_words = words_sorted[:N_WORDS_TO_VISUALIZE]

positive_weights = word_weights_sorted[-N_WORDS_TO_VISUALIZE:]
positive_words = words_sorted[-N_WORDS_TO_VISUALIZE:]

# Create subplot
fig = make_subplots(rows=1, cols=2, subplot_titles=[
    f"Words more related to '{target_str}'",
    f"Words less related to '{target_str}'"
])

# Negative bar chart
fig.add_trace(go.Bar(
    x=negative_weights,
    y=negative_words,
    orientation='h',
    marker=dict(color='crimson'),
    name='More correlated to the class'
), row=1, col=1)

# Positive bar chart
fig.add_trace(go.Bar(
    x=positive_weights,
    y=positive_words,
    orientation='h',
    marker=dict(color='seagreen'),
    name='Less correlated to the class'
), row=1, col=2)

# Update layout
fig.update_layout(
    height=600,
    width=1000,
    title_text=f"Correlation of words to the class '{target_str}'",
    showlegend=False,
    template="plotly_white"
)

# Improve axis labels
fig.update_yaxes(autorange="reversed", row=1, col=1)
fig.update_yaxes(row=1, col=2)

fig.show()
fig.write_image("out/steam_reviews_word_weights.png", width=1000, height=600)

(1, 11418)


## LDA

In [11]:
import gensim
from gensim import corpora
from gensim import models
from gensim.models import Nmf
from gensim.parsing.preprocessing import remove_stopwords, preprocess_string
from nltk.tokenize import word_tokenize

In [12]:
tokens = satisfied['CleanedReview'].apply(lambda x: preprocess_string(x, filters=[remove_stopwords]))

N_TOPICS = 3

dictionary = corpora.Dictionary(tokens)
corpus = [dictionary.doc2bow(token) for token in tokens]

# lda = models.ldamodel.LdaModel(corpus, num_topics=N_TOPICS, id2word=dictionary, passes=10, random_state=1, iterations=500)
lda = Nmf(corpus, num_topics=N_TOPICS, id2word=dictionary)

lista_topico = lda.print_topics(num_words=3)

for topico in lista_topico:
    print(topico)


(0, '0.084*"game" + 0.027*"feels" + 0.025*"like"')
(1, '0.038*"game" + 0.034*"play" + 0.028*"team"')
(2, '0.020*"like" + 0.016*"mode" + 0.015*"rewards"')


In [13]:
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


In [14]:
lda_top = lda.show_topics(formatted=False)

fig = make_subplots(rows=3, cols=2, shared_yaxes=True, subplot_titles=[f"Topic {i+1}" for i in range(N_TOPICS)],)

for i in range(N_TOPICS):
    topico = lda_top[i][1]
    topico.sort(key=lambda x: x[1], reverse=True)

    palavra = list(zip(*topico))[0]
    score = list(zip(*topico))[1]
    
    fig.add_trace(go.Bar(x=palavra, y=score,
                         marker=dict(color=score, coloraxis="coloraxis")), i // 2 + 1, i % 2 + 1)
    
fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=False)
fig.show()
fig.write_image("out/satisfied_review_topics.png", width=1000, height=600)


# Analysing most relevant comments with LLM

In [1]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os

For running this part, create a `.env` file in the same folder as the notebook and add the keys:
```
API_KEY = <Azure Open AI API KEY>
API_ENDPOINT = <Azure EndPoint>
```

In [2]:
load_dotenv()

client = AzureOpenAI(
    api_key=os.getenv("API_KEY"),
    api_version="2024-12-01-preview",
    azure_endpoint=os.getenv("API_ENDPOINT")
)

In [None]:
top_10_reviews = satisfied.sort_values(by="Helpful", ascending=False).iloc[:10, :]["Review"].to_list()

review_list = "\n".join(f"- {i+1}: {r}" for i, r in enumerate(top_10_reviews))

good_reviews_prompt = f"""You are a helpful assistant. I will give you a list of reviews about a game.
Your task is to summarize the reviews, and search for insights that can be useful for the game developers.
Please, be very specific and detailed in your answer.
Here are the reviews:
{review_list}
"""

response = client.chat.completions.create(
    model="gpt-4.1-nano",
    messages=[{"role": "user", "content": good_reviews_prompt}],
)

out = response.choices[0].message.content

print(out)

with open("out/good_reviews_summary_gpt.md", "w") as f:
    f.write(out)

### **Summary of Reviews and Developer Insights for EA Sports FC 25**

#### **General Sentiments**
- **Improved but Still Flawed**: Most players agree that *EA Sports FC 25* is an improvement over previous titles like FIFA 23 and 24, particularly in offline gameplay and Career Mode. However, the game still suffers from issues that undermine the overall experience (e.g., bugs, copy-pasted elements, and pricing strategies).

- **Offline Gameplay is the Highlight**: Career Mode has garnered substantial praise, with users appreciating its updates, enhanced gameplay mechanics, and added realism. However, many feel these updates could still benefit from greater depth and refinement.

- **Online Modes Are Disappointing**: Online gameplay has received significant criticism for being repetitive, unbalanced, and plagued by unskilled players spamming meta strategies (e.g., overusing specific players like Mbappé). Many reviewers avoid online modes entirely due to these issues.

- **Technical Issue

In [None]:
worse_10_reviews = not_satisfied.sort_values(by="Helpful", ascending=False).iloc[:10, :]["Review"].to_list()

review_list = "\n".join(f"- {i+1}: {r}" for i, r in enumerate(worse_10_reviews))

bad_reviews_prompt = f"""You are a helpful assistant. I will give you a list of only the bad reviews about a game.
Your task is to summarize the reviews, and search for insights that can be useful for the game developers.
Please, be very specific and detailed in your answer.
Here are the reviews:
{review_list}
"""

response = client.chat.completions.create(
    model="gpt-4.1-nano",
    messages=[{"role": "user", "content": bad_reviews_prompt}],
)

out = response.choices[0].message.content

print(out)

with open("out/bad_reviews_summary_gpt.md", "w") as f:
    f.write(out)

### Key Insights and Suggestions for Developers Based on Bad Reviews

The reviews of the game reveal a host of issues across gameplay, monetization, optimization, and general user experience. Below are detailed insights and possible avenues for improvement:

---

## 1. **Gameplay Mechanics: Lack of Responsiveness, AI Manipulation, and Perceived "Scripting"**
   Many players are frustrated with the in-game experience, particularly the mechanics that feel unfair, inconsistent, or outright "manipulated." Common issues include:
   - **Dynamic Difficulty Adjustment (DDA) or "Scripting":** Players feel that outcomes are artificially determined to drive monetary engagement in modes like Ultimate Team. Examples include:
     - AI inexplicably outperforming human players (e.g., low-rated defenders outrunning high-rated attackers).
     - Predictable match outcomes based on perceived AI intervention or “cheating.”
   - **Unresponsive Player Controls:**
     - Commands during crucial moments, suc