In [1]:
import pandas as pd
import plotly.express as px

In [2]:
from helpers import database

reviews = database.get_reviews()
reviews.head()

Unnamed: 0,id,recommendation_id,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played,language,...,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,primarily_steam_deck
0,215760880,,76561199416294706,0,6,2368,626,2368,1768152153,schinese,...,1768152264,True,0,0,0.5,0,False,False,False,False
1,215760124,,76561199565605732,0,4,325,325,264,1768155336,english,...,1768151687,True,0,0,0.5,0,True,False,False,False
2,215758567,,76561198057788180,0,6,1358,181,1267,1768154576,japanese,...,1768150503,False,0,0,0.5,0,True,False,False,False
3,215756568,,76561198026330868,39,10,4201,2559,4150,1768154369,english,...,1768148999,True,0,0,0.5,0,True,False,False,False
4,215756060,,76561199751191801,0,2,5242,5242,5242,1768148529,french,...,1768148608,True,0,0,0.5,0,True,False,False,False


In [3]:
def sentiment_pies(stats, title: str, facet=None):
    """
    Create a pie chart of the sentiment
    :param stats: count of reviews
    :param title: str
    :param facet: column
    """
    fig = px.pie(
        stats,
        names="sentiment",
        values="count",
        color="sentiment",
        color_discrete_map={"Positive": "green", "Negative": "red"},
        facet_col=facet,
        title=title,
        template="plotly_dark"
    )
    fig.update_traces(
        hovertemplate="%{label}: %{value} reviews (%{percent})",
        textposition='inside',
        textinfo='percent+label',
        marker=dict(line=dict(color='white', width=1.5))
    )
    fig.update_layout(height=500, legend_orientation="h", title_font_size=20)
    fig.show()

In [4]:
# Global sentiment
counts = reviews["voted_up"].value_counts().rename({True: "Positive", False: "Negative"}).reset_index()
counts.columns = ["sentiment", "count"]
counts

Unnamed: 0,sentiment,count
0,Positive,21142
1,Negative,6334


In [5]:
sentiment_pies(counts, "Global sentiment")

In [6]:
# Top 5 languages + emotions according to language
language_counts = (
    reviews.groupby(["language", "voted_up"])
    .size()
    .reset_index(name="count")
)
language_counts["voted_up"] = language_counts["voted_up"].replace({True: "Positive", False: "Negative"})
language_counts.columns = ["language", "sentiment", "count"]
language_counts.head()

Unnamed: 0,language,sentiment,count
0,arabic,Positive,1
1,brazilian,Negative,101
2,brazilian,Positive,896
3,bulgarian,Positive,2
4,czech,Negative,2


In [7]:
totals = language_counts.groupby("language")["count"].sum().reset_index()
totals = totals.sort_values("count", ascending=False)
top_languages = totals.head(5)["language"].tolist()
top_languages

['english', 'schinese', 'spanish', 'brazilian', 'french']

In [8]:
top_lang_counts = language_counts[language_counts["language"].isin(top_languages)]
top_lang_counts["language"] = pd.Categorical(top_lang_counts["language"], categories=top_languages, ordered=True)
top_lang_counts = top_lang_counts.sort_values("language")
top_lang_counts



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,language,sentiment,count
10,english,Negative,2465
11,english,Positive,10310
41,schinese,Negative,2359
42,schinese,Positive,4731
43,spanish,Negative,133
44,spanish,Positive,1000
1,brazilian,Negative,101
2,brazilian,Positive,896
14,french,Negative,198
15,french,Positive,695


In [9]:
sentiment_pies(top_lang_counts, "Sentiment by Language (Top 5)", "language")

In [10]:
# Group positive review
language_data = (
    reviews.groupby("language")
      .agg(total_reviews=("id", "count"),
           positive_reviews=("voted_up", "sum"))
      .reset_index()
)

language_data["positivity"] = language_data["positive_reviews"] / language_data["total_reviews"]

In [11]:
# Choose language data with total review >=100
language_data = language_data[language_data["total_reviews"] >= 100]
language_data = language_data.sort_values("positivity", ascending=True)


In [12]:
# Create bar chart
bars = px.bar(
    language_data,
    x="positivity",
    y="language",
    orientation="h",
    text="total_reviews",
    labels={"positivity": "Sentiment", "language": "Language", "total_reviews": "Reviews"},
    color="positivity",
    color_continuous_scale=[(0, "red"), (1, "green")],
    range_x=[0, 1],
    title="Sentiment by language with Total Reviews >=100",
)

bars.update_traces(texttemplate="%{text:,}", textposition="inside")
bars.update_layout(
    height=max(400, 30 * len(language_data)),
    coloraxis_showscale=False,
)

bars.update_xaxes(tickformat=".0%")
bars.show()

In [13]:
# Overall distribution by language (large pie chart)
lang_total = reviews["language"].value_counts().reset_index()
lang_total.columns = ["language", "count"]
lang_total.head()

Unnamed: 0,language,count
0,english,12775
1,schinese,7090
2,spanish,1133
3,brazilian,997
4,french,893


In [14]:
fig = px.pie(
    lang_total,
    values="count",
    names="language",
    title="Review Volume by Language",
    template="plotly_dark",
    hole=0
)
fig.update_traces(textposition='inside', textinfo='percent+label', pull=[0.05,0.03,0,0,0])
fig.update_layout(height=700, width=900)
fig.show()

In [25]:
reviews = database.get_reviews("english")
reviews.head()

Unnamed: 0,id,recommendation_id,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played,language,...,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,primarily_steam_deck
0,215760124,,76561199565605732,0,4,325,325,264,1768155336,english,...,1768151687,True,0,0,0.5,0,True,False,False,False
1,215756568,,76561198026330868,39,10,4201,2559,4150,1768154369,english,...,1768148999,True,0,0,0.5,0,True,False,False,False
2,215744888,,76561198066200885,221,44,5543,0,5543,1739371844,english,...,1768140031,True,0,0,0.5,0,True,False,False,False
3,215742083,,76561198880718555,0,8,3239,2734,3147,1768155154,english,...,1768137616,True,0,0,0.5,0,True,False,False,False
4,215740976,,76561199486130374,14,3,200,200,200,1768055486,english,...,1768136629,True,0,0,0.5,0,False,False,False,False


In [26]:
def get_positivity_percentage():
    global monthly
    # Change timestamp to datetime
    reviews["created"] = pd.to_datetime(reviews["timestamp_created"], unit="s")

    # Create Period for month-year
    reviews["year_month"] = reviews["created"].dt.to_period("M").dt.to_timestamp()

    # Calculate the average positivity for each month
    monthly = (
        reviews.groupby("year_month")["voted_up"]
        .mean()
        .reset_index(name="positivity")
    )

    # Change to percentage
    monthly["positivity_pct"] = monthly["positivity"] * 100

get_positivity_percentage()

In [27]:
import plotly.graph_objects as go

def plot_monthly_review(language="English"):
    global fig
    fig = go.Figure()

    # Create bar chart
    fig.add_trace(
        go.Bar(
            x=monthly["year_month"],
            y=monthly["positivity_pct"],
            name="Positivity (%)",
            marker=dict(
                color=monthly["positivity_pct"],
                colorscale="Magma",
                cmin=0,
                cmax=100
            ),
            opacity=0.70,  # make line stand out
            width=22 * 24 * 60 * 60 * 1000,  # ~22 days
            hovertemplate="%{x|%Y-%m}<br>Positivity: %{y:.1f}%"
        )
    )

    # Create line chart
    fig.add_trace(
        go.Scatter(
            x=monthly["year_month"],
            y=monthly["positivity_pct"],
            name="Trend",
            mode="lines+markers",
            line=dict(color="#00d4ff", width=3),
            marker=dict(size=8, color="#00d4ff", symbol="circle"),
            hovertemplate="%{x|%Y-%m}<br>Positivity: %{y:.1f}%"
        )
    )

    # Dark layout
    fig.update_layout(
        title=dict(
            text=("Monthly Sentiment - %s Reviews (Bar + Trend)" % language),
            font=dict(size=20, color="#e0e0e0")
        ),
        xaxis_title="Month",
        yaxis_title="Positive Reviews (%)",
        template="plotly_dark",
        height=600,
        hovermode="x unified",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            font=dict(color="#e0e0e0")
        ),
        margin=dict(l=60, r=40, t=80, b=60),
        bargap=0.18
    )

    # Y axis
    fig.update_yaxes(
        range=[0, 100],
        title_font=dict(color="#e0e0e0"),
        tickfont=dict(color="#cccccc")
    )

    # X axis
    fig.update_xaxes(
        tickformat="%Y-%m",
        tickangle=-45,
        title_font=dict(color="#e0e0e0"),
        tickfont=dict(color="#cccccc")
    )

    fig.show()


plot_monthly_review()

In [29]:
reviews = database.get_reviews("japanese")
get_positivity_percentage()
plot_monthly_review(language="Japanese")

In [None]:
"""
November 2025 Update

âœ” Square Enix will release update 1.004 on November 4, 2025, for Final Fantasy VII Rebirth on PC.

âœ” This update adds support for DLSS 4 Multi-Frame Generation (increasing performance/frames for Nvidia GPUs), along with bug fixes and stability improvements for the game.
"""