In [1]:
from helpers import database
import pandas as pd

In [2]:
# Load the review table through the project's `database` helper and display it.
reviews = database.get_reviews()
reviews

Unnamed: 0,id,recommendation_id,author_steamid,author_num_games_owned,author_num_reviews,author_playtime_forever,author_playtime_last_two_weeks,author_playtime_at_review,author_last_played,language,...,timestamp_updated,voted_up,votes_up,votes_funny,weighted_vote_score,comment_count,steam_purchase,received_for_free,written_during_early_access,primarily_steam_deck
0,215760880,,76561199416294706,0,6,2368,626,2368,1768152153,schinese,...,1768152264,True,0,0,0.5,0,False,False,False,False
1,215760124,,76561199565605732,0,4,325,325,264,1768155336,english,...,1768151687,True,0,0,0.5,0,True,False,False,False
2,215758567,,76561198057788180,0,6,1358,181,1267,1768154576,japanese,...,1768150503,False,0,0,0.5,0,True,False,False,False
3,215756568,,76561198026330868,39,10,4201,2559,4150,1768154369,english,...,1768148999,True,0,0,0.5,0,True,False,False,False
4,215756060,,76561199751191801,0,2,5242,5242,5242,1768148529,french,...,1768148608,True,0,0,0.5,0,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27471,215831579,,76561198424217785,144,16,12106,4719,11544,1768411220,schinese,...,1768233953,False,0,0,0.5,0,True,False,False,False
27472,215980691,,76561198117111316,146,43,4564,3527,4564,1768424323,czech,...,1768427023,True,0,0,0.5,0,True,False,False,False
27473,215979542,,76561198866209389,0,5,6212,1167,6212,1768337188,english,...,1768425376,True,0,0,0.5,0,True,False,False,False
27474,215978161,,76561198092445456,0,10,605,605,605,1768340896,english,...,1768423605,True,0,0,0.5,0,True,False,False,False


In [3]:
# List the column labels available on the review table.
reviews.columns

Index(['id', 'recommendation_id', 'author_steamid', 'author_num_games_owned',
       'author_num_reviews', 'author_playtime_forever',
       'author_playtime_last_two_weeks', 'author_playtime_at_review',
       'author_last_played', 'language', 'review', 'timestamp_created',
       'timestamp_updated', 'voted_up', 'votes_up', 'votes_funny',
       'weighted_vote_score', 'comment_count', 'steam_purchase',
       'received_for_free', 'written_during_early_access',
       'primarily_steam_deck'],
      dtype='object')

In [4]:
# Turn the epoch-second creation stamps into datetimes and keep them on the frame.
created_at = pd.to_datetime(reviews["timestamp_created"], unit="s")
reviews["created"] = created_at

# The earliest review is the origin for the "days since" axis.
first_review_time = created_at.min()
elapsed_since_first = created_at - first_review_time
reviews["days_since_first"] = elapsed_since_first.dt.days

# One row per elapsed day with the number of reviews created on that day.
daily_counts = (
    reviews
    .groupby("days_since_first")
    .size()
    .reset_index(name="count")
)

In [5]:
import plotly.express as px

# Bar chart of the number of reviews per day, counted from the earliest review.
# Columns are referenced by name so Plotly reads them straight from
# `daily_counts`; `labels` supplies both the axis titles and the hover labels,
# matching how the later date-based charts in this notebook are built.
# NOTE(review): the axis says "release", but `days_since_first` is measured
# from the earliest review timestamp — confirm the two coincide.
fig = px.bar(
    daily_counts,
    x="days_since_first",
    y="count",
    title='Reviews over time',
    labels={
        "days_since_first": "Days since release",
        "count": "Number of reviews",
    },
)
fig.show()

In [6]:
# Whole hours elapsed between the earliest review and each review
# (the fractional part is dropped by the integer cast).
elapsed_hours = (reviews["created"] - first_review_time) / pd.Timedelta(hours=1)
reviews["hours_since_first"] = elapsed_hours.astype(int)

# Review counts per hour for the first 48 hours, i.e. hour indices 0..47.
# Series.between() is inclusive on both ends, so the upper bound is 47;
# an upper bound of 48 would add a 49th hourly bucket to the result.
hourly_counts = (
    reviews.loc[reviews["hours_since_first"].between(0, 47)]
           .groupby("hours_since_first")
           .size()
           .reset_index(name="count")
)

In [7]:
# Bar chart of the number of reviews per hour, counted from the earliest review.
# Columns are referenced by name so Plotly reads them straight from
# `hourly_counts`; `labels` supplies both the axis titles and the hover labels,
# matching how the later date-based charts in this notebook are built.
# NOTE(review): the axis says "release", but `hours_since_first` is measured
# from the earliest review timestamp — confirm the two coincide.
fig = px.bar(
    hourly_counts,
    x="hours_since_first",
    y="count",
    title='Reviews over time (first 48 hours)',
    labels={
        "hours_since_first": "Hours since release",
        "count": "Number of reviews",
    },
)
fig.show()

In [8]:
# The ten reviews whose authors have the largest total playtime, shown with
# the playtime divided by 60 (presumably minutes -> hours; confirm the unit).
author_columns = ["author_steamid", "author_playtime_forever", "review"]
ranked_by_playtime = reviews[author_columns].sort_values(
    "author_playtime_forever", ascending=False
)
longest_ten = ranked_by_playtime.head(10)
top_authors = longest_ten.assign(
    playtime_hours=longest_ten["author_playtime_forever"] / 60
)[["author_steamid", "playtime_hours", "review"]]
top_authors

Unnamed: 0,author_steamid,playtime_hours,review
27310,76561199444262981,4824.783333,无需多言，看我时长，就知道这是一部神作。我觉得很有意思，量大管饱，小游戏也很好玩，偶尔消遣时...
19898,76561198274830708,3004.1,《最终幻想7 重生》不负期待！开放世界探索自由且浪漫，陆行鸟驰骋草原、金碟游乐场狂欢，经典元...
21560,76561198885434451,1843.866667,"The most beloved game, improvement in texture ..."
23241,76561198235770124,1464.733333,优化太垃圾了，经常到了一个场景没有渲染完成，整个游戏就卡住了
25763,76561198293805262,1350.283333,好玩 除了小游戏
13054,76561199349139816,1292.133333,垃圾游戏，还我爱丽丝，呜呜呜，毛线的回归星球，寄刀子还给我委婉上了
14896,76561198819740747,1240.0,能不能好好优化啊，我3060全部画面基本都是选低还是卡顿掉帧
15676,76561197998361624,1225.05,"This game is OUTSTANDING, the game play, the c..."
17441,76561199706531709,1222.883333,Runs great on my 75 in TCL even TY you Square :)
25233,76561198858198351,1199.583333,第三部搞快点!


In [9]:
# Playtime at review time divided by 60, limited to the range 0..300 and
# truncated to a whole number. Because of the cap, anything above 300 is
# counted in the 300 bucket rather than dropped.
hours_at_review = reviews["author_playtime_at_review"].div(60)
reviews["playtime_hours"] = hours_at_review.clip(lower=0, upper=300).astype(int)

# Number of reviews for each whole-hour playtime value.
playtime_counts = (
    reviews
    .groupby("playtime_hours")
    .size()
    .reset_index(name="count")
)

In [10]:
# Display the table of review counts per whole hour of playtime.
playtime_counts

Unnamed: 0,playtime_hours,count
0,0,1000
1,1,607
2,2,594
3,3,1094
4,4,804
...,...,...
292,295,2
293,296,1
294,297,2
295,299,3


In [11]:
# Bar chart of how many reviews fall on each whole-hour playtime value.
# Columns are referenced by name so Plotly reads them straight from
# `playtime_counts`; `labels` supplies both the axis titles and the hover
# labels, matching how the later date-based charts in this notebook are built.
fig = px.bar(
    playtime_counts,
    x="playtime_hours",
    y="count",
    title='Distribution of reviews by playtime (up to 300 hours)',
    labels={
        "playtime_hours": "Hours played",
        "count": "Number of reviews",
    },
)
fig.show()

In [12]:
# Bucket the whole-hour playtime into 5-hour bins and compute the share of
# positive reviews (`voted_up`) in each bin.
#
# right=False makes every bin half-open, [start, start + 5), so each bin holds
# exactly five whole-hour values (0-4, 5-9, ...). With right-closed bins and
# include_lowest=True the first bin held six values (0-5) and every later bin
# was shifted by one hour (6-10, 11-15, ...).
# The last edge is 305 so that the value 300 — the cap applied where
# `playtime_hours` is computed — still lands in a bin, [300, 305).
playtime_bin_edges = range(0, 310, 5)
reviews["playtime_bin"] = pd.cut(
    reviews["playtime_hours"],
    bins=playtime_bin_edges,
    right=False,
)

# observed=False is spelled out so empty bins stay in the result (as NaN)
# regardless of the pandas default for categorical group keys.
positivity = (
    reviews.groupby("playtime_bin", observed=False)["voted_up"]
    .mean()
    .reset_index()
)
positivity["positivity_pct"] = positivity["voted_up"] * 100





In [13]:
# Bar chart of the percentage of positive reviews per playtime bin.
# The interval bins are converted to strings (as before) so they can serve as
# category labels; the conversion is done on a plotting copy so the chart can
# reference columns by name and `positivity` itself is left untouched.
positivity_for_plot = positivity.assign(
    playtime_bin=positivity["playtime_bin"].astype(str)
)

# `labels` supplies both the axis titles and the hover labels, matching how
# the later date-based charts in this notebook are built.
fig = px.bar(
    positivity_for_plot,
    x="playtime_bin",
    y="positivity_pct",
    title='Sentiment by playtime (up to 300 hours)',
    labels={
        "playtime_bin": "Hours played (binned)",
        "positivity_pct": "Positive reviews (%)",
    },
)
fig.show()

In [14]:
# Share of positive reviews (mean of `voted_up`) for every whole-hour
# playtime value, first as a fraction and then scaled to a percentage.
votes_by_hour = reviews.groupby("playtime_hours")["voted_up"]
positivity_per_hour = votes_by_hour.mean()
positivity_pct = positivity_per_hour.mul(100)
positivity_pct.head()

playtime_hours
0    60.100000
1    68.698517
2    69.865320
3    83.363803
4    80.099502
Name: voted_up, dtype: float64

In [15]:
# Bar chart of the percentage of positive reviews per whole hour of playtime.
# The chart previously received `positivity_per_hour` (fractions) as its data
# frame while the plotted values came from `positivity_pct`; build one tidy
# frame from the percentages so the data frame and the plotted columns agree.
positivity_pct_table = (
    positivity_pct
    .rename_axis("playtime_hours")
    .reset_index(name="positivity_pct")
)

# `labels` supplies both the axis titles and the hover labels, matching how
# the later date-based charts in this notebook are built.
fig = px.bar(
    positivity_pct_table,
    x="playtime_hours",
    y="positivity_pct",
    title='Sentiment by playtime (up to 300 hours)',
    labels={
        "playtime_hours": "Hours played",
        "positivity_pct": "Positive reviews (%)",
    },
)
fig.show()

In [16]:
release_date = pd.Timestamp("2025-01-23")  # ff7 rebirth published date
window_half_width = pd.Timedelta(days=30)

# Recompute the creation datetime so this cell does not depend on an earlier one.
reviews["created"] = pd.to_datetime(reviews["timestamp_created"], unit="s")

# Keep only reviews created within 30 days either side of the release date
# (both ends inclusive). Note that `reviews` is rebound to the filtered frame.
created_in_window = reviews["created"].between(
    release_date - window_half_width,
    release_date + window_half_width,
)
reviews = reviews[created_in_window]

In [18]:
# Number of reviews per calendar date of creation.
review_date = reviews["created"].dt.date
daily_counts = (
    reviews
    .groupby(review_date)
    .size()
    .reset_index(name="count")
)
daily_counts.head()

Unnamed: 0,created,count
0,2025-01-23,948
1,2025-01-24,1787
2,2025-01-25,1173
3,2025-01-26,963
4,2025-01-27,672


In [22]:
# Bar chart of the number of reviews per calendar date, at a fixed 1000x500 size.
volume_axis_labels = {"created": "Date", "count": "Reviews"}
fig = px.bar(
    daily_counts,
    x="created",
    y="count",
    labels=volume_axis_labels,
    title="Daily review volume (30 days from FF7 Rebirth release)",
    width=1000,
    height=500,
)
fig.show()

In [23]:
# Fraction of positive reviews (mean of `voted_up`) per calendar date of creation.
review_date = reviews["created"].dt.date
daily_sentiment = (
    reviews
    .groupby(review_date)["voted_up"]
    .mean()
    .reset_index(name="positivity")
)
daily_sentiment.head()

Unnamed: 0,created,positivity
0,2025-01-23,0.817511
1,2025-01-24,0.811416
2,2025-01-25,0.776641
3,2025-01-26,0.785047
4,2025-01-27,0.770833


In [24]:
# Bar chart of the daily positive-review fraction. Bars are coloured on a
# red (0) to green (1) scale and the y axis is fixed to 0..1, shown as percentages.
sentiment_axis_labels = {"created": "Date", "positivity": "Sentiment"}
red_to_green_scale = [(0, "red"), (1, "green")]
unit_range = [0, 1]

fig = px.bar(
    daily_sentiment,
    x="created",
    y="positivity",
    color="positivity",
    title="Daily sentiment (30 days from FF7 Rebirth release)",
    labels=sentiment_axis_labels,
    color_continuous_scale=red_to_green_scale,
    range_color=unit_range,
    range_y=unit_range,
)
# Fixed figure size; the colour bar is hidden because the y axis already shows the value.
fig.update_layout(width=1000, height=500, coloraxis_showscale=False)
fig.update_yaxes(tickformat=".0%")
fig.show()