In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt

In [None]:
# Get the data from: https://www.kaggle.com/datasets/najzeko/steam-reviews-2021/data

In [2]:
# Load a reproducible ~10% random sample of the Steam reviews CSV.
#
# Outputs of this cell:
#   total_rows    - number of physical lines in the file, minus the header line
#   sample_size   - 10% of total_rows, used as an upper bound on rows read
#   steam_reviews - DataFrame holding the sampled rows
CSV_PATH = 'data/steam_reviews.csv'
SAMPLE_FRACTION = 0.1
SAMPLE_SEED = 42  # fixed so that Restart & Run All yields the same sample

# Count lines inside a context manager so the file handle is closed again.
# The encoding is explicit so the count does not depend on the platform locale
# (pd.read_csv decodes as UTF-8 by default).
# NOTE(review): this counts physical lines, not CSV records; if review texts
# contain embedded newlines the count over-estimates the rows - confirm.
with open(CSV_PATH, encoding='utf-8') as csv_file:
    total_rows = sum(1 for _ in csv_file) - 1  # subtract 1 for header
sample_size = int(total_rows * SAMPLE_FRACTION)

# A locally seeded generator keeps the draw independent of the global
# np.random state; it is recreated on every run, so the cell is idempotent.
sample_rng = np.random.default_rng(SAMPLE_SEED)

# Row 0 (the header) is always kept; every other row is skipped with
# probability 1 - SAMPLE_FRACTION. nrows only caps the number of rows read.
steam_reviews = pd.read_csv(
    CSV_PATH,
    skiprows=lambda x: x > 0 and sample_rng.random() > SAMPLE_FRACTION,
    nrows=sample_size,
)

In [9]:
# Distinct game titles occurring in the sampled reviews, as a plain Python
# list in order of first appearance.
app_names = steam_reviews['app_name'].unique().tolist()
app_names

['The Witcher 3: Wild Hunt',
 'Half-Life',
 'Counter-Strike: Source',
 'Half-Life 2: Episode Two',
 'Portal 2',
 'X Rebirth',
 "Garry's Mod",
 "Sid Meier's Civilization V",
 'Dead by Daylight',
 "Sid Meier's Civilization VI",
 'Subnautica',
 'Human: Fall Flat',
 'Beat Saber',
 'Cold Waters',
 'Banished',
 'Celeste',
 'Getting Over It with Bennett Foddy',
 'A Hat in Time',
 'Overcooked! 2',
 'Slipstream',
 'The Forest',
 'Pogostuck: Rage With Your Friends',
 'PC Building Simulator',
 'RollerCoaster Tycoon World',
 'NBA 2K18',
 'NBA 2K21',
 'Deus Ex: The Fall',
 'Rapture Rejects',
 'Artifact',
 'Call of Duty: Infinite Warfare',
 'Cube World',
 'NBA 2K19',
 'Nether',
 'Wolfenstein: Youngblood',
 'Warhammer 40,000: Dawn of War III',
 'Takedown: Red Sabre',
 'ATLAS',
 'Stay Out',
 'Identity',
 'Umbrella Corps',
 'Hunt Down The Freeman',
 'WWE 2K20',
 'Down To One',
 'Axiom Verge',
 'Guacamelee! Super Turbo Championship Edition',
 'The Binding of Isaac: Rebirth',
 'To the Moon',
 'Cave Story

In [71]:
# Possible review bombing of GTA V in summer 2017: negative English reviews
# that mention Take-Two or OpenIV.
#
# The window bounds are Unix epoch seconds and are both exclusive:
#   1496268000 == 2017-05-31 22:00:00 UTC (2017-06-01 00:00 at UTC+2)
#   1504130400 == 2017-08-30 22:00:00 UTC (2017-08-31 00:00 at UTC+2)
# NOTE(review): the upper bound therefore excludes 2017-08-31 itself - confirm
# that this is the intended end of the window.
BOMBING_WINDOW_START = 1496268000
BOMBING_WINDOW_END = 1504130400

# One named mask per criterion; they are combined in the order listed.
bombing_is_gta = steam_reviews["app_name"].str.contains("Grand Theft Auto", case=False)
bombing_is_english = steam_reviews["language"].eq("english")
bombing_is_negative = steam_reviews["recommended"].eq(False)
bombing_after_start = steam_reviews["timestamp_updated"].gt(BOMBING_WINDOW_START)
bombing_before_end = steam_reviews["timestamp_updated"].lt(BOMBING_WINDOW_END)
bombing_mentions_dispute = (
    steam_reviews["review"].str.contains("Take-Two", case=False)
    | steam_reviews["review"].str.contains("OpenIV", case=False)
)

one_game_only_english = steam_reviews[
    bombing_is_gta
    & bombing_is_english
    & bombing_is_negative
    & bombing_after_start
    & bombing_before_end
    & bombing_mentions_dispute
]
one_game_only_english.head()

Unnamed: 0.1,Unnamed: 0,app_id,app_name,review_id,language,review,timestamp_created,timestamp_updated,recommended,votes_helpful,...,steam_purchase,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played
1350800,13495662,271590,Grand Theft Auto V,33962788,english,"Great singleplayer, multiplayer sucks, terribl...",1501644652,1501644652,False,3,...,True,False,False,76561198132924456,38,9,15105.0,166.0,8513.0,1610421000.0
1350811,13495727,271590,Grand Theft Auto V,33955270,english,You've fucked up... The new policy against sin...,1501613759,1501613759,False,2,...,True,False,False,76561198189604237,106,7,34257.0,0.0,8587.0,1587179000.0
1351185,13499372,271590,Grand Theft Auto V,33543458,english,Even despite all the greedy shit you see regar...,1500062032,1500062032,False,2,...,True,False,False,76561197995197483,95,10,125682.0,0.0,90563.0,1587524000.0
1351334,13500925,271590,Grand Theft Auto V,33406827,english,"Great story, I’ve beat it on Xbox360, PS4 and ...",1499613657,1499613902,False,2,...,True,False,False,76561198184426493,269,7,7052.0,0.0,4126.0,1596399000.0
1351351,13501020,271590,Grand Theft Auto V,33399885,english,I'm giving this a bad review even though mods ...,1499595404,1499595404,False,2,...,True,False,False,76561198154315720,56,4,11114.0,52.0,9474.0,1610401000.0


In [72]:
# Baseline for the comparison: every English review of a "Grand Theft Auto"
# title whose timestamp_updated lies strictly inside the window, regardless
# of recommendation or review text.
#
# The bounds are Unix epoch seconds:
#   1496268000 == 2017-05-31 22:00:00 UTC
#   1504130400 == 2017-08-30 22:00:00 UTC
BASELINE_WINDOW_START = 1496268000
BASELINE_WINDOW_END = 1504130400

baseline_is_gta = steam_reviews["app_name"].str.contains("Grand Theft Auto", case=False)
baseline_is_english = steam_reviews["language"].eq("english")
baseline_after_start = steam_reviews["timestamp_updated"].gt(BASELINE_WINDOW_START)
baseline_before_end = steam_reviews["timestamp_updated"].lt(BASELINE_WINDOW_END)

total_gta_reviews = steam_reviews[
    baseline_is_gta
    & baseline_is_english
    & baseline_after_start
    & baseline_before_end
]
total_gta_reviews.head()

Unnamed: 0.1,Unnamed: 0,app_id,app_name,review_id,language,review,timestamp_created,timestamp_updated,recommended,votes_helpful,...,steam_purchase,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played
1350094,13488684,271590,Grand Theft Auto V,34615343,english,So.. about the hackers..,1504118575,1504118575,True,0,...,False,False,False,76561198063532715,197,23,2777.0,4.0,1262.0,1610933000.0
1350099,13488736,271590,Grand Theft Auto V,34610097,english,The multiplayer experience is fun with friends...,1504105512,1504105512,False,2,...,False,False,False,76561198070093498,25,2,13720.0,0.0,12586.0,1482410000.0
1350109,13488832,271590,Grand Theft Auto V,34602798,english,"Hackers, a lot of them. and game play is more ...",1504086318,1504086318,False,6,...,True,False,False,76561198212339955,33,2,55152.0,0.0,33473.0,1591718000.0
1350112,13488855,271590,Grand Theft Auto V,34600758,english,NODER GOOD GAME,1504079999,1504079999,True,0,...,True,False,False,76561198190295782,19,3,8058.0,0.0,3431.0,1603400000.0
1350115,13488867,271590,Grand Theft Auto V,34599703,english,"Holy crap, this game has turned into a huge mo...",1504076371,1504076371,False,10,...,True,False,False,76561198001099030,267,6,51000.0,0.0,19727.0,1609126000.0


In [77]:
# Relate the flagged negative reviews to all English GTA reviews in the window.
# len() of a DataFrame is its row count.
rows_one_game_only_english = len(one_game_only_english)
rows_total_gta_reviews = len(total_gta_reviews)

# Report both counts, then the flagged share of the baseline as a percentage.
print(f"Number of rows in one_game_only_english: {rows_one_game_only_english}")
print(f"Number of rows in total_gta_reviews: {rows_total_gta_reviews}")
print(f"Percentage of review bombing: {rows_one_game_only_english / rows_total_gta_reviews * 100:.2f}%")

Number of rows in one_game_only_english: 668
Number of rows in total_gta_reviews: 4307
Percentage of review bombing: 15.51%
