In [107]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import matplotlib.pyplot as plt

In [108]:
# Seed NumPy's legacy global random generator so that code drawing from
# np.random.* afterwards produces the same numbers on every fresh run.
np.random.seed(seed=42)

In [109]:
# Get the data from: https://www.kaggle.com/datasets/najzeko/steam-reviews-2021/data

In [110]:
# Location of the Steam reviews dump (used for both the line count and the read).
STEAM_REVIEWS_CSV = 'data/steam_reviews.csv'

# Get the total number of lines in the CSV file.
# The file is opened in a `with` block so the handle is closed deterministically,
# and decoded as UTF-8 to match the default encoding pd.read_csv uses below.
# NOTE(review): this counts physical text lines, not CSV records; quoted fields
# containing newlines would inflate the count -- confirm against the data.
with open(STEAM_REVIEWS_CSV, encoding='utf-8') as csv_file:
    total_rows = sum(1 for _ in csv_file) - 1  # subtract 1 for header
sample_size = int(total_rows * 0.1)

# Read only ~10% of the CSV file: the header (row 0) is always kept, every other
# row is skipped when a uniform draw from NumPy's global generator exceeds 0.1.
# nrows caps the number of rows read at sample_size.
steam_reviews = pd.read_csv(
    STEAM_REVIEWS_CSV,
    skiprows=lambda x: x > 0 and np.random.rand() > 0.1,
    nrows=sample_size,
)

In [111]:
# Distinct values of the app_name column, converted to a plain Python list
app_names = steam_reviews['app_name'].unique().tolist()
app_names

['The Witcher 3: Wild Hunt',
 'Half-Life',
 'Counter-Strike: Source',
 'Half-Life 2: Episode Two',
 'Portal 2',
 'X Rebirth',
 "Garry's Mod",
 "Sid Meier's Civilization V",
 'Dead by Daylight',
 "Sid Meier's Civilization VI",
 'Subnautica',
 'Human: Fall Flat',
 'Beat Saber',
 'Cold Waters',
 'Banished',
 'Celeste',
 'Getting Over It with Bennett Foddy',
 'A Hat in Time',
 'Overcooked! 2',
 'Slipstream',
 'The Forest',
 'Pogostuck: Rage With Your Friends',
 'PC Building Simulator',
 'RollerCoaster Tycoon World',
 'NBA 2K18',
 'NBA 2K21',
 'Deus Ex: The Fall',
 'Rapture Rejects',
 'Artifact',
 'Call of Duty: Infinite Warfare',
 'Cube World',
 'NBA 2K19',
 'Nether',
 'Wolfenstein: Youngblood',
 'Warhammer 40,000: Dawn of War III',
 'Takedown: Red Sabre',
 'ATLAS',
 'Stay Out',
 'Identity',
 'Umbrella Corps',
 'Hunt Down The Freeman',
 'WWE 2K20',
 'Down To One',
 'Axiom Verge',
 'Guacamelee! Super Turbo Championship Edition',
 'The Binding of Isaac: Rebirth',
 'To the Moon',
 'Cave Story

In [112]:
# Shape (rows, columns) of the subset of reviews whose language is "english"
steam_reviews.loc[steam_reviews["language"].eq("english")].shape

(962962, 23)

In [113]:
# Possible review bombing for GTA V between 2017-06-01 and 2017-07-31.
# The window bounds are compared against timestamp_updated as epoch seconds:
# 1496268000 is 2017-05-31 22:00:00 UTC and 1501538399 is 2017-07-31 21:59:59 UTC,
# i.e. the calendar dates above at a UTC+2 offset. Both comparisons are strict,
# so the two boundary seconds themselves are excluded.
BOMBING_WINDOW_START = 1496268000
BOMBING_WINDOW_END = 1501538399

# na=False makes a missing app name or review text an explicit non-match instead
# of leaving NaN in the boolean mask and relying on implicit coercion.
is_gta = steam_reviews["app_name"].str.contains("Grand Theft Auto", case=False, na=False)
is_english = steam_reviews["language"] == "english"
is_not_recommended = steam_reviews["recommended"] == False
in_bombing_window = (
    (steam_reviews["timestamp_updated"] > BOMBING_WINDOW_START)
    & (steam_reviews["timestamp_updated"] < BOMBING_WINDOW_END)
)
# Reviews whose text mentions either keyword (case-insensitive).
mentions_keywords = (
    steam_reviews["review"].str.contains("Take-Two", case=False, na=False)
    | steam_reviews["review"].str.contains("OpenIV", case=False, na=False)
)

# Negative English GTA reviews updated inside the window that mention a keyword.
one_game_only_english = steam_reviews[
    is_gta & is_english & is_not_recommended & in_bombing_window & mentions_keywords
]
one_game_only_english.head()

Unnamed: 0.1,Unnamed: 0,app_id,app_name,review_id,language,review,timestamp_created,timestamp_updated,recommended,votes_helpful,...,steam_purchase,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played
1349925,13496915,271590,Grand Theft Auto V,33812000,english,OpenIV fiasco shows how a company tries to dic...,1501085473,1501085531,False,0,...,True,False,False,76561198024673004,15,2,10175.0,0.0,4410.0,1509200000.0
1350420,13501446,271590,Grand Theft Auto V,33367091,english,Give us openiv back,1499502010,1499502010,False,3,...,True,False,False,76561198054595216,88,3,11103.0,0.0,10133.0,1569958000.0
1350441,13501704,271590,Grand Theft Auto V,33345032,english,Take Two basically killed their own reputation...,1499439418,1499439567,False,2,...,False,False,False,76561198087267083,20,2,9593.0,0.0,1280.0,1565367000.0
1350443,13501715,271590,Grand Theft Auto V,33343849,english,What's done is done. Removing OpenIV greatly d...,1499436776,1499436776,False,2,...,True,False,False,76561198239439730,24,1,16134.0,0.0,7453.0,1592070000.0
1350468,13501935,271590,Grand Theft Auto V,33329577,english,Ever since the recent mistake take two has mad...,1499390873,1499390873,False,1,...,True,False,False,76561198198306749,12,2,7086.0,0.0,3314.0,1592323000.0


In [114]:
# All English GTA reviews (recommended or not) updated inside the window
# 2017-06-01 to 2017-07-31. The bounds are compared against timestamp_updated as
# epoch seconds: 1496268000 is 2017-05-31 22:00:00 UTC and 1501538399 is
# 2017-07-31 21:59:59 UTC. Both comparisons are strict, so the boundary seconds
# themselves are excluded.
BOMBING_WINDOW_START = 1496268000
BOMBING_WINDOW_END = 1501538399

# na=False makes a missing app name an explicit non-match instead of leaving NaN
# in the boolean mask and relying on implicit coercion.
is_gta = steam_reviews["app_name"].str.contains("Grand Theft Auto", case=False, na=False)
is_english = steam_reviews["language"] == "english"
in_bombing_window = (
    (steam_reviews["timestamp_updated"] > BOMBING_WINDOW_START)
    & (steam_reviews["timestamp_updated"] < BOMBING_WINDOW_END)
)

total_gta_reviews = steam_reviews[is_gta & is_english & in_bombing_window]
total_gta_reviews.head()

Unnamed: 0.1,Unnamed: 0,app_id,app_name,review_id,language,review,timestamp_created,timestamp_updated,recommended,votes_helpful,...,steam_purchase,received_for_free,written_during_early_access,author.steamid,author.num_games_owned,author.num_reviews,author.playtime_forever,author.playtime_last_two_weeks,author.playtime_at_review,author.last_played
1349836,13496038,271590,Grand Theft Auto V,33917530,english,This Positive Review was Ruined by the Error\n...,1501462530,1501462530,False,1,...,True,False,False,76561198033281896,143,1,25309.0,0.0,16139.0,1598193000.0
1349838,13496053,271590,Grand Theft Auto V,33915584,english,This game is fun and ha a lot of aspects to it...,1501453984,1501453984,True,0,...,True,True,False,76561198332312545,18,1,2823.0,65.0,2659.0,1610338000.0
1349846,13496157,271590,Grand Theft Auto V,33901816,english,░░░░░░░░░░░░░░░░░░░░░░█████████\n░░███████░░░░...,1501411178,1501411178,True,1,...,True,False,False,76561198035380665,129,74,17746.0,0.0,15743.0,1579447000.0
1349848,13496167,271590,Grand Theft Auto V,33900688,english,Always a have a fun time playing online and th...,1501407064,1501407064,True,1,...,True,False,False,76561197984771212,356,2,8695.0,0.0,837.0,1526875000.0
1349849,13496172,271590,Grand Theft Auto V,33899864,english,lets m e m e the community by banning people f...,1501403596,1501403596,False,2,...,True,False,False,76561198085821142,182,6,10094.0,0.0,6345.0,1580589000.0


In [115]:
# Compare the number of rows of both tables
rows_one_game_only_english = len(one_game_only_english)
rows_total_gta_reviews = len(total_gta_reviews)

# Share of the keyword-matching negative reviews among all reviews in the window.
# If the total is empty the ratio is undefined, so report NaN instead of raising
# ZeroDivisionError.
if rows_total_gta_reviews:
    review_bombing_pct = rows_one_game_only_english / rows_total_gta_reviews * 100
else:
    review_bombing_pct = float("nan")

print(f"Number of rows in one_game_only_english: {rows_one_game_only_english}")
print(f"Number of rows in total_gta_reviews: {rows_total_gta_reviews}")
print(f"Percentage of review bombing: {review_bombing_pct:.2f}%")

Number of rows in one_game_only_english: 655
Number of rows in total_gta_reviews: 4111
Percentage of review bombing: 15.93%
