In [1]:
import pandas as pd 
import json
import ast

In [2]:
# Lift pandas' display truncation so every column and every row is rendered.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

The original steam_games.json is not a valid JSON file, so I first changed its extension to .txt (steam_games.txt) and then processed the text file line by line. I used Python's ast.literal_eval, which reads each line and converts it to the appropriate data structure, in this case a dictionary. The resulting records are written out as a valid JSON file, which can then be converted to a DataFrame.

In [29]:
# Script to convert the steam_games.txt file to a valid JSON file.
# Each non-blank line of the input is expected to hold one Python-style
# dictionary literal; the parsed records are written out as a JSON array.

# File paths
input_file = r'steam_games.txt'
output_file = r'steam_games_preprocessed.json'

# Convert each line to a Python dictionary and store in a list.
# The file is streamed line by line rather than loaded whole with readlines().
data = []
failed_lines = 0
with open(input_file, 'r', encoding='utf-8') as file:
    for line_number, line in enumerate(file, start=1):
        text = line.strip()
        if not text:
            # A blank line carries no record; skip it instead of reporting
            # it as a parse error.
            continue
        try:
            # ast.literal_eval only evaluates literals, so nothing in the
            # file is executed as code.
            record = ast.literal_eval(text)
        except (ValueError, SyntaxError) as e:
            failed_lines += 1
            # Echo only the start of the line so one malformed record
            # cannot flood the cell output.
            print(f"Error parsing line {line_number}: {text[:200]}\n{e}")
            continue
        data.append(record)

# Write the data to a JSON file (ensure_ascii=False keeps non-ASCII titles readable)
with open(output_file, 'w', encoding='utf-8') as json_file:
    json.dump(data, json_file, indent=4, ensure_ascii=False)

# Only claim success when every non-blank line was parsed.
if failed_lines:
    print(f"Saved {len(data)} records to {output_file}; "
          f"{failed_lines} line(s) could not be parsed and were skipped")
else:
    print(f"Data successfully converted and saved to {output_file}")

Data successfully converted and saved to steam_games_preprocessed.json


In [3]:
# Load the preprocessed JSON records into a DataFrame
preprocessed_json_path = "steam_games_preprocessed.json"
games_df = pd.read_json(preprocessed_json_path)

In [4]:
# Preview the first 16 rows to see which columns exist and what their values look like
games_df.head(16)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140.0,Kotoshiro,,
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980.0,Secret Level SRL,Mostly Positive,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290.0,Poolians.com,Mostly Positive,
3,彼岸领域,"[Action, Adventure, Casual]",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"[Action, Adventure, Casual]",0.83,http://steamcommunity.com/app/767400/reviews/?...,[Single-player],0.99,False,767400.0,彼岸领域,,
4,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"[Action, Indie, Casual, Sports]",1.79,http://steamcommunity.com/app/773570/reviews/?...,"[Single-player, Full controller support, HTC V...",2.99,False,773570.0,,,
5,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...",,http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,False,772540.0,Trickjump Games Ltd,Mixed,
6,,"[Free to Play, Indie, Simulation, Sports]",SNOW - All Access Basic Pass,SNOW - All Access Basic Pass,http://store.steampowered.com/app/774276/SNOW_...,2018-01-04,"[Free to Play, Indie, Simulation, Sports]",,http://steamcommunity.com/app/774276/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",9.99,False,774276.0,Poppermost Productions,,
7,Poppermost Productions,"[Free to Play, Indie, Simulation, Sports]",SNOW - All Access Pro Pass,SNOW - All Access Pro Pass,http://store.steampowered.com/app/774277/SNOW_...,2018-01-04,"[Free to Play, Indie, Simulation, Sports]",,http://steamcommunity.com/app/774277/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",18.99,False,774277.0,Poppermost Productions,,
8,Poppermost Productions,"[Free to Play, Indie, Simulation, Sports]",SNOW - All Access Legend Pass,SNOW - All Access Legend Pass,http://store.steampowered.com/app/774278/SNOW_...,2018-01-04,"[Free to Play, Indie, Simulation, Sports]",,http://steamcommunity.com/app/774278/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",29.99,False,774278.0,Poppermost Productions,,
9,RewindApp,"[Casual, Indie, Racing, Simulation]",Race,Race,http://store.steampowered.com/app/768800/Race/,2018-01-04,"[Indie, Casual, Simulation, Racing]",,http://steamcommunity.com/app/768800/reviews/?...,"[Single-player, Multi-player, Partial Controll...",,False,768800.0,RewindApp,,


In [5]:
# Tally how often each distinct 'sentiment' value occurs.
# Many values are not ratings at all but raw review counts
# ('1 user reviews', '2 user reviews', etc.).
sentiment_counts = games_df['sentiment'].value_counts()
sentiment_counts

sentiment
Mixed                      4103
Very Positive              3868
Positive                   3281
Mostly Positive            2744
1 user reviews             2496
2 user reviews             1756
3 user reviews             1231
4 user reviews              964
5 user reviews              846
Mostly Negative             802
6 user reviews              756
7 user reviews              619
8 user reviews              537
9 user reviews              488
Overwhelmingly Positive     303
Negative                    123
Very Negative                29
Overwhelmingly Negative       7
Name: count, dtype: int64

The games that don't have a proper sentiment have too few reviews for Steam to give them a sentiment value. Perhaps we can ignore these video games for Bayesian scoring.

In [None]:
# Inspect games whose 'sentiment' is the review-count value '1 user reviews'
# rather than a named rating
is_single_review = games_df['sentiment'].eq('1 user reviews')
filtered_entries = games_df.loc[is_single_review]

# Show the first few matching rows
filtered_entries.head()

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
15,Apillo,"[Adventure, Casual, Indie, Simulation, Strategy]",The first thrust of God,The first thrust of God,http://store.steampowered.com/app/773650/The_f...,2018-01-03,"[Strategy, Adventure, Indie, Casual, Simulatio...",,http://steamcommunity.com/app/773650/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",1.59,False,773650.0,Apillo,1 user reviews,
50,Wonderbox Games,"[Action, Indie]",Orbitality,Orbitality,http://store.steampowered.com/app/753100/Orbit...,2017-12-29,"[Indie, Action]",5.99,http://steamcommunity.com/app/753100/reviews/?...,"[Single-player, Steam Achievements]",9.99,False,753100.0,Wonderbox Games,1 user reviews,
201,Dovetail Games,[Simulation],UK Van Wagon Pack,UK Van Wagon Pack,http://store.steampowered.com/app/256535/UK_Va...,2009-01-01,[Simulation],,http://steamcommunity.com/app/256535/reviews/?...,"[Single-player, Downloadable Content, Steam Ac...",4.99,False,256535.0,RailSimulator.com,1 user reviews,
241,Twisted Pixel Games,[Indie],The Maw: Speeder Lane,The Maw: Speeder Lane,http://store.steampowered.com/app/26007/The_Ma...,2009-04-29,[Indie],,http://steamcommunity.com/app/26007/reviews/?b...,"[Single-player, Downloadable Content, Steam Ac...",1.25,False,26007.0,Twisted Pixel Games,1 user reviews,
328,Nitro Games,[Strategy],East India Company: Privateer,East India Company: Privateer,http://store.steampowered.com/app/42800/East_I...,2009-10-27,[Strategy],,http://steamcommunity.com/app/42800/reviews/?b...,"[Single-player, Downloadable Content]",4.99,False,42800.0,Nitro Games,1 user reviews,63.0


Another column, the Metascore, could also be used to get an overall rating of video games.

In [8]:
# The ten most frequent Metascore values
metascore_counts = games_df['metascore'].value_counts()
metascore_counts.head(10)

metascore
80    125
74    107
77    104
75    103
76    102
78     99
72     98
68     97
73     95
81     95
Name: count, dtype: int64

In [None]:
# Count the rows that actually carry a Metascore
has_metascore = games_df['metascore'].notna()
non_null_metascore_count = has_metascore.sum()

print(f"Number of entries with non-null metascore: {non_null_metascore_count}")

Number of entries with non-null metascore: 2677


Not enough video games have a Metascore (only 2,677 of the 32,135 entries), so we'll stick to sentiment.

We have lots of video games without a proper sentiment rating, so we can remove them

In [45]:
# (rows, columns) of the full dataset, before any filtering
games_df.shape

(32135, 16)

In [47]:
# Keep only the rows whose 'sentiment' value is present (not missing)
has_sentiment = games_df['sentiment'].notna()
updated_games_df = games_df[has_sentiment]

# Preview the result
updated_games_df.head()

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980.0,Secret Level SRL,Mostly Positive,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290.0,Poolians.com,Mostly Positive,
5,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...",,http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,False,772540.0,Trickjump Games Ltd,Mixed,
15,Apillo,"[Adventure, Casual, Indie, Simulation, Strategy]",The first thrust of God,The first thrust of God,http://store.steampowered.com/app/773650/The_f...,2018-01-03,"[Strategy, Adventure, Indie, Casual, Simulatio...",,http://steamcommunity.com/app/773650/reviews/?...,"[Single-player, Steam Achievements, Steam Cloud]",1.59,False,773650.0,Apillo,1 user reviews,
21,Tero Lunkka,"[Action, Adventure, Indie]",The Warrior Of Treasures,The Warrior Of Treasures,http://store.steampowered.com/app/768060/The_W...,2018-01-03,"[Action, Adventure, Indie]",0.59,http://steamcommunity.com/app/768060/reviews/?...,[Single-player],0.99,False,768060.0,Tero Lunkka,3 user reviews,


In [None]:
updated_games_df.shape
# 24953 video games with a non-missing sentiment value
# (this still includes the 'N user reviews' entries)

(24953, 16)

In [10]:
# Keep only games whose 'sentiment' is a named rating; rows with a missing
# sentiment or an 'N user reviews' value are left out.

# The sentiment labels that count as valid
valid_sentiments = [
    'Positive', 'Very Positive', 'Mostly Positive',
    'Overwhelmingly Positive',
    'Mixed', 'Negative', 'Very Negative',
    'Mostly Negative', 'Overwhelmingly Negative',
]

# Boolean mask of rows carrying one of the valid labels, then select them
has_valid_sentiment = games_df['sentiment'].isin(valid_sentiments)
valid_sentiment_games_df = games_df.loc[has_valid_sentiment]

In [11]:
# Preview the first rows of the filtered DataFrame
# (rows keep their original index labels, so the index is no longer consecutive)
valid_sentiment_games_df.head()

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
1,"Making Fun, Inc.","[Free to Play, Indie, RPG, Strategy]",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"[Free to Play, Strategy, Indie, RPG, Card Game...",,http://steamcommunity.com/app/643980/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free To Play,False,643980.0,Secret Level SRL,Mostly Positive,
2,Poolians.com,"[Casual, Free to Play, Indie, Simulation, Sports]",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"[Free to Play, Simulation, Sports, Casual, Ind...",,http://steamcommunity.com/app/670290/reviews/?...,"[Single-player, Multi-player, Online Multi-Pla...",Free to Play,False,670290.0,Poolians.com,Mostly Positive,
5,Trickjump Games Ltd,"[Action, Adventure, Simulation]",Battle Royale Trainer,Battle Royale Trainer,http://store.steampowered.com/app/772540/Battl...,2018-01-04,"[Action, Adventure, Simulation, FPS, Shooter, ...",,http://steamcommunity.com/app/772540/reviews/?...,"[Single-player, Steam Achievements]",3.99,False,772540.0,Trickjump Games Ltd,Mixed,
27,Stainless Games Ltd,"[Action, Indie, Racing]",Carmageddon Max Pack,Carmageddon Max Pack,http://store.steampowered.com/app/282010/Carma...,1997-06-30,"[Racing, Action, Classic, Indie, Gore, 1990's,...",,http://steamcommunity.com/app/282010/reviews/?...,"[Single-player, Multi-player, Steam Trading Ca...",9.99,False,282010.0,Stainless Games Ltd,Very Positive,
28,Valve,[Action],Half-Life,Half-Life,http://store.steampowered.com/app/70/HalfLife/,1998-11-08,"[FPS, Classic, Action, Sci-fi, Singleplayer, S...",,http://steamcommunity.com/app/70/reviews/?brow...,"[Single-player, Multi-player, Valve Anti-Cheat...",9.99,False,70.0,Valve,Overwhelmingly Positive,96.0


In [None]:
valid_sentiment_games_df.shape
# Now we have 15260 games with a valid sentiment value (out of 32135 in the full dataset)

(15260, 16)

-------------------------------------------------------------------------

# Simple Demo of the Trending Now Page