## Research Question: Most Popular Steam Games

1. What were the top 5 games of the year in 2023-2024 based on trends of ratings?
   
2. What did the distribution of review score descriptions look like for the top 5 games?
   
3. Is there a pattern in the metadata, such as developer, genres, and pricing, that appears across the top 5 games?

In [None]:
# Run this cell to ensure that altair plots show up without having
# the notebook be really large.
# We will talk more about what these lines do later in the course

import os
import altair as alt
import pandas as pd
from toolz.curried import pipe
import numpy as np
import ast

# Create a new data transformer that stores the files in a directory
def json_dir(data, data_dir='altairdata', max_rows=None):
    """Altair data transformer that writes chart data to JSON files in a directory.

    Parameters
    ----------
    data
        The chart data handed over by Altair; passed unchanged to
        ``alt.to_json``.
    data_dir : str
        Directory that receives the JSON files. It is created when missing.
    max_rows
        Accepted and ignored. ``alt.data_transformers.disable_max_rows()``
        re-enables the active transformer with ``max_rows=None`` and Altair
        forwards that option as a keyword argument; without this parameter
        the call fails with ``TypeError`` once this transformer is active.
        No row limit is applied by this transformer.

    Returns
    -------
    Whatever ``alt.to_json`` returns for ``data``.
    """
    os.makedirs(data_dir, exist_ok=True)
    # Keep the literal '/' separator: the template is handed to Altair as-is
    # (presumably it ends up as a URL in the chart spec -- TODO confirm).
    filename_template = data_dir + '/{prefix}-{hash}.{extension}'
    return pipe(data, alt.to_json(filename=filename_template))

# Make the directory-backed transformer known to Altair, then switch to it
transformers = alt.data_transformers
transformers.register('json_dir', json_dir)
transformers.enable('json_dir')

# Lift the row cap so large data sets can be plotted (default shows only 5000)
# See here: https://altair-viz.github.io/user_guide/data_transformers.html
transformers.disable_max_rows()

# Select the 'jupyterlab' renderer for chart output
alt.renderers.enable('jupyterlab')

ModuleNotFoundError: No module named 'altair'

In [None]:
# Relative location of the cleaned games table
path = '../../data/processed/cleaned_games.csv'
# Read the CSV into the DataFrame the cells below work on
data = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Positive reviews per negative review; adding 1 to the denominator keeps
# games with zero negative reviews from dividing by zero
data["review_ratio"] = data["total_positive"].div(data["total_negative"].add(1))
data

Unnamed: 0.1,Unnamed: 0,steam_appid,name,developers,publishers,categories,genres,required_age,n_achievements,platforms,...,total_reviews,total_positive,total_negative,review_score,review_score_desc,positive_percentual,metacritic,is_free,price_initial (USD),review_ratio
0,0,2719580,勇者の伝説の勇者,['ぽけそう'],['ぽけそう'],"['Single-player', 'Family Sharing']","['Casual', 'Indie']",0,0,['windows'],...,0,0,0,0.0,No user reviews,0.0,0,False,0.99,0.000000
1,2,2719600,Lorhaven: Cursed War,['GoldenGod Games'],['GoldenGod Games'],"['Single-player', 'Multi-player', 'PvP', 'Shar...","['RPG', 'Strategy']",0,32,"['windows', 'mac']",...,9,8,1,0.0,9 user reviews,88.9,0,False,9.99,4.000000
2,3,2719610,PUIQ: Demons,['Giammnn'],['Giammnn'],"['Single-player', 'Steam Achievements', 'Famil...","['Action', 'Casual', 'Indie', 'RPG']",0,28,['windows'],...,0,0,0,0.0,No user reviews,0.0,0,False,2.99,0.000000
3,4,2719650,Project XSTING,['Saucy Melon'],['Saucy Melon'],"['Single-player', 'Steam Achievements', 'Steam...","['Action', 'Casual', 'Indie', 'Early Access']",0,42,['windows'],...,9,9,0,0.0,9 user reviews,100.0,0,False,7.99,9.000000
4,7,2719710,Manor Madness,['Apericot Studio'],['Apericot Studio'],"['Single-player', 'Steam Achievements', 'HDR a...","['Action', 'Adventure', 'Indie', 'RPG', 'Simul...",0,5,"['windows', 'mac', 'linux']",...,0,0,0,0.0,No user reviews,0.0,0,True,0.00,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58036,71423,2294320,PIP XL,"[""Cute Hannah's Games""]","[""Cute Hannah's Games""]","['Single-player', 'Steam Achievements', 'Steam...","['Adventure', 'Casual', 'Indie', 'Simulation',...",0,100,['windows'],...,1,1,0,0.0,1 user reviews,100.0,0,False,1.99,1.000000
58037,71424,2294300,Touchdown Pinball,['Super PowerUp Games'],['Super PowerUp Games'],"['Single-player', 'Steam Achievements', 'Full ...",['Simulation'],0,15,['windows'],...,2,2,0,0.0,2 user reviews,100.0,0,False,2.99,2.000000
58038,71426,2294160,Isle of Swaps,['Fuzz Force'],['Fuzz Force'],"['Single-player', 'Steam Achievements', 'Steam...","['Indie', 'RPG', 'Strategy']",0,29,['windows'],...,72,66,6,8.0,Very Positive,91.7,0,False,14.99,9.428571
58039,71427,2294130,Goobies,['Knifes'],['Knifes'],"['Single-player', 'Steam Achievements', 'Full ...","['Action', 'Casual', 'Indie', 'RPG', 'Strategy']",0,43,['windows'],...,1442,1312,130,8.0,Very Positive,91.0,0,False,5.99,10.015267


In [3]:
# Point selection keyed on the game's name; shared by both charts below
selector = alt.selection_point(fields=['name'])

# Selected points are coloured by `is_free`; all other points are light gray
color = alt.condition(selector, alt.Color("is_free:N"), alt.value("lightgray"))

# Scatter plot: review volume against review ratio, with the game name as tooltip.
# The selection is attached here, so this chart drives the interaction.
scatter = (
    alt.Chart(data)
    .mark_point()
    .encode(
        x='total_reviews',
        y='review_ratio',
        tooltip='name',
        color=color,
    )
    .add_params(selector)
)

# Bar chart (drawn with mark_bar, not a box plot): review score per review
# score description, filtered down to the current selection
box = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x='review_score_desc',
        y='review_score',
    )
    .transform_filter(selector)
)

# Place the scatter plot and the bar chart side by side
scatter | box

NameError: name 'alt' is not defined