In [1]:
# Run this cell to ensure that altair plots show up without having
# the notebook be really large.
# We will talk more about what these lines do later in the course

import os
import altair as alt
import pandas as pd
from toolz.curried import pipe
import ast
import numpy as np

# Data transformer that writes chart data to JSON files inside a directory
# rather than keeping the rows inside the notebook itself.
def json_dir(data, data_dir='altairdata'):
    """Pass ``data`` through ``alt.to_json`` with a file name under ``data_dir``.

    Args:
        data: The chart data handed to the transformer.
        data_dir: Directory for the generated files; created if it is missing.

    Returns:
        The result of applying the configured ``alt.to_json`` to ``data``.
    """
    os.makedirs(data_dir, exist_ok=True)
    # The {prefix}/{hash}/{extension} placeholders are passed through
    # untouched for alt.to_json to resolve.
    filename_template = data_dir + '/{prefix}-{hash}.{extension}'
    write_json = alt.to_json(filename=filename_template)
    return pipe(data, write_json)

# Register and enable the new transformer
# (register first so that the name 'json_dir' exists when it is enabled)
alt.data_transformers.register('json_dir', json_dir)
alt.data_transformers.enable('json_dir')

# Handle large data sets (default shows only 5000)
# See here: https://altair-viz.github.io/user_guide/data_transformers.html
# NOTE(review): json_dir itself performs no row-limit check, so this call may
# be redundant while json_dir is the active transformer -- confirm.
alt.data_transformers.disable_max_rows()
# Select the renderer registered under the name 'jupyter' for chart output.
alt.renderers.enable('jupyter')

# Load the games table that every later cell reads as `data`.
# The path is relative, so it resolves against the current working directory.
path = 'data/processed/cleaned_games.csv'
data = pd.read_csv(path)

In [3]:
# Mean review score per developer.  The result column keeps its original name
# 'Count' even though it holds an average rather than a tally.
countByDeveloper = data.groupby('developers')['review_score'].mean().reset_index(name='Count')

# Keep only the ten best-scoring developers.  The sorted frame used to be left
# untruncated, which made the "top 10" filter below let every developer through.
top10Dev = countByDeveloper.sort_values('Count', ascending=False).head(10)

# One row per (game, category).  `.assign` builds a new frame instead of
# writing into a slice of `data`, which avoids SettingWithCopyWarning and
# guarantees `data` itself is left untouched for the other cells.
# `categories` is stored in the CSV as a stringified Python list, hence
# ast.literal_eval.
dataWithTop10Dev = (
    data[data['developers'].isin(top10Dev['developers'])]
    .assign(categories=lambda games: games['categories'].apply(ast.literal_eval))
    .explode('categories')
)

# Category labels to exclude from the charts.
# NOTE(review): 'HDR avaliable' and 'Commentry Avaliable' sit next to their
# correctly spelled variants -- presumably to match spellings in the raw data;
# verify before removing them.
removeGenres = ['Captions available', 'In-App Purchases', 'Remote Play Together',
                'Partial Controller Support', 'Cross-Platform Multiplayer',
                'Online PvP', 'Full controller support', 'HDR avaliable',
                'Includes level editor', 'HDR available', 'Remote Play on Tablet',
                'Shared/Split Screen Co-op', 'Shared/Split Screen', 'Shared/Split Screen PvP',
                'Stats', 'Steam Trading Cards', 'Steam Workshop', 'Family Sharing', 'VR Only', 'VR Support',
                'VR Supported', 'Tracked Controller Support', 'Steam Leaderboards', 'Steam Cloud', 'Mods', 'Steam Achievements',
                'Commentry Avaliable', 'Steam Turn Notifications', 'Commentary available', 'Steam Timeline',
                'Includes Source SDK', 'SteamVR Collectibles', 'Valve Anti-Cheat enabled', 'Mods (require HL2)',
                'LAN Co-op', 'LAN PvP']

# Drop the excluded categories, then every row that still has a missing value
# in any column.
filteredData = dataWithTop10Dev[~dataWithTop10Dev['categories'].isin(removeGenres)].dropna()

# Point selection keyed on the category field; the charts use it for
# highlight colouring.
selector = alt.selection_point(fields=['categories'])

# Drop-down offering every category that survived the filtering above.
categoryOptions = filteredData['categories'].unique().tolist()
categoryMenu = alt.binding_select(options=categoryOptions, name='Select categories')
dropdown = alt.selection_point(
    name='categories',
    fields=['categories'],
    bind=categoryMenu,
)

# Line chart of record counts per review score, one line per category.
# The drop-down filters the rows; the point selection greys out the
# categories that are not selected.
categoryHighlight = alt.condition(selector, 'categories:N', alt.value('lightgray'))

plotOfReviews = (
    alt.Chart(filteredData)
    .mark_line()
    .encode(
        x=alt.X('review_score:Q', title='Review Scores'),
        # count(...) tallies rows, it does not sum the total_reviews values.
        y=alt.Y('count(total_reviews)', title='Total votes'),
        color=categoryHighlight,
        tooltip='mean(review_score):Q',
    )
    .transform_filter(dropdown)
    .add_params(selector, dropdown)
    .properties(
        title='Total amount of scores for each category',
        width=700,
        height=500,
    )
)

# Horizontal bar chart: number of rows per category.
# The aggregated count is encoded as quantitative (:Q).  It was previously
# typed nominal (:N), which put the counts on a discrete axis so the bars had
# no meaningful length.
categoryDistrabution = alt.Chart(filteredData).mark_bar().encode(
    x='count(categories):Q',
    y='categories:N',
    color='categories:N',
    tooltip='count(categories):Q'
).add_params(
    selector
).properties(
    title='Total count of each category'
)

# Pie chart of each category's share of the rows.
# The arc angle must come from a quantitative value, so the count is typed :Q
# (it was :N, which does not give arcs proportional to the counts).
pieCount = alt.Chart(filteredData).mark_arc().encode(
    theta='count(categories):Q',
    # Categories outside the current point selection are greyed out.
    color=alt.condition(selector, 'categories:N', alt.value('lightgray')),
    tooltip='categories:N'
).add_params(
    selector
).properties(
    title='Relative Size comparisons of Categories'
)

# Chart without data or encodings; it only carries the drop-down parameter so
# the control occupies the first slot of the left-hand column.
dropChart = alt.Chart().mark_text().encode().add_params(dropdown)

# Left column: drop-down holder, bar chart, pie chart.  Right: the line chart.
view = (dropChart & categoryDistrabution & pieCount) | (plotOfReviews)

# Save first and finish the cell on `view`: a notebook only displays the value
# of the last expression, and `view.save(...)` evaluates to None, so the chart
# was never rendered when the save call came last.
view.save('gur1.json')
view

In [None]:
def _with_category_count(games):
    """Return a new frame with parsed ``categories`` and a ``catSize`` column.

    ``categories`` is parsed from its stringified-list form with
    ``ast.literal_eval`` (the same parsing the other cells apply to this
    column), so an empty list counts as 0 categories instead of 1 and the
    result does not depend on comma splitting.  ``catSize`` is the number of
    entries in the parsed list.  ``games`` itself is not modified.
    """
    parsed = games['categories'].apply(ast.literal_eval)
    return games.assign(categories=parsed, catSize=parsed.apply(len))


# The 20000 lowest- and highest-scored games.
bottomRated = _with_category_count(
    data.sort_values(by='review_score', ascending=True).head(20000)
)
topRated = _with_category_count(
    data.sort_values(by='review_score', ascending=False).head(20000)
)

# Every game.  This used to be `pd.DataFrame(data)` followed by column
# assignment; that constructor can share its contents with `data` (depending
# on the pandas version / copy-on-write mode), so the assignment could turn
# `data['categories']` into lists and break a re-run of this cell as well as
# any other cell that parses the column.  The helper builds a new frame.
total = _with_category_count(data)



def _category_count_area(games, title):
    """Build a 300x300 area chart of row counts against ``catSize``.

    Args:
        games: Frame providing ``catSize``, ``n_achievement`` and
            ``review_score`` columns.
        title: Chart title.
    """
    # NOTE(review): count(n_achievement) tallies rows; it does not sum the
    # achievements, despite the axis title.
    encoded = alt.Chart(games).mark_area().encode(
        x=alt.X('catSize:Q', title='Number of Categories'),
        y=alt.Y('count(n_achievement):Q', title='Total number of Achievements'),
        color='review_score',
    )
    return encoded.properties(width=300, height=300, title=title)


bottomChart = _category_count_area(
    bottomRated, 'Amount of Categories in Lowest 20000 Rated Games'
)

topChart = _category_count_area(
    topRated, 'Amount of Categories in Highest 20000 Rated Games'
)

# Range slider (0-9, step 1) bound to a point selection on review_score,
# starting at 5.
scoreRange = alt.binding_range(min=0, max=9, step=1, name="Select Score")
slider = alt.selection_point(
    name='score_slider',
    fields=['review_score'],
    bind=scoreRange,
    value=5,
)

# Bar chart over all games, filtered down to the score picked on the slider.
categoriesForIndividualScore = (
    alt.Chart(total)
    .mark_bar()
    .encode(
        x=alt.X('catSize:Q', title='Number of Categories'),
        y=alt.Y('count(n_achievement):Q', title='Total number of Achievements'),
        color='review_score',
    )
    .add_params(slider)
    .transform_filter(slider)
    .properties(
        title='Distrabution of Category amount per Rating Score',
        width=400,
        height=300,
    )
)

# Top row: lowest vs highest rated games.  Below: the slider-driven chart.
topRow = bottomChart | topChart
view = topRow & categoriesForIndividualScore
view


In [None]:
# One row per (game, category).  `.assign` returns a new frame; the earlier
# `pd.DataFrame(data)` + column assignment could write the parsed lists back
# into `data` (the constructor may share its contents with the input), which
# made ast.literal_eval fail when this cell was run a second time.
catCountData = (
    data
    .assign(categories=data['categories'].apply(ast.literal_eval))
    .explode('categories')
)

# Per developer: number of distinct categories and mean review score.
catCountSummary = catCountData.groupby('developers').agg(
    count=('categories', 'nunique'),
    score=('review_score', 'mean')
).reset_index()

# Label the 20000 lowest- and highest-scoring developers.  `.assign` avoids
# setting a column on the slice returned by `.head()`.
# NOTE(review): with fewer than 40000 developers the two groups overlap.
bottomRated = (
    catCountSummary.sort_values(by='score', ascending=True)
    .head(20000)
    .assign(Group='Bottom Rated')
)
topRated = (
    catCountSummary.sort_values(by='score', ascending=False)
    .head(20000)
    .assign(Group='Top Rated')
)
combined = pd.concat([topRated, bottomRated])



# Density estimate of the per-developer category count, computed separately
# for each Group and drawn as a semi-transparent area.
densityLayer = (
    alt.Chart(combined)
    .transform_density(
        'count',
        groupby=['Group'],
        as_=['count', 'density'],
    )
    .mark_area(opacity=0.5)
    .encode(
        x='count:Q',
        y='density:Q',
        color='Group:N',
        tooltip=['Group:N', 'count:Q', 'density:Q'],
    )
)

# One column per Group, with the column header title suppressed.
groupColumns = alt.Column("Group:N", title=None)
density = densityLayer.facet(column=groupColumns).properties(
    title='Density of Categories in Lowest and Highest preforming games'
)

# Arc chart over the per-developer summary: arc angle from `score`, colour
# from `count` on the viridis scheme.
countColor = alt.Color('count:Q', scale=alt.Scale(scheme='viridis'))

pie = (
    alt.Chart(catCountSummary)
    .mark_arc()
    .encode(theta='score', color=countColor, tooltip='count')
    .properties(title='Difference in amount of Categories for each score')
)

# Radio buttons 0-9 bound to a point selection on the `score` field.
# NOTE(review): `score` is a per-developer mean, so an integer option only
# matches developers whose mean is exactly that integer -- confirm intent.
scores = list(range(10))
scoreRadio = alt.binding_radio(options=scores, name="Scores")
selectScore = alt.selection_point(fields=['score'], bind=scoreRadio)

# Selected marks are coloured by score; everything else is light gray.
scoreColor = alt.when(selectScore).then(alt.Color("score:Q")).otherwise(alt.value("lightgray"))

# Bar marks of mean score against number of distinct categories (the variable
# keeps the name `points`).
points = (
    alt.Chart(catCountSummary)
    .mark_bar(filled=True)
    .encode(x='count', y='score', color=scoreColor)
    .add_params(selectScore)
    .properties(title="Scores vs number of categories")
)

# Density facets on top; bars and arc chart side by side underneath.
bottomRow = points | pie
density & bottomRow