In [1]:
import requests
import pandas as pd
import numpy as np

Some test ids

In [2]:
# Steam app ids used as sample inputs for the rest of the notebook.
tf2_id = 440  # Team Fortress 2
csgo_id = 730  # Counter-Strike: Global Offensive
rl_id = 252950  # Rocket League
fo4_id = 377160  # Fallout 4
starfield_id = 1716740  # Starfield

Function to build a row for the dataframe based on the game's available information

In [3]:
def fetch(game_info, categories):
    """Build one dataframe row (a flat dict) from a single game's details.

    Parameters
    ----------
    game_info : dict
        Details of one game. Nested dicts are walked key by key.
    categories : list of list of str
        Each entry is a path of keys into ``game_info``, for example
        ``['price_overview', 'initial']``.

    Returns
    -------
    dict
        One entry per category, keyed by the path joined with spaces
        (``'price_overview initial'``). The value is whatever sits at the end
        of the path, or ``np.nan`` when the path cannot be followed. Each
        genre listed under ``game_info['genres']`` additionally contributes a
        ``'Genre: <description>'`` key set to ``True``.
    """
    info = {}
    for category in categories:
        cur_info = game_info
        found = True

        for subpart in category:
            # Only dicts can be descended into. Any other value (list, string,
            # number, None) part-way along the path means the path is missing.
            if isinstance(cur_info, dict) and subpart in cur_info:
                cur_info = cur_info[subpart]
            else:
                found = False
                break

        # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
        info[' '.join(category)] = cur_info if found else np.nan

    # Genre is handled differently from the rest because a game can have multiple genres.
    # They can also have multiple publishers/developers but I don't think that we should
    # treat that the same way, because the number of genres should be much smaller than
    # the number of developers.
    # Not sure how the "dummy variable trap" works here either. I think each row should
    # have a column for each genre, but it doesn't make sense for the 0 vector to default
    # to some genre when games can have more than one. For example, if the 0 vector defaulted
    # to FPS, then for a game that is an FPS and RPG, what columns should be marked 1? If
    # it is just the RPG column, then how would you tell which games are just RPGs and which
    # ones are FPS RPG hybrids?
    # Also could use 'categories' instead of 'genres', but 'categories' has a bunch of extra
    # stuff like if the game has steam achievements etc.
    # Could also do both, but that would be a lot of columns (probably will be a bunch with
    # just genres).
    for genre in game_info.get('genres') or []:
        info['Genre: ' + genre['description']] = True

    return info

Categories to go into the dataframe

In [4]:
# Key paths to extract from each game's details; nested values are given as
# [outer_key, inner_key].
categories = [
    ['name'],
    ['type'],
    ['steam_appid'],
    ['developers'],
    ['publishers'],
    ['is_free'],
    # 'initial' doesn't mean price on launch, it means the price right now
    # excluding sales (at least I'm 90% sure).
    ['price_overview', 'initial'],
    ['achievements', 'total'],
    ['release_date', 'date'],
    ['metacritic', 'score'],
    ['recommendations', 'total'],
]

# Comma-separated filter string: the top-level key of every path above,
# followed by 'basic' and 'genres'.
filters = ','.join([path[0] for path in categories] + ['basic', 'genres'])
filters

'name,type,steam_appid,developers,publishers,is_free,price_overview,achievements,release_date,metacritic,recommendations,basic,genres'

Iterates through the listed apps and stores the data for each one.\
A potential issue is that the information returned for CSGO is sometimes in Russian and sometimes in English. I didn't see any parameter to specify the result language, so I'm not quite sure what to do about this.\
Adding the `filters` parameter may have fixed the issue, possibly because the filter names are in English.

In [5]:
# Steam store endpoint queried once per app id.
URL = "https://store.steampowered.com/api/appdetails"
# Seconds to wait for the server; without a timeout requests.get can block forever.
REQUEST_TIMEOUT = 10
all_info = []
app_ids = [tf2_id, fo4_id, rl_id, csgo_id, starfield_id]

for app_id in app_ids:
    # make api query
    # NOTE(review): this endpoint is commonly called with an extra `l=english`
    # parameter to pin the response language — confirm before relying on it.
    PARAMS = {'appids': app_id, 'filters': filters}
    response = requests.get(url=URL, params=PARAMS, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    # The payload is keyed by the app id as a string. Skip apps for which the
    # response carries no 'data' entry rather than crashing with a KeyError.
    app_entry = (response.json() or {}).get(str(app_id)) or {}
    if 'data' not in app_entry:
        print(f"No data returned for app id {app_id}; skipping it.")
        continue
    game_info = app_entry['data']
    # done
    info = fetch(game_info, categories)
    all_info.append(info)

Creates a dataframe from the data

In [6]:
# One row per fetched game, built directly from the list of row dicts.
df = pd.DataFrame(all_info)
df.head()

Unnamed: 0,name,type,steam_appid,developers,publishers,is_free,price_overview initial,achievements total,release_date date,metacritic score,recommendations total,Genre: Action,Genre: Free to Play,Genre: RPG,Genre: Indie,Genre: Racing,Genre: Sports
0,Team Fortress 2,game,440,[Valve],[Valve],True,,520.0,"Oct 10, 2007",92.0,13106.0,True,True,,,,
1,Fallout 4,game,377160,[Bethesda Game Studios],[Bethesda Softworks],False,1999.0,84.0,"Nov 9, 2015",84.0,174306.0,,,True,,,
2,Rocket League®,game,252950,[Psyonix LLC],[Psyonix LLC],False,,88.0,"Jul 6, 2015",86.0,411496.0,True,,,True,True,True
3,Counter-Strike: Global Offensive,game,730,"[Valve, Hidden Path Entertainment]",[Valve],True,,167.0,"Aug 21, 2012",83.0,3370137.0,True,True,,,,
4,Starfield,game,1716740,[Bethesda Game Studios],[Bethesda Softworks],False,,,11.11.22,,,,,True,,,


Replaces the NaN values in the genre columns with `False`.

In [7]:
# A genre column holds True for games that have the genre and NaN otherwise.
# Turn each one into a plain boolean column. Assign the result back instead of
# calling fillna(..., inplace=True) on a column selection: that chained
# in-place call is deprecated and does not update df under pandas Copy-on-Write.
for col_name in list(df.columns):
    if col_name.startswith('Genre: '):
        # Go through the nullable 'boolean' dtype so the missing values are
        # filled without relying on implicit object-dtype downcasting.
        df[col_name] = df[col_name].astype('boolean').fillna(False).astype(bool)
df.head()

Unnamed: 0,name,type,steam_appid,developers,publishers,is_free,price_overview initial,achievements total,release_date date,metacritic score,recommendations total,Genre: Action,Genre: Free to Play,Genre: RPG,Genre: Indie,Genre: Racing,Genre: Sports
0,Team Fortress 2,game,440,[Valve],[Valve],True,,520.0,"Oct 10, 2007",92.0,13106.0,True,True,False,False,False,False
1,Fallout 4,game,377160,[Bethesda Game Studios],[Bethesda Softworks],False,1999.0,84.0,"Nov 9, 2015",84.0,174306.0,False,False,True,False,False,False
2,Rocket League®,game,252950,[Psyonix LLC],[Psyonix LLC],False,,88.0,"Jul 6, 2015",86.0,411496.0,True,False,False,True,True,True
3,Counter-Strike: Global Offensive,game,730,"[Valve, Hidden Path Entertainment]",[Valve],True,,167.0,"Aug 21, 2012",83.0,3370137.0,True,True,False,False,False,False
4,Starfield,game,1716740,[Bethesda Game Studios],[Bethesda Softworks],False,,,11.11.22,,,False,False,True,False,False,False


Fixes the release dates. The parameter `errors='coerce'` means that if pandas cannot figure out what the date format means, then it will set the value to pd.NaT. This could be important if the date gets returned in another language.

In [8]:
def _parse_release_date(raw_date):
    """Convert one raw release-date value to a timestamp; unparseable values become ``pd.NaT``."""
    return pd.to_datetime(raw_date, errors='coerce')


# Each entry is parsed on its own, so rows may use different date formats.
df['release_date date'] = df['release_date date'].apply(_parse_release_date)
df.head()

Unnamed: 0,name,type,steam_appid,developers,publishers,is_free,price_overview initial,achievements total,release_date date,metacritic score,recommendations total,Genre: Action,Genre: Free to Play,Genre: RPG,Genre: Indie,Genre: Racing,Genre: Sports
0,Team Fortress 2,game,440,[Valve],[Valve],True,,520.0,2007-10-10,92.0,13106.0,True,True,False,False,False,False
1,Fallout 4,game,377160,[Bethesda Game Studios],[Bethesda Softworks],False,1999.0,84.0,2015-11-09,84.0,174306.0,False,False,True,False,False,False
2,Rocket League®,game,252950,[Psyonix LLC],[Psyonix LLC],False,,88.0,2015-07-06,86.0,411496.0,True,False,False,True,True,True
3,Counter-Strike: Global Offensive,game,730,"[Valve, Hidden Path Entertainment]",[Valve],True,,167.0,2012-08-21,83.0,3370137.0,True,True,False,False,False,False
4,Starfield,game,1716740,[Bethesda Game Studios],[Bethesda Softworks],False,,,2022-11-11,,,False,False,True,False,False,False
