In [87]:
# imports
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
# NOTE(review): star import from a local `passwords` module. Presumably this is where
# `api_key` (used when building the Steam API request) comes from -- confirm, and prefer
# an explicit `from passwords import api_key` so the origin of the name is visible.
from passwords import *
# simple confirmation that the import cell ran to completion
print('Done!')

Done!


In [88]:
# Query the Steam IStoreService/GetAppList endpoint for up to 50 thousand game app ids.
# The parsed JSON is kept in `data`; the app entries are read from data['response']['apps'].
link = 'https://api.steampowered.com/IStoreService/GetAppList/v1/'
p = {
    # the key travels in `params` so requests URL-encodes it instead of it being pasted into the URL
    "key": api_key,
    "format": "json",
    "include_games": True,
    "include_dlc": False,
    "include_software": False,
    "include_videos": False,
    "include_hardware": False,
    "max_results": 50000,   # max allowed
}
# a timeout stops the cell from hanging forever on a stalled connection
r = requests.get(link, params=p, timeout=30)
# fail loudly on an HTTP error (rejected key, rate limit, ...) rather than with an
# unrelated JSON-decode error or KeyError further down
r.raise_for_status()
data = r.json()
len(data['response']['apps']) # displays how many apps came back (expected: 50 thousand)

50000

In [89]:
# Turn the app entries of the API response into a two-column data frame:
# 'name' (the app's name) and 'app_id' (taken from the entry's 'appid' field).
list_apps = data['response']['apps']
df_ids = pd.DataFrame({
    'name': [entry['name'] for entry in list_apps],
    'app_id': [entry['appid'] for entry in list_apps],
})

# show
df_ids

Unnamed: 0,name,app_id
0,Counter-Strike,10
1,Team Fortress Classic,20
2,Day of Defeat,30
3,Deathmatch Classic,40
4,Half-Life: Opposing Force,50
...,...,...
49995,Hyper Light Breaker,1534840
49996,Floor Plan 2,1534850
49997,Behold The Dark,1534860
49998,风之界限 the border of wind,1534870


In [90]:
# Load the sales data and inner-join it with the Steam ids on the game title
# (sales 'Name' == Steam 'name', exact string match), so only games that appear in
# both the sales data set and the Steam app list are kept.
df_sales = pd.read_csv('vgsales.csv')
df_salesid = df_sales.merge(
    df_ids,
    how='inner',
    left_on='Name',
    right_on='name',
)
df_salesid

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,name,app_id
0,99,Call of Duty: World at War,X360,2008.0,Shooter,Activision,4.79,1.90,0.00,0.69,7.37,Call of Duty: World at War,10090
1,110,Fallout 4,PS4,2015.0,Role-Playing,Bethesda Softworks,2.47,3.15,0.24,1.10,6.96,Fallout 4,377160
2,152,Resident Evil 2,PS,1998.0,Action,Virgin Interactive,1.88,1.47,2.02,0.45,5.82,Resident Evil 2,883710
3,167,Call of Duty: World at War,PS3,2008.0,Shooter,Activision,2.72,1.87,0.00,0.84,5.43,Call of Duty: World at War,10090
4,197,Resident Evil 5,PS3,2009.0,Action,Capcom,1.96,1.43,1.08,0.65,5.11,Resident Evil 5,21690
...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,16556,Bound By Flame,X360,2014.0,Role-Playing,,0.00,0.01,0.00,0.00,0.01,Bound By Flame,243930
904,16565,Mighty No. 9,XOne,2016.0,Platform,Deep Silver,0.01,0.00,0.00,0.00,0.01,Mighty No. 9,314710
905,16568,Teslagrad,PSV,2015.0,Platform,Rain Games,0.00,0.01,0.00,0.00,0.01,Teslagrad,249590
906,16589,Secret Files 2: Puritas Cordis,DS,2009.0,Adventure,Deep Silver,0.00,0.01,0.00,0.00,0.01,Secret Files 2: Puritas Cordis,40340


In [91]:
# Scrapes steamcharts.com once per distinct app id and collects, for every page that can
# be fetched and parsed: the game title, the all-time player peak and the last-30-day
# average player count. The result is stored in df_data with the columns
# 'game_name', 'all_time_peak' and 'last_30_day_avg'.
list_rows = []      # one (game_name, all_time_peak, last_30_day_avg) tuple per scraped page
list_skipped = []   # app ids whose page could not be fetched or parsed (kept for inspection)

# df_salesid repeats an app id for every sales row of the same game, so request each id
# only once: this avoids redundant requests and duplicate rows in df_data
app_ids = df_salesid['app_id'].unique()

# a session reuses the connection across the many requests to the same host
with requests.Session() as session:
    for app_id in tqdm(app_ids):
        # make the request
        link = f"https://steamcharts.com/app/{app_id}"
        try:
            r = session.get(link, timeout=30)
        except requests.RequestException:
            # network failure / timeout: skip this game instead of losing everything scraped so far
            list_skipped.append(app_id)
            continue
        # if page doesn't exist just move on
        if r.status_code != 200:
            list_skipped.append(app_id)
            continue

        # make the bs object; naming the parser avoids bs4's "guessed parser" warning and
        # keeps the parsing independent of which optional parsers are installed
        bs = BeautifulSoup(r.content, 'html.parser')

        # parse all three fields before storing anything, so a page with an unexpected
        # layout never leaves a partially filled row behind
        try:
            # find the title
            name = bs.find(id='app-title').a.text
            # find all time player peak
            peak = int(bs.find_all(class_='app-stat')[2].span.text)
            # find the last 30 day average players
            avg_30 = float(bs.find_all(class_='right num-f italic')[0].text)
        except (AttributeError, IndexError, ValueError):
            # expected element missing or its text is not a number: skip this page
            list_skipped.append(app_id)
            continue

        list_rows.append((name, peak, avg_30))

# build the data frame; explicit columns keep the schema intact even if nothing was scraped
df_data = pd.DataFrame(list_rows, columns=['game_name', 'all_time_peak', 'last_30_day_avg'])

  0%|          | 0/908 [00:00<?, ?it/s]

In [92]:
# Joins the scraped player counts onto the sales rows and saves the result to game_data.csv.
# df_data can hold the same game more than once while df_salesid lists a game once per
# sales row; merging repeated keys on both sides would multiply the rows, so keep a
# single player-count row per game before merging.
df_players = df_data.drop_duplicates(subset='game_name')
df_final = pd.merge(left=df_players, right=df_salesid, how='left', left_on='game_name', right_on='Name')
# the join keys and the Steam id are redundant once the frames are combined
list_drop = ['game_name', 'name', 'app_id']
# assign instead of inplace=True so the cell can be re-run without mutating earlier state
df_final = df_final.drop(columns=list_drop)
df_final.to_csv('./game_data.csv')