  This is an API for Steam Spy. It accepts requests in GET string and returns data in JSON arrays.

  Allowed poll rate - 4 requests per second.

  ## Examples: ##
   
  * http://steamspy.com/api.php?request=appdetails&appid=730 - returns data for Counter-Strike: Global Offensive
  * http://steamspy.com/api.php?request=top100in2weeks - return Top 100 apps by players in the last two weeks
		

  ## Common parameters: ##
 
  * request - code for API request call.
  * appid - Application ID (a number).


  ## Accepted requests: ##
  
  ### appdetails ###

  Returns details for the specific application. Requires *appid* parameter.  

  ### genre ###

  Returns games in this particular genre. Requires *genre* parameter and works like this:
  
  * http://steamspy.com/api.php?request=genre&genre=Early+Access


  ### top100in2weeks ###

  Returns Top 100 games by players in the last two weeks.

  ### top100forever ###

  Returns Top 100 games by players since March 2009.

  ### top100owned ###

  Returns Top 100 games by owners.

  ### all ###

  Returns all games with owners data sorted by owners.


  ## Return format for an app: ##

  * appid - Steam Application ID. If it's 999999, then data for this application is hidden on developer's request, sorry.
  * name - the game's name
  * developer - comma separated list of the developers of the game
  * publisher - comma separated list of the publishers of the game
  * score_rank - score rank of the game based on user reviews
  * owners - owners of this application on Steam. **Beware of free weekends!**
  * owners_variance - variance in owners. The real number of owners lies somewhere on owners +/- owners_variance range.   
  * players_forever - people that have played this game since March 2009.
  * players_forever_variance - variance for total players.
  * players_2weeks - people that have played this game in the last 2 weeks.
  * players_2weeks_variance - variance for the number of players in the last two weeks. 
  * average_forever - average playtime since March 2009. In minutes.
  * average_2weeks - average playtime in the last two weeks. In minutes.
  * median_forever - median playtime since March 2009. In minutes.
  * median_2weeks - median playtime in the last two weeks. In minutes.
  * ccu - peak CCU yesterday.
  * price - US price in cents.
  * tags - the game's tags with votes in JSON array


  ## Questions? ##

  Contact me by e-mail: *sergey at galyonkin dot com*.

  

In [1]:
# Import
from bs4 import BeautifulSoup
import pandas as pd
import requests
import datetime
import telegram
import pyprind
import time
import json
import re
import os

# Download all data of the current day and store it

In [2]:
def downloadAPI():
    # Download the data and parse it
    url = 'http://steamspy.com/api.php?request=all'
    data = requests.get(url)#.json()
    print(data)

    # Store the data in file named by the date
    with open('API/{}_SteamSpy_API.json'.format(datetime.date.today()), 'w') as file:
        json.dump(data, file)

# Download the deals and store them

In [3]:
def downloadDeals():
    # Get the html-code and parse it
    url = 'http://steamspy.com/deal/'
    html = requests.get(url).text
    bs = BeautifulSoup(html, 'lxml')

    deals = []
    # Iterate over each game in the list
    for game in bs.find('tbody').findAll('tr'):
        data = [td.get_text().strip() for td in game.findAll('td')]

        # Extract, format and parse the data
        try:
            rank = int(data[0])
        except:
            rank = None
        try:
            name = data[1]
        except:
            name = None
        try:
            appid = int(game.find('a')['href'].rsplit('/', 1)[-1])
        except:
            appid = None
        try:
            release = data[2]
        except:
            release = None
        try:
            old_price = float(data[3].rsplit('$', 1)[-1][:-1])
        except:
            old_price = None
        try:
            discount = float(data[4][:-1])
        except:
            discount = None
        try:
            score_rank = float(data[5].split('%', 1)[0])
        except:
            score_rank = None
        try:
            owners = int(data[6].split(' ')[0].replace(',', ''))
        except:
            owners = None
        try:
            owners_var = int(data[6].split(' ')[1][1:].replace(',', ''))
        except:
            owners_var = None
        try:
            players = int(data[7].split(' ')[0].replace(',', ''))
        except:
            players = None
        try:
            players_var = int(data[7].split(' ')[1][1:].replace(',', ''))
        except:
            players_var = None
        try:
            hours, minutes = data[8].split(' ')[0].split(':')
            play_time_mean = int(hours)*60+int(minutes)
        except:
            play_time_mean = None
        try:
            hours, minutes = data[8].split(' ')[1][1:-1].split(':')
            play_time_median = int(hours)*60+int(minutes)
        except:
            play_time_median = None

        # Collect the data
        deals.append([rank, name, appid, release, old_price, discount, score_rank, owners, owners_var, players, players_var, play_time_mean, play_time_median])

    # Parse the data and store it in a file
    df = pd.DataFrame(deals, columns=['Rank', 'Name', 'App_ID', 'Release', 'Old_Price', 'Discount', 'Score_Rank', 'Owners', 'Owners_Variance', 'Players', 'Players_Variance', 'Play_Time_Mean', 'Play_Time_Median'])
    df['Release'] = pd.to_datetime(df['Release'])
    df.to_csv('Deals/{}_Deals.csv'.format(datetime.date.today()), index=False)

# Download the Countries' Ranks and store them

In [4]:
def downloadCountryRanks():
    # Get the html-code and parse it
    url = 'https://steamspy.com/country/'
    html = requests.get(url).text
    bs = BeautifulSoup(html, 'lxml')

    # Pattern to extract the names
    pattern = re.compile('[1-5]\. ')

    countries = []
    # Iterate over each country
    for row in bs.find('tbody').findAll('tr'):
        entries = row.findAll('td')

        # Extract the data
        country = entries[1].get_text()
        country_link = entries[1].find('a')['href'].rsplit('/', 1)[-1]
        games_per_user = float(entries[2].get_text())
        time = entries[3].get_text().split(':')
        time = 60*int(time[0]) + int(time[1])
        owned_games = re.split(pattern, entries[4].get_text())[1:]
        owned_games = list(zip(list(range(1,6)), owned_games, ['Most Owned Games']*5))
        favorite_games = re.split(pattern, entries[5].get_text())[1:]
        favorite_games = list(zip(list(range(1,6)), favorite_games, ['Favorite Games (2 Weeks)']*5))

        # Iterate over the ranks
        for rank in owned_games+favorite_games:
            countries.append([country, country_link, games_per_user, time, rank[0], rank[1], rank[2]])

    # Parse and save the data
    df = pd.DataFrame(countries, columns=['Country', 'Country_Link', 'Games_Per_User', 'Minutes (2 Weeks)', 'Rank', 'Name', 'Category'])
    df.to_csv('Country_Ranks/{}_Countries.csv'.format(datetime.date.today()), index=False)
    return df['Country_Link'].unique()

# Download the Countries' Games and store them

In [5]:
def downloadCountries():
    country_games = []
    country_data = []

    # Patterns for the data
    pat_country = re.compile('\n\n(.*?)Total')
    pat_active_users = re.compile('Total active users: (.*?) ±')
    pat_active_users_variance = re.compile(' ± (.*?)Share of total users:')
    pat_share_total_users = re.compile('Share of total users: (.*?)%Share of total games:')
    pat_share_total_games = re.compile('Share of total games: (.*?)%Owned games per user:')
    pat_games_per_user = re.compile('Owned games per user: (.*?)Average playtime \(2 weeks\):')
    pat_average_playtime_2_weeks = re.compile('Average playtime \(2 weeks\): (.*?)Average playtime \(total\):')
    pat_average_playtime_total = re.compile('Average playtime \(total\): (.*?)Active users \(2 weeks\):')
    pat_active_users_2_weeks = re.compile('Active users \(2 weeks\): (.*?)%Active users \(total\):')
    pat_active_users_total = re.compile('Active users \(total\): (.*?)%')

    # Iterate over every country
    bar = pyprind.ProgPercent(len(country_links))
    for link in country_links:
        # Get the html-code and parse it
        url = 'http://steamspy.com/country/{}'.format(link)
        html = requests.get(url).text
        bs = BeautifulSoup(html, 'lxml')

        # Get the Country-Data
        text = bs.find('div', {'class':'panel panel-transparent'}).get_text().strip()
        country = re.search(pat_country, text).group(1)
        active_users = int(re.search(pat_active_users, text).group(1).replace(',', ''))
        active_users_variance = int(re.search(pat_active_users_variance, text).group(1).replace(',', ''))
        share_total_users = float(re.search(pat_share_total_users, text).group(1))
        share_total_games = float(re.search(pat_share_total_games, text).group(1))
        games_per_user = float(re.search(pat_games_per_user, text).group(1))
        hours, minutes = re.search(pat_average_playtime_2_weeks, text).group(1).split(':')
        average_playtime_2_weeks = 60*int(hours)+int(minutes)
        hours, minutes = re.search(pat_average_playtime_total, text).group(1).split(':')
        average_playtime_total = 60*int(hours)+int(minutes)
        active_users_2_weeks = float(re.search(pat_active_users_2_weeks, text).group(1))
        active_users_total = float(re.search(pat_active_users_total, text).group(1))

        # Store the Country-Data
        country_data.append([country, active_users, active_users_variance, share_total_users, share_total_games, games_per_user, average_playtime_2_weeks, average_playtime_total, active_users_2_weeks, active_users_total])

        # Iterate over every game
        for game in bs.find('tbody').findAll('tr'):
            data = [td.get_text().strip() for td in game.findAll('td')]

            # Gather the data
            try:
                rank = int(data[0])
            except:
                rank = None
            try:
                name = data[1]
            except:
                name = None
            try:
                appid = int(game.find('a')['href'].rsplit('/', 1)[-1])
            except:
                appid = None
            try:
                player_percentage_2_weeks = float(data[2][:-1])/100
            except:
                player_percentage_2_weeks = None
            try:
                hours, minutes = data[3].split(':')
                average_hours_2_weeks = int(hours)*60+int(minutes)
            except:
                average_hours_2_weeks = None
            try:
                players_percentage_total = float(data[4][:-1])/100
            except:
                players_percentage_total = None
            try:
                hours, minutes = data[5].split(':')
                average_hours_total = int(hours)*60+int(minutes)
            except:
                average_hours_total = None
            try:
                library_share = float(data[6][:-1])/10000
            except:
                library_share = None

            # Store the data
            country_games.append([country, rank, name, appid, player_percentage_2_weeks, average_hours_2_weeks, players_percentage_total, average_hours_total, library_share])
        
        time.sleep(1)
        bar.update()

    # Save country_games to file
    df = pd.DataFrame(country_games, columns=['Country', 'Rank', 'Name', 'App_ID', 'Player_Percentage_2_Weeks', 'Average_Hours_2_Weeks', 'Players_Percentage_Total', 'Average_Hours_Total', 'Library_Share'])
    df.to_csv('Country_Games/{}_Games_Country.csv'.format(datetime.date.today()), index=False)

    # Save country_data to file
    df = pd.DataFrame(country_data, columns=['Country', 'Active_Users', 'Active_Users_Variance', 'Share_Total_Users', 'Share_Total_Games', 'Games_Per_User', 'Average_Playtime_2_Weeks', 'Average_Playtime_Total', 'Active_Users_2_Weeks', 'Active_Users_Total'])
    df.to_csv('Country_Data/{}_Country_Data.csv'.format(datetime.date.today()), index=False)

In [6]:
def botMessage(message):
    bot = telegram.Bot(token='')
    bot.send_message(chat_id=0, text=message)

In [7]:
def folderStructure():
    for folder in ['API', 'Deals', 'Country_Ranks', 'Country_Games', 'Country_Data']:
        if folder not in os.listdir():
            os.mkdir(folder)

# Execute the Programm

In [8]:
try:
    folderStructure()
    
    try:
        downloadAPI()
    except:
        print('1')
    try:
        downloadDeals()
    except:
        print('2')
    try:
        country_links = downloadCountryRanks()
    except:
        print('3')
    try:
        downloadCountries()
    except:
        print('4')
    
    message = 'Gamestar: Alles gut gelaufen'
    botMessage(message)
except:
    message = 'Gamestar: Fehler aufgetreten!'
    botMessage(message)

<Response [200]>
1


[100 %] Time elapsed: 00:02:16 | ETA: 00:00:00
Total time elapsed: 00:02:16
