In [61]:
import ast
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pymongo import MongoClient
warnings.filterwarnings('ignore')

In [2]:
# Notebook configuration: load the autoreload extension and set it to mode 2,
# and render matplotlib figures inline in the notebook.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [15]:
# Open a MongoDB client with default connection settings and bind the three
# collections of the 'steam_capstone' database that the rest of the notebook
# queries.
client = MongoClient()
db = client['steam_capstone']
market_collection, mc_collection, tags_collection = (
    db[name] for name in ('market', 'metacritic', 'tags')
)

In [38]:
# Load the app-id -> game-name lookup table from disk.
# SECURITY: the original used eval(), which would execute any code placed in
# this file. ast.literal_eval only parses Python literals (dict/list/str/...).
# NOTE(review): this assumes the file contains a plain dict literal — confirm.
with open('data/sourced_id_to_name.txt') as f:
    id_to_name_dict = ast.literal_eval(f.read())

# Example of each document:

### Market:

In [19]:
# Display a single document from the market collection as a schema example.
market_collection.find_one()

{'_id': ObjectId('5b629c758267aeaedbbb6e4a'),
 'item_name': 'Letters, vol. 2: L (Carrot)',
 'app': 614910,
 'prices': [{'date': 'Jul 21 2017 01: +0',
   'median_sell_price': 0.133,
   'quantity': '1'},
  {'date': 'Jul 23 2017 01: +0', 'median_sell_price': 0.13, 'quantity': '2'},
  {'date': 'Jul 29 2017 01: +0', 'median_sell_price': 0.096, 'quantity': '1'},
  {'date': 'Jul 30 2017 01: +0', 'median_sell_price': 0.102, 'quantity': '3'},
  {'date': 'Aug 01 2017 01: +0', 'median_sell_price': 0.08, 'quantity': '1'},
  {'date': 'Aug 02 2017 01: +0', 'median_sell_price': 0.106, 'quantity': '1'},
  {'date': 'Aug 04 2017 01: +0', 'median_sell_price': 0.11, 'quantity': '1'},
  {'date': 'Aug 08 2017 01: +0', 'median_sell_price': 0.081, 'quantity': '1'},
  {'date': 'Aug 09 2017 01: +0', 'median_sell_price': 0.094, 'quantity': '1'},
  {'date': 'Aug 13 2017 01: +0', 'median_sell_price': 0.102, 'quantity': '3'},
  {'date': 'Aug 17 2017 01: +0', 'median_sell_price': 0.079, 'quantity': '1'},
  {'date': 

### Metacritic Scores:

In [20]:
# Display a single document from the metacritic collection as a schema example.
mc_collection.find_one()

{'_id': ObjectId('5b6881348267aee511237fd0'),
 'app': '322330',
 'metacritic_score': 83}

### Tags:

In [21]:
# Display a single document from the tags collection as a schema example.
tags_collection.find_one()

{'_id': ObjectId('5b6881408267aee514ed35b6'),
 'app': '530300',
 'tags': [{'tagid': 597, 'name': 'Casual', 'count': 22},
  {'tagid': 19, 'name': 'Action', 'count': 21},
  {'tagid': 492, 'name': 'Indie', 'count': 21},
  {'tagid': 128, 'name': 'Massively Multiplayer', 'count': 19},
  {'tagid': 3859, 'name': 'Multiplayer', 'count': 12},
  {'tagid': 5350, 'name': 'Family Friendly', 'count': 11},
  {'tagid': 7368, 'name': 'Local Multiplayer', 'count': 11}]}

# Counts of items, games, etc.

In [24]:
# Count the distinct app ids present in each collection.
# distinct('app') makes the server return only the unique 'app' values, so the
# full documents (including each market item's 'prices' array) are not
# transferred just to read one field.
print('Apps in market collection: ' + str(len(market_collection.distinct('app'))))
print('Apps in metacritic collection: ' + str(len(mc_collection.distinct('app'))))
print('Apps in tags collection: ' + str(len(tags_collection.distinct('app'))))

Apps in market collection: 105
Apps in metacritic collection: 19
Apps in tags collection: 110


In [12]:
# Total number of market items (one document per item).
# Collection.count() is deprecated and removed in PyMongo 4;
# count_documents({}) gives the exact document count.
print('Number of items: ' + '{:,}'.format(market_collection.count_documents({})))

Number of items: 97,636


In [10]:
# Total number of price entries, summed over every document in the market
# collection.
num_entries = sum(len(item['prices']) for item in market_collection.find())

In [14]:
# Report the total with thousands separators.
print('Number of data points: ', format(num_entries, ','), sep='')

Number of data points: 43,184,143


# Which games are missing in each category?

In [25]:
# Unique app ids that have market items. distinct() fetches only the 'app'
# values rather than every full market document.
market_apps = set(market_collection.distinct('app'))

In [26]:
# Unique app ids that have a metacritic document. distinct() fetches only the
# 'app' values rather than every full document.
metacritic_apps = set(mc_collection.distinct('app'))

In [27]:
# Unique app ids that have a tags document. distinct() fetches only the 'app'
# values rather than every full document.
tags_apps = set(tags_collection.distinct('app'))

In [58]:
# Report how many distinct games each data source covers.
print('Number of games in market data: ', len(market_apps), sep='')
print('Number of games in metacritic data: ', len(metacritic_apps), sep='')
print('Number of games in tags data: ', len(tags_apps), sep='')

Number of games in market data: 105
Number of games in metacritic data: 19
Number of games in tags data: 110


In [60]:
# List the games that appear in the market data but have no tags document.
# Ids are compared via str() because tags_apps is looked up with string keys.
print("Games on market that I don't have tags for: ")
untagged_apps = [app for app in market_apps if str(app) not in tags_apps]
for app in untagged_apps:
    print(id_to_name_dict[str(app)])

Games on market that I don't have tags for: 
Burst The Game


In [None]:
# Find the number of items per game

In [45]:
# Map each game's display name to the number of market items it has.
# Cursor.count() (find(...).count()) is deprecated and removed in PyMongo 4;
# count_documents(filter) returns the same exact count.
items_per_game = {}
for game in market_apps:
    items_per_game[id_to_name_dict[str(game)]] = market_collection.count_documents({'app': game})

In [53]:
import operator
# Rank games by item count, largest first; each entry is a (name, count) pair.
sorted_items_per_game = sorted(
    items_per_game.items(),
    key=operator.itemgetter(1),
    reverse=True,
)

# Number of items per game:

In [55]:
# Display the (game name, item count) pairs, sorted by item count descending.
sorted_items_per_game

[('Team Fortress 2', 31584),
 ('Dota 2', 31337),
 ('Counter-Strike: Global Offensive', 10765),
 ('Unturned', 6076),
 ('PAYDAY 2', 3900),
 ('Killing Floor 2', 2075),
 ('Rust', 1336),
 ('#monstercakes', 1178),
 ('Ballistic Overkill', 993),
 ('Primal Carnage: Extinction', 725),
 ('H1Z1', 676),
 ('The Culling', 557),
 ('Zombie Grinder', 420),
 ('Depth', 352),
 ('SNOW', 343),
 ('BattleBlock Theater', 326),
 ("PLAYERUNKNOWN'S BATTLEGROUNDS", 311),
 ("Don't Starve Together", 297),
 ('Supraball', 289),
 ('BATTALION 1944', 274),
 ('Redout: Enhanced Edition', 255),
 ('Space Engineers', 241),
 ('Altitude0: Lower & Faster', 223),
 ('Call to Arms', 216),
 ('Black Squad', 212),
 ('Robot Roller-Derby Disco Dodgeball', 182),
 ('Rebons', 153),
 ('Golf With Your Friends', 121),
 ('Gremlins, Inc.', 114),
 ('Reflex Arena', 114),
 ('Golf It!', 108),
 ('Brawl of Ages', 108),
 ('Armello', 105),
 ('Villages', 101),
 ('Heliborne', 100),
 ('Awesome Metal Detecting', 88),
 ('Forgotten Lore', 80),
 ('Ratz Instagi