# ETL Project
## Example Query: Number of games for preferred platforms, grouped by genre

In [1]:
## Dependencies
import numpy as np
import pandas as pd
import pymongo
import re

In [2]:
## Connect to the MongoDB server running on localhost (default port 27017)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

## Handle to the VideoGamesDB database used by the queries below
db = client['VideoGamesDB']

In [3]:
## Query: every game document whose platform is one of the preferred platforms.
## `$in` is the form MongoDB recommends for equality checks against several
## values of the same field; it matches the same documents as an `$or` with one
## equality clause per platform.
query = {'platform': {'$in': ['PS4', 'XOne', 'Steam']}}

## A PyMongo cursor is exhausted after a single pass, so materialise it into a
## list here: cells that consume `results` can then be re-run without silently
## producing an empty result.
results = list(db.united.find(query))

In [4]:
## Load the matching documents into a DataFrame and preview its first five rows
preferred_platforms = pd.DataFrame(data=results)
preferred_platforms.head(n=5)

Unnamed: 0,_id,name,platform,genre,developer,publisher,user_score,year_of_release
0,60c4e7d3f19b51d2b434cb1b,Call of Duty: Black Ops 3,PS4,Shooter,,Activision,,2015
1,60c4e7d3f19b51d2b434cb26,Grand Theft Auto V,PS4,Action,Rockstar North,Take-Two Interactive,8.3,2014
2,60c4e7d3f19b51d2b434cb49,FIFA 16,PS4,Sports,EA Sports,Electronic Arts,4.3,2015
3,60c4e7d3f19b51d2b434cb53,Star Wars Battlefront (2015),PS4,Shooter,,Electronic Arts,,2015
4,60c4e7d3f19b51d2b434cb58,Call of Duty: Advanced Warfare,PS4,Shooter,Sledgehammer Games,Activision,5.7,2014


In [5]:
## Boolean mask: True for rows whose release year lies in [2014, 2017)
condition = (
    preferred_platforms['year_of_release'].ge(2014)
    & preferred_platforms['year_of_release'].lt(2017)
)

In [6]:
## Count games per (platform, genre) among the rows selected by `condition`.
## Rows and columns are picked in a single `.loc` call rather than chained
## indexing, and the column rename is chained instead of using `inplace=True`,
## so the whole result is built by one expression.
genres_by_platform = (
    preferred_platforms
    .loc[condition, ['platform', 'genre', '_id']]
    .groupby(['platform', 'genre'])
    .count()  # number of non-null `_id` values in each group
    .rename(columns={'_id': 'Game Count'})
)

In [7]:
# Display results: the 'Game Count' table indexed by platform and genre
genres_by_platform

Unnamed: 0_level_0,Unnamed: 1_level_0,Game Count
platform,genre,Unnamed: 2_level_1
PS4,Action,142
PS4,Adventure,28
PS4,Fighting,17
PS4,Misc,19
PS4,Platform,9
PS4,Puzzle,1
PS4,Racing,18
PS4,Role-Playing,51
PS4,Shooter,38
PS4,Simulation,6
