# ETL Project
## Example Query: Number of games for preferred platforms, grouped by genre

In [5]:
## Dependencies
import re

import numpy as np
import pandas as pd
import plotly.express as px
import pymongo

In [6]:
## Connect to the MongoDB server running on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

## Select the project database by name
db = client['VideoGamesDB']

In [7]:
## Query: games released on one of the preferred platforms.
## `$in` matches any of the listed values for a single field; it is the
## idiomatic replacement for an `$or` of equality tests on that same field.
query = {'platform': {'$in': ['PS4', 'XOne', 'Steam']}}

## find() returns a cursor; the matching documents are read when it is iterated
results = db.united.find(query)

In [8]:
## Store query results in DataFrame.
## A pymongo cursor can only be consumed once, so iterate an unevaluated clone:
## re-running this cell then rebuilds the full frame instead of an empty one.
preferred_platforms = pd.DataFrame(list(results.clone()))
preferred_platforms.head(10)

Unnamed: 0,_id,name,platform,genre,developer,publisher,user_score,year_of_release
0,6244d1e81c156aea0052492c,The Witcher 3: Wild Hunt,XOne,Role-Playing,CD Projekt Red Studio,Namco Bandai Games,9.2,2015
1,6244d1e81c156aea00524932,The Witcher 3: Wild Hunt,PS4,Role-Playing,CD Projekt Red Studio,Namco Bandai Games,9.2,2015
2,6244d1e81c156aea00524a96,Brothers: A Tale of Two Sons,XOne,Adventure,Starbreeze,505 Games,8.9,2015
3,6244d1e81c156aea00524ae3,Dark Souls III,PS4,Role-Playing,From Software,Namco Bandai Games,8.8,2016
4,6244d1e81c156aea00524b1a,Dead Rising,XOne,Action,Capcom,Capcom,8.8,2016
5,6244d1e81c156aea00524b72,Life is Strange,XOne,Adventure,DONTNOD Entertainment,Square Enix,8.7,2016
6,6244d1e81c156aea00524b84,The King of Fighters XIV,PS4,Fighting,SNK Playmore,Deep Silver,8.7,2016
7,6244d1e81c156aea00524ba6,Farming Simulator 17,PS4,Simulation,Maximum Games,Focus Home Interactive,8.7,2016
8,6244d1e81c156aea00524c19,Rocket League,PS4,Sports,Psyonix,505 Games,8.7,2016
9,6244d1e81c156aea00524c41,Brothers: A Tale of Two Sons,PS4,Adventure,Starbreeze,505 Games,8.6,2015


In [12]:
## Summed user score per game title for the first 15 rows, bars grouped by platform.
## With `y` given, px.histogram aggregates with its default histfunc ("sum").
fig = px.histogram(
    preferred_platforms.head(15),
    x="name",
    y="user_score",
    color="platform",
    barmode="group",
    height=400,
    title="User score by game and platform (first 15 results)",
    labels={"name": "Game", "user_score": "User score", "platform": "Platform"},
)
fig.show()

In [None]:
## Boolean mask selecting rows with 2014 <= year_of_release < 2017
release_year = preferred_platforms['year_of_release']
condition = release_year.ge(2014) & release_year.lt(2017)

In [None]:
## Number of games per (platform, genre) pair among the rows selected by `condition`.
## Counting the `_id` column gives one count per document; it is then relabelled.
genres_by_platform = (
    preferred_platforms.loc[condition, ['platform', 'genre', '_id']]
    .groupby(['platform', 'genre'])
    .count()
    .rename(columns={'_id': 'Game Count'})
)

In [None]:
# Display results: the grouped game counts, indexed by (platform, genre)
genres_by_platform

In [None]:
## Replace missing user scores with 0 (overwrites the column in place).
## NOTE(review): a 0 placeholder cannot be told apart from a genuine score of 0
## and will pull down any average computed later — confirm this is intended.
preferred_platforms['user_score'] = preferred_platforms['user_score'].fillna(0)

In [None]:
## astype() returns a new Series rather than converting in place, so the result
## must be assigned back for the column to actually become float.
preferred_platforms['user_score'] = preferred_platforms['user_score'].astype(float)

## Show the converted column (and its dtype) as the cell output
preferred_platforms['user_score']