In [1]:
import json
import pandas as pd
from pymongo import MongoClient
import matplotlib.pyplot as plt
from helpers import plotOwners, plotNewOwners, remove0s

In [2]:
# Connect to the local MongoDB server and select the `steamspy` database.
client = MongoClient(host='localhost', port=27017)
db = client['steamspy']

In [3]:
# Load every document of the `clean` collection and derive the columns used
# by the analysis below.
data = pd.DataFrame(list(db.clean.find()))
data = data.drop(columns="_id")  # drop _id column (reassign instead of inplace)

# Missing prices are filled with 0, then price and the review counts are cast to ints.
data['price'] = data['price'].fillna(0).astype(int)
data['negative'] = data['negative'].astype(int)
data['positive'] = data['positive'].astype(int)

# Rows with zero negative reviews yield inf here (NaN when both counts are 0);
# pandas does not raise on integer division by zero.
data['Like/Dislike Ratio'] = data['positive'] / data['negative']

# Keep only rows whose Genres contain 'Indie' and whose price is 0.
# The explicit .copy() makes the filtered frame independent of the original,
# so the column assignment below does not trigger SettingWithCopyWarning.
data = data[data['Genres'].apply(lambda genres: 'Indie' in genres)]
data = data[data['price'] == 0].copy()

# Midpoint of the lower/upper owner bounds.
data['Owner Average'] = (data['llOwners'] + data['ulOwners']) / 2
data = remove0s(data)

In [4]:
def hasGenre(series, listOfGenres):
    """Return True if at least one entry of `listOfGenres` is contained in `series`.

    Containment is evaluated with the `in` operator, so `series` may be any
    container that supports it (for a string this is a substring check).
    An empty `listOfGenres` yields False.
    """
    return any(wanted in series for wanted in listOfGenres)

In [5]:
def notGenre(series, listOfGenres):
    """Return True if none of the entries of `listOfGenres` is contained in `series`.

    Containment is evaluated with the `in` operator, so `series` may be any
    container that supports it (for a string this is a substring check).
    An empty `listOfGenres` yields True.
    """
    return all(unwanted not in series for unwanted in listOfGenres)

In [6]:
# Genres that define the combined "Action or Adventure" group.
genreNames = [
    'Action',
    'Adventure',
]

In [7]:
# Rows whose Genres contain at least one of the names in genreNames.
actionAdventure = data.loc[data['Genres'].apply(hasGenre, args=(genreNames,))]

In [8]:
# Complement of actionAdventure: rows whose Genres contain none of genreNames.
other = data.loc[data['Genres'].apply(notGenre, args=(genreNames,))]

In [9]:
import scipy as sp
import scipy.stats as stats

In [10]:
# Single-genre name lists, reused when building the complement groups.
actionName = ['Action']
rpgName = ['RPG']

# Rows tagged with the respective genre.
action = data.loc[data['Genres'].apply(hasGenre, args=(actionName,))]
rpg = data.loc[data['Genres'].apply(hasGenre, args=(rpgName,))]

In [11]:
# Complement groups for Action and RPG (rows NOT tagged with the genre).
otherAction = data.loc[data['Genres'].apply(notGenre, args=(actionName,))]
otherRPG = data.loc[data['Genres'].apply(notGenre, args=(rpgName,))]

# MMO rows and their complement.
mmo = data.loc[data['Genres'].apply(hasGenre, args=(['MMO'],))]
otherMMO = data.loc[data['Genres'].apply(notGenre, args=(['MMO'],))]

# Strategy rows and their complement.
strategy = data.loc[data['Genres'].apply(hasGenre, args=(['Strategy'],))]
otherStrategy = data.loc[data['Genres'].apply(notGenre, args=(['Strategy'],))]

In [12]:
# Action vs. non-Action, 0s removed: one-sided Mann-Whitney U test
# (alternative: Action 'Owner Average' is stochastically greater).
res = stats.mannwhitneyu(
    x=action['Owner Average'],
    y=otherAction['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.0029052114451273664


In [13]:
# MMO vs. non-MMO, 0s removed: one-sided Mann-Whitney U test
# (alternative: MMO 'Owner Average' is stochastically greater).
res = stats.mannwhitneyu(
    x=mmo['Owner Average'],
    y=otherMMO['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

1.0107732291236831e-07


In [14]:
# RPG vs. non-RPG: one-sided Mann-Whitney U test
# (alternative: RPG 'Owner Average' is stochastically greater).
res = stats.mannwhitneyu(
    x=rpg['Owner Average'],
    y=otherRPG['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.03296116434410332


In [15]:
# Strategy vs. non-Strategy: one-sided Mann-Whitney U test
# (alternative: Strategy 'Owner Average' is stochastically greater).
res = stats.mannwhitneyu(
    x=strategy['Owner Average'],
    y=otherStrategy['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.11267032234532004


In [16]:
def compute_power(data, other, genre, alpha = 0.05):
    """Power of an upper-tailed normal (z) test on the mean 'Owner Average'.

    The null distribution is centred on the mean of `other`, the alternative
    on the mean of `genre`; both share one standard error computed from
    `data` as std / sqrt(number of rows).

    Parameters:
        data:  frame with an 'Owner Average' column; supplies std and row count.
        other: frame with an 'Owner Average' column; its mean is the null mean.
        genre: frame with an 'Owner Average' column; its mean is the alternative mean.
        alpha: significance level of the one-sided test (default 0.05).

    Returns:
        Probability, under the alternative, of exceeding the null's
        (1 - alpha) quantile.

    NOTE(review): the standard error uses the std and row count of `data`,
    not the sizes of `genre`/`other` -- confirm this is the intended model.
    """
    column = 'Owner Average'
    null_mean = other[column].mean()
    alt_mean = genre[column].mean()

    # Shared standard error derived from the frame passed as `data`.
    std_err = data[column].std() / data.shape[0]**0.5

    # Rejection threshold: the (1 - alpha) quantile of the null distribution.
    threshold = stats.norm.ppf(1 - alpha, loc=null_mean, scale=std_err)

    # Mass of the alternative distribution lying above that threshold.
    return 1 - stats.norm.cdf(threshold, loc=alt_mean, scale=std_err)

In [17]:
# Power for Action vs. non-Action.
compute_power(data, other=otherAction, genre=action)

0.9999999533757833

In [18]:
# Power for RPG vs. non-RPG.
compute_power(data, other=otherRPG, genre=rpg)

0.439134104845189

In [19]:
# Power for MMO vs. non-MMO.
compute_power(data, other=otherMMO, genre=mmo)

0.9994968474730219

In [20]:
# Power for Strategy vs. non-Strategy.
compute_power(data, other=otherStrategy, genre=strategy)

1.2630559661053198e-05