In [1]:
import json
import pandas as pd
from pymongo import MongoClient
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as stats
from helpers import plotOwners, plotNewOwners, remove0s

client = MongoClient('localhost', 27017)
db = client.steamspy

# Pull every document of the `clean` collection into a frame and discard
# the "_id" column.
data = pd.DataFrame(list(db.clean.find())).drop(columns="_id")

# Missing prices are filled with 0 (the original note says these are the free
# games), then price and both review counts are cast to int.
data['price'] = data['price'].fillna(0)
data = data.astype({'price': int, 'negative': int, 'positive': int})
data['Like/Dislike Ratio'] = data['positive'] / data['negative']

# Keep only rows whose Genres entry does not contain 'Indie' and whose price is 0.
data = data[
    data['Genres'].apply(lambda genres: 'Indie' not in genres)
    & (data['price'] == 0)
]

# Midpoint of the lower/upper owner estimates.
data['Owner Average'] = (data['llOwners'] + data['ulOwners']) / 2

# remove0s is imported from the project's helpers module; what exactly it
# filters is not visible here -- TODO confirm against helpers.py.
data = remove0s(data)

In [2]:
def hasGenre(series, listOfGenres):
    """Tell whether any of the requested genres is contained in ``series``.

    Args:
        series: Container tested with ``in``; the notebook passes single
            entries of the 'Genres' column.
        listOfGenres: Iterable of genre names to look for.

    Returns:
        True as soon as one genre is found in ``series``; False when none is
        found (including when ``listOfGenres`` is empty).
    """
    return any(wanted in series for wanted in listOfGenres)

def notGenre(series, listOfGenres):
    """Tell whether none of the requested genres is contained in ``series``.

    Args:
        series: Container tested with ``in``; the notebook passes single
            entries of the 'Genres' column.
        listOfGenres: Iterable of genre names that must all be absent.

    Returns:
        False as soon as one genre is found in ``series``; True when none is
        found (including when ``listOfGenres`` is empty).
    """
    return not any(unwanted in series for unwanted in listOfGenres)

In [3]:
# Rows whose Genres entry contains 'Action', and the rows where it does not.
action = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['Action']))
]
otherAction = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, ['Action']))
]

In [4]:
# Rows whose Genres entry contains 'MMO', and the rows where it does not.
mmoName = ['MMO']
mmo = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, mmoName))
]
otherMMO = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, mmoName))
]

In [5]:
# p-value: Mann-Whitney U test, Action owner averages vs. non-Action ones
# (alternative="greater").
res = stats.mannwhitneyu(
    x=action['Owner Average'],
    y=otherAction['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

1.2821975135530936e-18


In [6]:
# p-value: Mann-Whitney U test, MMO owner averages vs. non-MMO ones
# (alternative="greater").
res = stats.mannwhitneyu(
    x=mmo['Owner Average'],
    y=otherMMO['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

6.35123936011751e-11


In [7]:
# p-value: Mann-Whitney U test, Sports owner averages vs. non-Sports ones
# (alternative="greater").
sports = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['Sports']))
]
otherSports = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, ['Sports']))
]
res = stats.mannwhitneyu(
    x=sports['Owner Average'],
    y=otherSports['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.09930591212950068


In [8]:
# p-value: Mann-Whitney U test, Strategy owner averages vs. non-Strategy ones
# (alternative="greater").
strategy = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['Strategy']))
]
otherStrategy = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, ['Strategy']))
]
res = stats.mannwhitneyu(
    x=strategy['Owner Average'],
    y=otherStrategy['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.3704278172472488


In [9]:
# p-value: Mann-Whitney U test, Adventure owner averages vs. non-Adventure ones
# (alternative="greater").
adventure = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['Adventure']))
]
otherAdventure = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, ['Adventure']))
]
res = stats.mannwhitneyu(
    x=adventure['Owner Average'],
    y=otherAdventure['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.6413357791557802


In [10]:
# p-value: Mann-Whitney U test, RPG owner averages vs. non-RPG ones
# (alternative="greater").
rpg = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['RPG']))
]
otherRPG = data.loc[
    data['Genres'].apply(lambda genres: notGenre(genres, ['RPG']))
]
res = stats.mannwhitneyu(
    x=rpg['Owner Average'],
    y=otherRPG['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

8.385353636409184e-05


In [11]:
# p-value: Mann-Whitney U test, Action owner averages vs. MMO ones
# (alternative="greater").
res = stats.mannwhitneyu(
    x=action['Owner Average'],
    y=mmo['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.1654827375241395


In [12]:
# p-value: Mann-Whitney U test, Action owner averages vs. RPG ones
# (alternative="greater").
res = stats.mannwhitneyu(
    x=action['Owner Average'],
    y=rpg['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.017130908050227624


In [13]:
# p-value: Mann-Whitney U test, MMO owner averages vs. RPG ones
# (alternative="greater").
res = stats.mannwhitneyu(
    x=mmo['Owner Average'],
    y=rpg['Owner Average'],
    alternative="greater",
)
print(res.pvalue)

0.11288348519722408


In [20]:
def compute_power(data, other, genre, alpha=0.05):
    """Power of a one-sided normal test comparing two 'Owner Average' means.

    Two normal distributions are built with a shared standard error: one
    centred on the mean of ``other`` (null) and one centred on the mean of
    ``genre`` (alternative). The power is the alternative's probability mass
    above the null's ``1 - alpha`` quantile.

    Args:
        data: Frame supplying the spread; the standard error is the
            ``.std()`` of its 'Owner Average' column divided by the square
            root of its row count.
        other: Frame whose 'Owner Average' mean is the null-hypothesis mean.
        genre: Frame whose 'Owner Average' mean is the alternative mean.
        alpha: Significance level of the one-sided test.

    Returns:
        The computed power as a float.
    """
    column = 'Owner Average'
    null_mean = other[column].mean()
    alt_mean = genre[column].mean()

    # One standard error is shared by both sampling distributions.
    spread = data[column].std() / data.shape[0]**0.5

    # Rejection threshold: the (1 - alpha) quantile under the null.
    threshold = stats.norm(null_mean, spread).ppf(1 - alpha)

    # Mass of the alternative distribution that lies above the threshold.
    return 1 - stats.norm(alt_mean, spread).cdf(threshold)

In [21]:
# Power for Action (alternative mean) against non-Action (null mean),
# with the spread taken from the full frame.
compute_power(data=data, other=otherAction, genre=action)

0.99999999999506

In [23]:
# Power for MMO (alternative mean) against non-MMO (null mean),
# with the spread taken from the full frame.
compute_power(data=data, other=otherMMO, genre=mmo)

0.10750004553449122

In [25]:
# Power for RPG (alternative mean) against non-RPG (null mean),
# with the spread taken from the full frame.
compute_power(data=data, other=otherRPG, genre=rpg)

3.7808927856541885e-05

In [28]:
# Rows whose Genres entry contains 'Action' or 'RPG' (hasGenre matches any
# listed genre); this subset supplies the spread for the power computation.
rpgAction = data.loc[
    data['Genres'].apply(lambda genres: hasGenre(genres, ['Action', 'RPG']))
]
# Power for Action (alternative mean) against RPG (null mean).
compute_power(data=rpgAction, other=rpg, genre=action)

0.9766864283887338