In [1]:
import json
import pandas as pd
from pymongo import MongoClient
import matplotlib.pyplot as plt
import scipy as sp
import scipy.stats as stats
from helpers import plotOwners, plotNewOwners, remove0s

# Connect to the local MongoDB server and select the `steamspy` database.
client = MongoClient('localhost', 27017)
db = client.steamspy

# Load every document of the `clean` collection into a DataFrame.
# The Mongo `_id` field is dropped because it is not used in the analysis.
data = pd.DataFrame(list(db.clean.find()))
data = data.drop(columns="_id")

# Cast the numeric columns to int; a missing price is treated as 0 (free game).
data['price'] = data['price'].fillna(0).astype(int)
data['negative'] = data['negative'].astype(int)
data['positive'] = data['positive'].astype(int)

# NOTE(review): rows with negative == 0 yield inf (or NaN for 0/0) here;
# whether remove0s() filters them out is not visible from this notebook.
data['Like/Dislike Ratio'] = data['positive'] / data['negative']

# Keep only paid games whose Genres value contains 'Indie'.
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not write to a slice (avoids SettingWithCopyWarning).
is_indie = data['Genres'].apply(lambda x: 'Indie' in x)
is_paid = data.price != 0
data = data[is_indie & is_paid].copy()

# Midpoint of the lower/upper owner bounds, and the earnings figure derived
# from it (price times that midpoint).
data['Owner Average'] = (data['llOwners'] + data['ulOwners']) / 2
data['earnings'] = data['price'] * data['Owner Average']

# Project helper from helpers.py; its exact filtering rule is defined there.
data = remove0s(data)

In [2]:
def hasGenre(series, listOfGenres):
    """Return True if at least one entry of ``listOfGenres`` is in ``series``.

    Args:
        series: Any container supporting ``in``. The callers in this notebook
            pass a single row's ``Genres`` value, not a pandas Series.
        listOfGenres: Iterable of genre names to look for.

    Returns:
        bool: True on the first match, False if nothing matches (including
        when ``listOfGenres`` is empty).
    """
    return any(wanted in series for wanted in listOfGenres)

def notGenre(series, listOfGenres):
    """Return True if none of the entries of ``listOfGenres`` is in ``series``.

    Args:
        series: Any container supporting ``in``. The callers in this notebook
            pass a single row's ``Genres`` value, not a pandas Series.
        listOfGenres: Iterable of genre names that must all be absent.

    Returns:
        bool: False as soon as one genre is found, True otherwise (including
        when ``listOfGenres`` is empty).
    """
    return all(unwanted not in series for unwanted in listOfGenres)

In [3]:
# Split the prepared frame three ways: for each genre of interest, one subset
# whose Genres value contains it and one subset whose Genres value does not.
genre_col = data['Genres']

action_mask = genre_col.apply(lambda g: hasGenre(g, ['Action']))
not_action_mask = genre_col.apply(lambda g: notGenre(g, ['Action']))
action = data[action_mask]
otherAction = data[not_action_mask]

mmo_mask = genre_col.apply(lambda g: hasGenre(g, ['MMO']))
not_mmo_mask = genre_col.apply(lambda g: notGenre(g, ['MMO']))
mmo = data[mmo_mask]
otherMMO = data[not_mmo_mask]

adventure_mask = genre_col.apply(lambda g: hasGenre(g, ['Adventure']))
not_adventure_mask = genre_col.apply(lambda g: notGenre(g, ['Adventure']))
adventure = data[adventure_mask]
otherAdventure = data[not_adventure_mask]

In [4]:
# One-sided Mann-Whitney U test on earnings: Action titles vs. non-Action titles.
# alternative="greater" tests whether the first sample tends to be larger.
res = stats.mannwhitneyu(
    x=action['earnings'],
    y=otherAction['earnings'],
    alternative="greater",
)
print(res.pvalue)

0.003624629779023982


In [5]:
# One-sided Mann-Whitney U test on earnings: MMO titles vs. non-MMO titles.
# alternative="greater" tests whether the first sample tends to be larger.
res = stats.mannwhitneyu(
    x=mmo['earnings'],
    y=otherMMO['earnings'],
    alternative="greater",
)
print(res.pvalue)

0.015133793710619575


In [6]:
# One-sided Mann-Whitney U test on earnings: Adventure titles vs. non-Adventure titles.
# alternative="greater" tests whether the first sample tends to be larger.
res = stats.mannwhitneyu(
    x=adventure['earnings'],
    y=otherAdventure['earnings'],
    alternative="greater",
)
print(res.pvalue)

0.027802705962444118


In [7]:
# One-sided Mann-Whitney U test on earnings: MMO titles vs. Action titles.
# alternative="greater" tests whether the first sample tends to be larger.
# NOTE(review): any title tagged with both genres is present in both samples.
res = stats.mannwhitneyu(
    x=mmo['earnings'],
    y=action['earnings'],
    alternative="greater",
)
print(res.pvalue)

0.03767001204826956


In [8]:
# One-sided Mann-Whitney U test on earnings: Action titles vs. Adventure titles.
# alternative="greater" tests whether the first sample tends to be larger.
# NOTE(review): any title tagged with both genres is present in both samples.
res = stats.mannwhitneyu(
    x=action['earnings'],
    y=adventure['earnings'],
    alternative="greater",
)
print(res.pvalue)

0.39228796997527804


In [9]:
def compute_power(data, other, genre, alpha = 0.05):
    """Estimate the power of a one-sided (upper-tail) normal test on mean earnings.

    Two normal distributions with a common scale are built, one centred on the
    mean earnings of ``other`` (null) and one on the mean earnings of ``genre``
    (alternative). The result is the alternative's probability mass above the
    null's ``1 - alpha`` quantile.

    Args:
        data: DataFrame with an ``earnings`` column; supplies the standard
            deviation and the sample size used for the standard error.
        other: DataFrame with an ``earnings`` column; its mean is the null mean.
        genre: DataFrame with an ``earnings`` column; its mean is the
            alternative mean.
        alpha: Significance level of the one-sided test.

    Returns:
        The estimated power as a float in [0, 1].
    """
    # NOTE(review): the standard error uses the spread and row count of the
    # whole `data` frame, not of the two groups being compared. Confirm this
    # is the intended approximation.
    std_err = data['earnings'].std() / len(data) ** 0.5

    null_dist = stats.norm(other['earnings'].mean(), std_err)
    alt_dist = stats.norm(genre['earnings'].mean(), std_err)

    # Rejection threshold under the null, then the alternative's mass beyond it.
    threshold = null_dist.ppf(1 - alpha)
    return 1 - alt_dist.cdf(threshold)

In [10]:
# Power estimate for Action (alternative) against non-Action titles (null).
compute_power(data, other=otherAction, genre=action)

0.9996587744877087

In [11]:
# Power estimate for MMO (alternative) against non-MMO titles (null).
compute_power(data, other=otherMMO, genre=mmo)

1.0

In [12]:
# Power estimate for Adventure (alternative) against non-Adventure titles (null).
compute_power(data, other=otherAdventure, genre=adventure)

0.9944129523328231

In [13]:
# Power estimate for MMO (alternative) against Action titles (null).
compute_power(data, other=action, genre=mmo)

1.0