# Heroes of Pymoli

GWU Data Analytics Bootcamp Homework 4

### Observable Trends

* Male players make up a significantly larger portion of the player base (or at least of the players who make purchases) than female players - about 81% versus 17% - and they also tend to spend slightly more per player ($4.02 versus $3.83).

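* The largest share of players (about 45%) is between the ages of 20 and 24, but the typical 20-24 year old spends less per player ($3.78) than players in nearly every other age bracket - both older and younger; only the single player in the 45-49 bracket spent less.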

* Higher-priced items tend to be more profitable even if they aren't more frequently purchased. The price difference often compensates for any differences in purchase frequency.

In [1]:
# Load dependencies

import pandas as pd
import numpy as np

# Read in data and create initial dataframe
# (the path is stored in `data_path` rather than `json` so the name of the
# standard-library `json` module is not shadowed for the rest of the notebook)

data_path = "../Resources/Heroes.JSON"
df = pd.read_json(data_path)

### Player Count

In [2]:
# Count the distinct screen names ("SN") to get the number of individual players,
# then show the figure as a one-row table

num_players = df['SN'].nunique()
pd.DataFrame([{"Total Players": num_players}])

Unnamed: 0,Total Players
0,573


### Purchasing Analysis (Total)

In [3]:
# Summary statistics over every purchase record

prices = df['Price']
unique_items_count = df['Item ID'].nunique()
avg_purchase_price = prices.mean()
total_purchases = prices.count()
total_revenue = prices.sum()

# Assemble a one-row summary table with an explicit column order

summary_columns = ['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']
tot_purchases_df = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items_count],
        "Average Price": [avg_purchase_price],
        "Number of Purchases": [total_purchases],
        "Total Revenue": [total_revenue],
    },
    columns=summary_columns,
)

# Render the monetary columns as dollar strings
# (only the revenue total gets a thousands separator)

for money_col, money_fmt in (("Average Price", "${:.2f}"), ("Total Revenue", "${:,.2f}")):
    tot_purchases_df[money_col] = tot_purchases_df[money_col].map(money_fmt.format)

# Display the table

tot_purchases_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


### Gender Demographics

In [4]:
# Isolate unique individuals (one row per screen name; the first occurrence is kept)

sn_unique_df = df.drop_duplicates(subset="SN")

# Group data by gender

sn_unique_gender_df = sn_unique_df.groupby(['Gender'])

# Calculate count and percent of unique individuals by gender
# (the count is computed once and reused for the percentage)

sn_unique_grouped_gender_count = sn_unique_gender_df[['Gender']].count()
sn_unique_grouped_gender_percent = (sn_unique_grouped_gender_count / sn_unique_grouped_gender_count.sum()) * 100

# Combine count and percent dataframes. Both frames carry a single "Gender"
# column, so the merge suffixes them "_x" (percent, left) and "_y" (count, right).

sn_unique_gender_demo_df = pd.merge(sn_unique_grouped_gender_percent, sn_unique_grouped_gender_count, left_index=True, right_index=True, how='outer')
sn_unique_gender_demo_df = sn_unique_gender_demo_df.rename(columns={"Gender_x":"Percent of Players", "Gender_y":"Total Count"})

# Format data

sn_unique_gender_demo_df['Percent of Players'] = sn_unique_gender_demo_df['Percent of Players'].map("{:.2f}%".format)

# Sort data by descending gender frequency.
# The sort key is the numeric 'Total Count' column: 'Percent of Players' now
# holds formatted strings, and sorting those compares text character by
# character (e.g. "9.00%" would rank above "81.15%").

sn_unique_gender_demo_df.sort_values('Total Count', ascending=False)

Unnamed: 0_level_0,Percent of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,81.15%,465
Female,17.45%,100
Other / Non-Disclosed,1.40%,8


### Purchasing Analysis (Gender)

In [7]:
# Per-gender purchase metrics: number of purchases, mean price and total spend

grouped_gender_df = df.groupby(['Gender'])
gender_agg_spec = {'Gender':['count'], 'Price':['mean', 'sum']}
gender_purchase_df = grouped_gender_df.agg(gender_agg_spec)

# Normalized total = total spend of a gender / number of unique players of that gender.
# The unique-player groups come from `sn_unique_gender_df`, which this cell
# expects to have been created already by the Gender Demographics cell.

gender_total_spend = grouped_gender_df['Price'].sum()
gender_unique_players = sn_unique_gender_df['Gender'].count()
norm_gender_tots = gender_total_spend / gender_unique_players
gender_purchase_df['Normalized Totals'] = norm_gender_tots

# Flatten the two-level column header, then give every column a readable name.
# The "" entry in the mapping restores the label of the normalized-totals column.

gender_purchase_df.columns = gender_purchase_df.columns.droplevel()
gender_column_labels = {"count":"Purchase Count", "mean":"Average Purchase Price", "sum":"Total Purchase Value", "":"Normalized Totals"}
gender_purchase_df = gender_purchase_df.rename(columns=gender_column_labels)

# Render the monetary columns as dollar strings

for gender_money_col, gender_money_fmt in (
    ('Average Purchase Price', "${:.2f}"),
    ('Total Purchase Value', "${:,.2f}"),
    ('Normalized Totals', "${:,.2f}"),
):
    gender_purchase_df[gender_money_col] = gender_purchase_df[gender_money_col].map(gender_money_fmt.format)

# Display the table

gender_purchase_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


### Age Demographics

In [8]:
# Define the age brackets and tag every purchase row with its bracket.
# NOTE(review): the bin edges stop at 49, so an age outside (0, 49] would get no
# bracket and drop out of the grouped tables - confirm the data never exceeds 49.

bins = [0, 9, 14, 19, 24, 29, 34, 39, 44, 49]
group_names = ['Under 10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45-49']
df['Age Bracket'] = pd.cut(df["Age"], bins=bins, labels=group_names)

# Keep one row per player (the first row seen for each screen name)

sn_unique_df = df.drop_duplicates(subset="SN")

# Players per bracket, as a raw count and as a percentage of all counted players

sn_unique_age_df = sn_unique_df.groupby(['Age Bracket'])
sn_unique_grouped_age_count = sn_unique_age_df[['Age']].count()
sn_unique_grouped_age_percent = (sn_unique_grouped_age_count / sn_unique_grouped_age_count.sum()) * 100

# Join percent and count side by side. Both frames carry a single "Age" column,
# so the merge suffixes them "_x" (percent, left) and "_y" (count, right);
# the rename turns those into readable headers.

sn_unique_age_demo_df = pd.merge(
    sn_unique_grouped_age_percent,
    sn_unique_grouped_age_count,
    left_index=True,
    right_index=True,
    how='outer',
).rename(columns={"Age_x":"Percent of Players", "Age_y":"Total Count"})

# Render the percentage with two decimals and a trailing percent sign

sn_unique_age_demo_df['Percent of Players'] = sn_unique_age_demo_df['Percent of Players'].map("{:.2f}%".format)

# Display the table

sn_unique_age_demo_df

Unnamed: 0_level_0,Percent of Players,Total Count
Age Bracket,Unnamed: 1_level_1,Unnamed: 2_level_1
Under 10,3.32%,19
10-14,4.01%,23
15-19,17.45%,100
20-24,45.20%,259
25-29,15.18%,87
30-34,8.20%,47
35-39,4.71%,27
40-44,1.75%,10
45-49,0.17%,1


### Purchasing Analysis (Age)

In [9]:
# Per-bracket purchase metrics: number of purchases, mean price and total spend.
# Relies on the 'Age Bracket' column that the Age Demographics cell adds to `df`.

grouped_age_df = df.groupby(['Age Bracket'])
age_agg_spec = {'Age':['count'], 'Price':['mean', 'sum']}
age_purchase_df = grouped_age_df.agg(age_agg_spec)

# Normalized total = total spend of a bracket / number of unique players in that bracket.
# The unique-player groups come from `sn_unique_age_df`, which this cell
# expects to have been created already by the Age Demographics cell.

age_total_spend = grouped_age_df['Price'].sum()
age_unique_players = sn_unique_age_df['Age'].count()
age_purchase_df['Normalized Totals'] = age_total_spend / age_unique_players

# Flatten the two-level column header, then give every column a readable name.
# The "" entry in the mapping restores the label of the normalized-totals column.

age_purchase_df.columns = age_purchase_df.columns.droplevel()
age_column_labels = {"count":"Purchase Count", "mean":"Average Purchase Price", "sum":"Total Purchase Value", "":"Normalized Totals"}
age_purchase_df = age_purchase_df.rename(columns=age_column_labels)

# Render the monetary columns as dollar strings

for age_money_col, age_money_fmt in (
    ('Average Purchase Price', "${:.2f}"),
    ('Total Purchase Value', "${:,.2f}"),
    ('Normalized Totals', "${:,.2f}"),
):
    age_purchase_df[age_money_col] = age_purchase_df[age_money_col].map(age_money_fmt.format)

# Display the table

age_purchase_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Under 10,28,$2.98,$83.46,$4.39
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40-44,16,$3.19,$51.03,$5.10
45-49,1,$2.72,$2.72,$2.72


### Top Spenders

In [157]:
# Per-player purchase metrics: number of purchases, mean price and total spend

top_spenders_df = df.groupby(['SN']).agg({'Price':['count', 'mean', 'sum']})
top_spenders_df.columns = top_spenders_df.columns.droplevel()

# Label the columns, rank players by total spend and keep the five biggest spenders

top_spenders_df = (
    top_spenders_df
    .rename(columns={"count":"Purchase Count", "mean":"Average Purchase Price", "sum":"Total Purchase Value"})
    .sort_values('Total Purchase Value', ascending=False)
    .head()
)

# Render the monetary columns as dollar strings (done after sorting, so the
# ranking above uses the numeric values)

for spender_money_col in ('Average Purchase Price', 'Total Purchase Value'):
    top_spenders_df[spender_money_col] = top_spenders_df[spender_money_col].map("${:,.2f}".format)

# Display the table

top_spenders_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


### Most Popular Items

In [161]:
# Per-item purchase metrics: number of purchases, mean price and total revenue

item_agg_spec = {'Price':['count', 'mean', 'sum']}
grouped_items_df = df.groupby(['Item ID', 'Item Name']).agg(item_agg_spec)
grouped_items_df.columns = grouped_items_df.columns.droplevel()
grouped_items_df = grouped_items_df.rename(columns={"count":"Purchase Count", "mean":"Average Purchase Price", "sum":"Total Purchase Value"})

# Rank items by how often they were bought and keep the top five.
# NOTE(review): several items tie on purchase count and no secondary sort key
# is given, so which tied items make the cut (and their order) is left to the
# sort implementation - consider adding a tie-breaker.

pop_items_df = grouped_items_df.sort_values('Purchase Count', ascending=False).head()

# Render the monetary columns as dollar strings (done after sorting, so the
# ranking above uses the numeric values)

for pop_money_col in ('Average Purchase Price', 'Total Purchase Value'):
    pop_items_df[pop_money_col] = pop_items_df[pop_money_col].map("${:,.2f}".format)

# Display the table

pop_items_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


### Most Profitable Items

In [162]:
# Per-item purchase metrics: number of purchases, mean price and total revenue
# (recomputed here so this cell does not depend on the Most Popular Items cell)

item_revenue_spec = {'Price':['count', 'mean', 'sum']}
grouped_items_df = df.groupby(['Item ID', 'Item Name']).agg(item_revenue_spec)
grouped_items_df.columns = grouped_items_df.columns.droplevel()
grouped_items_df = grouped_items_df.rename(columns={"count":"Purchase Count", "mean":"Average Purchase Price", "sum":"Total Purchase Value"})

# Rank items by total purchase value and keep the top five

rev_items_df = grouped_items_df.sort_values('Total Purchase Value', ascending=False).head()

# Render the monetary columns as dollar strings (done after sorting, so the
# ranking above uses the numeric values)

for rev_money_col in ('Average Purchase Price', 'Total Purchase Value'):
    rev_items_df[rev_money_col] = rev_items_df[rev_money_col].map("${:,.2f}".format)

# Display the table

rev_items_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
