# Observable Trends

###### 1. Most players who play Heroes of Pymoli are male (81.15%).
###### 2. Close to half the players (45.2%) from the game's database range from ages 20 to 24. This age group has also contributed approximately 45% of the total amount of purchases.
###### 3. The most popular and also the most profitable item purchased is the Final Critic, with 14 purchased for a total of $38.60.

In [62]:
# Dependencies
import pandas as pd

In [63]:
# Set json path
json_path = 'raw_data/purchase_data.json'

In [64]:
# Read json file
heroes_df = pd.read_json(json_path)
heroes_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [65]:
# Look at column names
heroes_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [66]:
# Reorginizing columns
heroes_df = heroes_df[['SN', 'Gender', 'Age', 'Item ID', 'Item Name', 'Price']]
heroes_df.head()

Unnamed: 0,SN,Gender,Age,Item ID,Item Name,Price
0,Aelalis34,Male,38,165,Bone Crushing Silver Skewer,3.37
1,Eolo46,Male,21,119,"Stormbringer, Dark Blade of Ending Misery",2.32
2,Assastnya25,Male,34,174,Primitive Blade,2.46
3,Pheusrical25,Male,21,92,Final Critic,1.36
4,Aela59,Male,23,63,Stormfury Mace,1.27


In [67]:
# Checking out the data
heroes_df.count()

SN           780
Gender       780
Age          780
Item ID      780
Item Name    780
Price        780
dtype: int64

# Total Unique Players

In [68]:
# Find number of unique players
total_players = len(heroes_df['SN'].unique())
total_players

573

# Purchasing Analysis (Total)

In [69]:
# Number of unique items
number_unique_items = len(heroes_df['Item ID'].unique())
print(number_unique_items)

# Average purchase price of unique items
avg_price = heroes_df['Price'].mean()
print(avg_price)

# Number of purchases
purchases = len(heroes_df['Price'])
print(purchases)

# Total revenue
revenue = heroes_df['Price'].sum()
print(revenue)

183
2.931192307692303
780
2286.3299999999963


In [70]:
# Create purchasing analysis dataframe
purchase_analysis = {'Number of Unique Items': [number_unique_items], 
                     'Average Price': [f'${avg_price:.2f}'], 
                     'Number of Purchases': [purchases], 
                     'Total Revenue': [f'${revenue:,.2f}']}

purchase_analysis_df = pd.DataFrame(purchase_analysis)

# Reorder columns
purchase_analysis_df = purchase_analysis_df[['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']]

# Purchasing Analysis (Total)
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


# Gender Demographics

In [71]:
# Only grab unique players by dropping duplicates
unique_heroes_df = heroes_df.drop_duplicates(subset='SN', keep='first')
unique_heroes_df.count()

SN           573
Gender       573
Age          573
Item ID      573
Item Name    573
Price        573
dtype: int64

In [72]:
# Group by gender to see count
unique_grouped_gender = unique_heroes_df.groupby('Gender').count()
unique_grouped_gender

Unnamed: 0_level_0,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,100,100,100,100,100
Male,465,465,465,465,465
Other / Non-Disclosed,8,8,8,8,8


In [73]:
# Convert the counts of each gender into a dataframe 
unique_grouped_gender_df = pd.DataFrame(unique_grouped_gender)

# Add percentage of players
unique_grouped_gender_df['Percentage of Players'] = round(unique_grouped_gender['SN']/total_players*100, 2)
unique_grouped_gender_df

Unnamed: 0_level_0,SN,Age,Item ID,Item Name,Price,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,100,100,100,100,100,17.45
Male,465,465,465,465,465,81.15
Other / Non-Disclosed,8,8,8,8,8,1.4


In [74]:
# Rename SN column
unique_grouped_gender_df = unique_grouped_gender_df.rename(columns={'SN': 'Total Count'})

# Reorganize columns
gender_demo = unique_grouped_gender_df[['Percentage of Players', 'Total Count']]

# Gender Demographics
gender_demo

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.45,100
Male,81.15,465
Other / Non-Disclosed,1.4,8


# Purchasing Analysis (Gender)

In [75]:
# Group by gender for Purchasing Analysis (Gender)
grouped_gender = heroes_df.groupby(['Gender'])
grouped_gender

<pandas.core.groupby.DataFrameGroupBy object at 0x0000017669706828>

In [76]:
# Grab purchase count for each gender
purchase_count = grouped_gender['Price'].count()
purchase_count

# Grab average price for each gender
avg_purchase_price = grouped_gender['Price'].mean()
avg_purchase_price

# Grab sum for each gender
total_purchase_value = grouped_gender['Price'].sum()
total_purchase_value

Gender
Female                    382.91
Male                     1867.68
Other / Non-Disclosed      35.74
Name: Price, dtype: float64

In [77]:
# Create Purchasing Analysis (Gender)
gender_purchasing_analysis = pd.DataFrame({'Purchase Count': purchase_count, 
                                           'Average Purchase Price': avg_purchase_price, 
                                           'Total Purchase Value': total_purchase_value})
# Reorder columns
gender_purchasing_analysis = gender_purchasing_analysis[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']]

# Find mean and standard deviation of average purchase price of the population
mean_norm = heroes_df['Price'].mean()
stdev_norm = heroes_df['Price'].std()

# Calculate the z value, standard score of a raw score 
z_value = (gender_purchasing_analysis['Average Purchase Price']-mean_norm)/stdev_norm

# Calculate normalized totals and add to analysis
gender_purchasing_analysis['Normalized Totals'] = gender_purchasing_analysis['Average Purchase Price']+z_value

# Format values
gender_purchasing_analysis['Average Purchase Price'] = gender_purchasing_analysis['Average Purchase Price'].map("${:.2f}".format)
gender_purchasing_analysis['Total Purchase Value'] = gender_purchasing_analysis['Total Purchase Value'].map("${:,.2f}".format)
gender_purchasing_analysis['Normalized Totals'] = gender_purchasing_analysis['Normalized Totals'].map("${:.2f}".format)

# Purchase Analysis (Gender)
gender_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$2.71
Male,633,$2.95,"$1,867.68",$2.97
Other / Non-Disclosed,11,$3.25,$35.74,$3.53


# Age Demographics

In [78]:
# Create bins for Age Demographics Analysis
bins = [2, 9, 14, 19, 24, 29, 34, 39, 99]
labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Add bins column to dataframe
unique_heroes_df['Age Range'] = pd.cut(unique_heroes_df['Age'], bins=bins, labels=labels)
unique_heroes_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,SN,Gender,Age,Item ID,Item Name,Price,Age Range
0,Aelalis34,Male,38,165,Bone Crushing Silver Skewer,3.37,35-39
1,Eolo46,Male,21,119,"Stormbringer, Dark Blade of Ending Misery",2.32,20-24
2,Assastnya25,Male,34,174,Primitive Blade,2.46,30-34
3,Pheusrical25,Male,21,92,Final Critic,1.36,20-24
4,Aela59,Male,23,63,Stormfury Mace,1.27,20-24


In [79]:
# Group by age range
grouped_age_range = unique_heroes_df.groupby(['Age Range']).count()
grouped_age_range

Unnamed: 0_level_0,SN,Gender,Age,Item ID,Item Name,Price
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
<10,19,19,19,19,19,19
10-14,23,23,23,23,23,23
15-19,100,100,100,100,100,100
20-24,259,259,259,259,259,259
25-29,87,87,87,87,87,87
30-34,47,47,47,47,47,47
35-39,27,27,27,27,27,27
40+,11,11,11,11,11,11


In [80]:
# Convert the counts of each age range into a dataframe
age_demo_df = pd.DataFrame(grouped_age_range)

# Add percentage of players
age_demo_df['Percentage of Players'] = round(age_demo_df['Age']/total_players*100, 2)

# Rename Age column
age_demo_df = age_demo_df.rename(columns={'Age': 'Total Count'})

# Reduce dataframe and reorganize columns
age_demo_df = age_demo_df[['Percentage of Players', 'Total Count']]

# Age Demographics
age_demo_df

Unnamed: 0_level_0,Percentage of Players,Total Count
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


# Purchasing Analysis (Age)

In [81]:
# Group by Age Range
grouped_age_range = unique_heroes_df.groupby(['Age Range'])

# Grab purchase count for each age range
purchase_count = grouped_age_range['Price'].count()
purchase_count

# Grab average price for each age range
avg_purchase_price = round(grouped_age_range['Price'].mean(), 2)
avg_purchase_price

# Grab sum for each age range
total_purchase_value = grouped_age_range['Price'].sum()
total_purchase_value

Age Range
<10       59.45
10-14     62.04
15-19    289.88
20-24    765.69
25-29    263.53
30-34    152.60
35-39     78.65
40+       34.25
Name: Price, dtype: float64

In [82]:
# Create Purchasing Analysis (Age)
age_purchasing_analysis = pd.DataFrame({'Purchase Count': purchase_count, 
                                           'Average Purchase Price': avg_purchase_price, 
                                           'Total Purchase Value': total_purchase_value})
# Reorder columns
age_purchasing_analysis = age_purchasing_analysis[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']]

# Find mean and standard deviation of average purchase price of the population
mean_norm = heroes_df['Price'].mean()
stdev_norm = heroes_df['Price'].std()

# Calculate the z value, standard score of a raw score 
z_value = (age_purchasing_analysis['Average Purchase Price']-mean_norm)/stdev_norm

# Calculate normalized totals and add to analysis
age_purchasing_analysis['Normalized Totals'] = age_purchasing_analysis['Average Purchase Price']+z_value

# Format values
age_purchasing_analysis['Average Purchase Price'] = age_purchasing_analysis['Average Purchase Price'].map("${:.2f}".format)
age_purchasing_analysis['Total Purchase Value'] = age_purchasing_analysis['Total Purchase Value'].map("${:,.2f}".format)
age_purchasing_analysis['Normalized Totals'] = age_purchasing_analysis['Normalized Totals'].map("${:.2f}".format)

# Purchasing Analysis (Age)
age_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,19,$3.13,$59.45,$3.31
10-14,23,$2.70,$62.04,$2.49
15-19,100,$2.90,$289.88,$2.87
20-24,259,$2.96,$765.69,$2.99
25-29,87,$3.03,$263.53,$3.12
30-34,47,$3.25,$152.60,$3.54
35-39,27,$2.91,$78.65,$2.89
40+,11,$3.11,$34.25,$3.27


# Top Spenders

In [83]:
# Groupy by SN
grouped_players = heroes_df.groupby('SN')

# Grab purchase count for each age range
purchase_count = grouped_players['Price'].count()
purchase_count.head()

# Grab average price for each age range
avg_purchase_price = round(grouped_players['Price'].mean(), 2)
avg_purchase_price.head()

# Grab sum for each age range
total_purchase_value = grouped_players['Price'].sum()
total_purchase_value.head()

SN
Adairialis76    2.46
Aduephos78      6.70
Aeduera68       5.80
Aela49          2.46
Aela59          1.27
Name: Price, dtype: float64

In [84]:
# Create Top Spenders Analysis
top_spenders_analysis = pd.DataFrame({'Purchase Count': purchase_count, 
                                        'Average Purchase Price': avg_purchase_price, 
                                        'Total Purchase Value': total_purchase_value})

# Reorder columns
top_spenders_analysis = top_spenders_analysis[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']]

# Sort values by total purchase value
top_spenders_analysis = top_spenders_analysis.sort_values(by='Total Purchase Value', ascending=False)

# Format values
top_spenders_analysis['Average Purchase Price'] = top_spenders_analysis['Average Purchase Price'].map('${:.2f}'.format)
top_spenders_analysis['Total Purchase Value'] = top_spenders_analysis['Total Purchase Value'].map('${:.2f}'.format)

# Top 5 Spenders
top_spenders_analysis.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


# Most Popular Items

In [85]:
# Group by Item Name
grouped_items = heroes_df.groupby(['Item ID'])
grouped_items

<pandas.core.groupby.DataFrameGroupBy object at 0x00000176696B10F0>

In [86]:
# Grab purchase count for each item
purchase_count = grouped_items['Price'].count()
purchase_count

# Grab sum for each item
total_purchase_value = grouped_items['Price'].sum()
total_purchase_value.head()

Item ID
0    1.82
1    9.12
2    3.40
3    1.79
4    2.28
Name: Price, dtype: float64

In [87]:
# Create Most Popular Items Analysis
most_popular_count_total = pd.DataFrame({'Purchase Count': purchase_count, 
                                         'Total Purchase Value': total_purchase_value})

most_popular_count_total.reset_index(inplace=True)
most_popular_count_total.head()

Unnamed: 0,Item ID,Purchase Count,Total Purchase Value
0,0,1,1.82
1,1,4,9.12
2,2,1,3.4
3,3,1,1.79
4,4,1,2.28


In [88]:
# Reduce heroes df to merge
most_popular_id_price = heroes_df[['Item Name', 'Item ID', 'Price']]

# Drop duplicate Item Names
most_popular_id_price = most_popular_id_price.drop_duplicates(subset='Item ID', keep='first')

most_popular_id_price.head()

Unnamed: 0,Item Name,Item ID,Price
0,Bone Crushing Silver Skewer,165,3.37
1,"Stormbringer, Dark Blade of Ending Misery",119,2.32
2,Primitive Blade,174,2.46
3,Final Critic,92,1.36
4,Stormfury Mace,63,1.27


In [89]:
# Merge dataframes on Item Name
combined_most_popular = pd.merge(most_popular_count_total, most_popular_id_price, on='Item ID', how='inner')
combined_most_popular.head()

Unnamed: 0,Item ID,Purchase Count,Total Purchase Value,Item Name,Price
0,0,1,1.82,Splinter,1.82
1,1,4,9.12,Crucifer,2.28
2,2,1,3.4,Verdict,3.4
3,3,1,1.79,Phantomlight,1.79
4,4,1,2.28,Bloodlord's Fetish,2.28


In [90]:
# Rename Price column
most_popular_analysis = combined_most_popular.rename(columns={'Price': 'Item Price'})

# Reorganize columns
most_popular_analysis = most_popular_analysis[['Item ID', 'Item Name', 'Purchase Count', 'Item Price', 'Total Purchase Value']]

# Group by to create Most Popular Analysis
most_popular_analysis = most_popular_analysis.set_index(['Item ID', 'Item Name'])

# Sort by greatest purchase count
most_popular_analysis = most_popular_analysis.sort_values(by=['Purchase Count'], ascending=False)

# Format values
most_popular_analysis['Item Price'] = most_popular_analysis['Item Price'].map('${:.2f}'.format)
most_popular_analysis['Total Purchase Value'] = most_popular_analysis['Total Purchase Value'].map('${:.2f}'.format)

# Most Popular Items Analysis
most_popular_analysis.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


# Most Profitable Items

In [91]:
# Format values back to numeric
most_popular_analysis['Item Price'] = pd.to_numeric(most_popular_analysis['Item Price'].str.replace('$', ''))
most_popular_analysis['Total Purchase Value'] = pd.to_numeric(most_popular_analysis['Total Purchase Value'].str.replace('$', ''))

# Sort by total purchase value
most_profit_analysis = most_popular_analysis.sort_values(by=['Total Purchase Value'], ascending=False)

# Format values
most_profit_analysis['Item Price'] = most_profit_analysis['Item Price'].map('${:.2f}'.format)
most_profit_analysis['Total Purchase Value'] = most_profit_analysis['Total Purchase Value'].map('${:.2f}'.format)

# Most Profitable Items Analysis
most_profit_analysis.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
