# Heroes Of Pymoli Data Analysis
* Of the 573 active players, the vast majority are male (81.15%). There is also a smaller, but notable, proportion of female players (17.45%).

* Our peak age demographic falls between 20-24 (45.20%), with secondary groups falling between 15-19 (17.45%) and 25-29 (15.18%).

* Our players spend consistently over the lifetime of their gameplay. Across all major age and gender demographics, the average total spend per player is roughly $4 (normalized totals range from $3.78 to $4.89), at an average of $2.93 per purchase.
-----

In [85]:
# import dependencies 
import pandas as pd
import numpy as np
import os 
# Resolve the purchase-log path (relative to the working directory) and parse
# the JSON records into a DataFrame.
file = os.path.join('purchase_data.json')
file_df = pd.read_json(path_or_buf=file)
# Preview the first five rows as the cell output.
file_df.head(n=5)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [86]:
# Number of distinct players, identified by screen name (SN).
player_count = len(file_df['SN'].drop_duplicates())
# Wrap the scalar in a single-row table so it renders like the other summaries.
player_df = pd.DataFrame({'Total Players': [player_count]})
player_df.head()

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [87]:
# Number of distinct item IDs appearing in the purchase log
unique_purchase = len(file_df['Item ID'].unique())
# Mean price across all purchases, rounded to cents
average_price = round(file_df['Price'].mean(), 2)
# Total number of purchases (row count of the Price column)
total_purchase = len(file_df['Price'])
# Sum of all purchase prices, rounded to cents
total_revenue = round(file_df['Price'].sum(), 2)

# Assemble the one-row summary table. Every value is wrapped in a list so the
# row count is explicit, and currency values use a fixed two-decimal format so
# that e.g. 2.9 renders as "$2.90" rather than "$2.9".
purchase_analysis = pd.DataFrame({
    "Number of Unique Items": [unique_purchase],
    "Average Price": ["${:.2f}".format(average_price)],
    "Number of Purchases": [total_purchase],
    "Total Revenue": ["${:.2f}".format(total_revenue)],
}, columns=['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue'])

purchase_analysis.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,$2286.33


## Gender Demographics

In [88]:
# One row per player: keep only the first purchase record seen for each SN.
unique_players = file_df.drop_duplicates(subset='SN', keep='first')
unique_players.head(n=5)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [92]:
# Row masks selecting the purchases made by each explicitly labelled gender
male_mask = file_df["Gender"] == "Male"
female_mask = file_df["Gender"] == "Female"

# Distinct-player head counts. "Other" is whatever remains once male and
# female players are subtracted from the overall distinct-player total.
gender_count = file_df["SN"].nunique()
male_count = file_df.loc[male_mask, "SN"].nunique()
female_count = file_df.loc[female_mask, "SN"].nunique()
other_count = gender_count - male_count - female_count

# Share of all distinct players, as a percentage rounded to two decimals
male_percentage = round(male_count / gender_count * 100, 2)
female_percentage = round(female_count / gender_count * 100, 2)
other_percentage = round(other_count / gender_count * 100, 2)

# Build the summary table with an explicit column order
gender_columns = ["Gender", "Percentage of Players", "Total Count"]
gender_full_count_df = pd.DataFrame(
    {
        "Gender": ["Male", "Female", "Other / Non-Disclosed"],
        "Percentage of Players": [male_percentage, female_percentage, other_percentage],
        "Total Count": [male_count, female_count, other_count],
    },
    columns=gender_columns,
)
gender_full_count_df

Unnamed: 0,Gender,Percentage of Players,Total Count
0,Male,81.15,465
1,Female,17.45,100
2,Other / Non-Disclosed,1.4,8



## Purchasing Analysis (Gender)

In [93]:
# Row masks for each gender bucket. "Other / Non-Disclosed" is defined as every
# row that is neither "Male" nor "Female", so the purchase count, average and
# total for that bucket are all computed over the same rows, and agree with
# other_count, which is also derived as a remainder.
male_rows = file_df["Gender"] == "Male"
female_rows = file_df["Gender"] == "Female"
other_rows = ~(male_rows | female_rows)

# Prices paid within each bucket
male_prices = file_df.loc[male_rows, "Price"]
female_prices = file_df.loc[female_rows, "Price"]
other_prices = file_df.loc[other_rows, "Price"]

# Number of purchases per bucket
male_purchase = male_prices.count()
female_purchase = female_prices.count()
other_purchase = other_prices.count()

# Mean price per purchase
male_avg = male_prices.mean()
female_avg = female_prices.mean()
other_avg = other_prices.mean()

# Total value of all purchases
male_total = male_prices.sum()
female_total = female_prices.sum()
other_total = other_prices.sum()

# Normalized totals: total purchase value divided by the number of distinct
# players in the bucket (male_count / female_count / other_count come from the
# Gender Demographics cell).
male_norm = round(male_total / male_count, 2)
female_norm = round(female_total / female_count, 2)
other_norm = round(other_total / other_count, 2)

gender_purchase_columns = [
    "Gender",
    "Purchase Count",
    "Average Purchase Price",
    "Total Purchase Value",
    "Normalized Totals",
]
gender_purchase_df = pd.DataFrame(
    {
        "Gender": ["Female", "Male", "Other / Non-Disclosed"],
        "Purchase Count": [female_purchase, male_purchase, other_purchase],
        "Average Purchase Price": [female_avg, male_avg, other_avg],
        "Total Purchase Value": [female_total, male_total, other_total],
        "Normalized Totals": [female_norm, male_norm, other_norm],
    },
    columns=gender_purchase_columns,
)
# Render the currency columns with a dollar sign and two decimals
gender_purchase_df.style.format({
    "Average Purchase Price": "${:.2f}",
    "Total Purchase Value": "${:.2f}",
    "Normalized Totals": "${:.2f}",
})



Unnamed: 0,Gender,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
0,Female,136,$2.82,$382.91,$3.83
1,Male,633,$2.95,$1867.68,$4.02
2,Other / Non-Disclosed,11,$3.25,$35.74,$4.47


## Age Demographics

In [103]:
# One row per player with just the fields needed for age bucketing
age = unique_players[["SN", "Age"]]

# Half-open bins [lower, upper): <10, 10-14, 15-19, ..., 40+. Using contiguous
# edges (instead of separate ">= 10 and <= 14" style filters) means every age,
# including a non-integer one, falls into exactly one bucket.
age_bins = [-np.inf, 10, 15, 20, 25, 30, 35, 40, np.inf]
age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
age_groups = pd.cut(age["Age"], bins=age_bins, labels=age_labels, right=False)

# Number of players (rows of unique_players) in each bucket, in label order
ages = [(age_groups == label).sum() for label in age_labels]
age_10, age_14, age_19, age_24, age_29, age_34, age_39, age_40 = ages

# Percents: each bucket's share of all distinct players, rounded to two decimals
percents_a = [round((count / player_count) * 100, 2) for count in ages]
(percent_10, percent_14, percent_19, percent_24,
 percent_29, percent_34, percent_39, percent_40) = percents_a

# Creating the dictionary
age_demo = {
    "Percent of Players": percents_a,
    "Total Count": ages,
}
# Creating DataFrame, indexed by the age-bucket labels
age_demo_df = pd.DataFrame(age_demo, index=age_labels)
age_demo_df


Unnamed: 0,Percent of Players,Total Count
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


Unnamed: 0,Percentage of Players,Total Count
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


## Purchasing Analysis (Age)

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89
<10,28,$2.98,$83.46,$4.39


## Top Spenders

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


## Most Popular Items

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


## Most Profitable Items

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
