# Heroes Of Pymoli Data Analysis
* Of the 1163 active players, the vast majority are male (82%). There also exists, a smaller, but notable proportion of female players (16%).

* Our peak age demographic falls between 20-24 (42%) with secondary groups falling between 15-19 (17.80%) and 25-29 (15.48%).

* Our players are putting in significant cash during the lifetime of their gameplay. Across all major age and gender demographics, the average purchase for a user is roughly $491.   
-----

In [2]:
#Import pandas library for data analysis

import pandas as pd

#Create reference for holding json file

json_path = "Resources/purchase_data.json"

# Import the json into a pandas DataFrame

Pymoli_df = pd.read_json(json_path)

Pymoli_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [None]:
#A dictionary containing a single list element converted to a dataframe

Total_Players_DF = pd.DataFrame({"Total Players":[Pymoli_df["SN"].nunique()]})

Total_Players_DF

## Purchasing Analysis (Total)

In [64]:
#A dictionary containing lists converted to a dataframe


Purchasing_Analysis_DF = pd.DataFrame(
                                    
    {"Number of Unique Items" : [Pymoli_df["Item Name"].nunique()],
                                     
     "Average Price" : [Pymoli_df["Price"].mean()],
     
     "Number of Purchases" : [Pymoli_df["Item ID"].count()],
     
      "Total Revenue" : [Pymoli_df["Price"].sum()]
    }
)



# Format float values to 2 decimal places,add a comma, and prefix amounts with $

pd.options.display.float_format = '${:,.2f}'.format

Purchasing_Analysis_DF_Ordered = Purchasing_Analysis_DF[[
    "Number of Unique Items","Average Price","Number of Purchases","Total Revenue"]]

Purchasing_Analysis_DF_Ordered

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$2.93,780,"$2,286.33"


## Gender Demographics

In [89]:
# Reset formatting

pd.options.display.float_format = '{:,.2f}'.format

# Convert Gender value_counts()  to data frame, calculate percentage of players, and add a new column to data frame

Gender_Demo_DF = pd.DataFrame(Pymoli_df["Gender"].value_counts())

Percentage_of_Players = (Pymoli_df["Gender"].value_counts()/Pymoli_df["Gender"].count()) * 100 

Gender_Demo_DF["Percentage of Players"] = Percentage_of_Players 

# Rename and Reorganize the columns

Gender_Demo_DF = Gender_Demo_DF.rename(columns = {"Gender": "Total Count"})

Gender_Demo_DF_Organized_Renamed = Gender_Demo_DF[["Percentage of Players","Total Count"]]

Gender_Demo_DF_Organized_Renamed

Unnamed: 0,Percentage of Players,Total Count
Male,81.15,633
Female,17.44,136
Other / Non-Disclosed,1.41,11



## Purchasing Analysis (Gender)

In [72]:
# Format float values to 2 decimal places,add a comma, and prefix amounts with $

pd.options.display.float_format = '${:,.2f}'.format

Purchasing_Analysis_Summary= Pymoli_df.groupby(["Gender"])

Total_Purchase_Value = Purchasing_Analysis_Summary["Price"].sum()

Average_Purchase_Price = Purchasing_Analysis_Summary["Price"].sum() / Purchasing_Analysis_Summary["Gender"].count()

# Creating a new dataframe

Purchasing_Analysis_Summary_Gender = pd.DataFrame({"Purchase Count" : Purchasing_Analysis_Summary["Gender"].count(),
                                             "Average Purchase Price" : Average_Purchase_Price,
                                             "Total Purchase Price" : Total_Purchase_Value,
                                             "Normalized Totals" : Average_Purchase_Price})

# Reorder columns of the dataframe and display

Purchasing_Analysis_Summary_Gender[["Purchase Count","Average Purchase Price","Total Purchase Price","Normalized Totals"]]


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Price,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$2.82
Male,633,$2.95,"$1,867.68",$2.95
Other / Non-Disclosed,11,$3.25,$35.74,$3.25


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


## Age Demographics

In [257]:
# Calculate the count for each age bin using loc

Less_than_10 = Pymoli_df.loc[Pymoli_df["Age"] < 10, ["Age","Price"]];Less_than_10

Ten_to_14 = Pymoli_df.loc[(Pymoli_df["Age"] >= 10) & (Pymoli_df["Age"] <= 14), ["Age","Price"]];Ten_to_14.count()

Fifteen_to_19 = Pymoli_df.loc[(Pymoli_df["Age"] >= 15) & (Pymoli_df["Age"] <=19), ["Age","Price"]];Fifteen_to_19.count()

Twenty_to_24 = Pymoli_df.loc[(Pymoli_df["Age"] >= 20) & (Pymoli_df["Age"] <=24), ["Age","Price"]];Twenty_to_24.count()

TwentyFive_to_29 = Pymoli_df.loc[(Pymoli_df["Age"] >= 25) & (Pymoli_df["Age"] <=29), ["Age","Price"]];TwentyFive_to_29.count()

Thirty_to_34 = Pymoli_df.loc[(Pymoli_df["Age"] >= 30) & (Pymoli_df["Age"] <=34), ["Age","Price"]];Thirty_to_34.count()

ThirtyFive_to_39 = Pymoli_df.loc[(Pymoli_df["Age"] >= 35) & (Pymoli_df["Age"] <=39), ["Age","Price"]];ThirtyFive_to_39.count()

Forty_plus = Pymoli_df.loc[Pymoli_df["Age"] >=40, ["Age","Price"]];Forty_plus.count()

# Name the indices for resulting data frame, column names transposed to indices

Age_Bin_Count = { "<10" : Less_than_10.count(),
              "10-14": Ten_to_14.count(),
              "15-19": Fifteen_to_19.count(),
              "20-24":Twenty_to_24.count(),
              "25-29":TwentyFive_to_29.count(),
              "30-34":Thirty_to_34.count(),
              "35-40":ThirtyFive_to_39.count(),
              "40+":Forty_plus.count()
             }

Age_Analysis_Summary = pd.DataFrame(Age_Bin_Count).transpose()[["Age"]]; Age_Analysis_Summary



# Rename column, introduce new column "Percentage of Players" and calculate the value, and display resulting dataframe

Age_Analysis_Summary = Age_Analysis_Summary.rename(columns ={"Age": "Total Count"});Age_Analysis_Summary

Age_Analysis_Summary["Percentage of Players"] = (Age_Analysis_Summary["Total Count"]/Age_Analysis_Summary["Total Count"].sum()) * 100; Age_Analysis_Summary

Age_Analysis_Summary_Reindexed = Age_Analysis_Summary.reindex(index =["<10","10-14","15-19","20-24","25-29","30-34","35-40","40+"])

Age_Analysis_Summary_Reindexed[["Percentage of Players","Total Count"]]

Unnamed: 0,Percentage of Players,Total Count
<10,3.59,28
10-14,4.49,35
15-19,17.05,133
20-24,43.08,336
25-29,16.03,125
30-34,8.21,64
35-40,5.38,42
40+,2.18,17


## Purchasing Analysis (Age)

In [268]:
# Format float values to 2 decimal places,add a comma, and prefix amounts with $

pd.options.display.float_format = '${:,.2f}'.format

Purchasing_Analysis_Age_DF["Purchase Count"] = pd.DataFrame(Age_Analysis_Summary["Total Count"])

Purchasing_Analysis_Age_DF["Total Purchase Value"] = [Less_than_10["Price"].sum(),Ten_to_14["Price"].sum(),Fifteen_to_19["Price"].sum(),Twenty_to_24["Price"].sum(),TwentyFive_to_29["Price"].sum(),Thirty_to_34["Price"].sum(),ThirtyFive_to_39["Price"].sum(),Forty_plus["Price"].sum()]

Purchasing_Analysis_Age_DF["Average Purchase Price"] = Purchasing_Analysis_Age_DF["Total Purchase Value"]/Purchasing_Analysis_Age_DF["Total Count"]

Purchasing_Analysis_Age_DF["Normalized Totals"] = (Purchasing_Analysis_Age_DF["Total Purchase Value"] - Purchasing_Analysis_Age_DF["Average Purchase Price"]) / (Purchasing_Analysis_Age_DF["Total Purchase Value"].max() - Purchasing_Analysis_Age_DF["Total Purchase Value"].min())

Purchasing_Analysis_Age_DF[["Purchase Count","Average Purchase Price","Total Purchase Value","Normalized Totals"]]

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.38,$83.46,$0.09
15-19,133,$0.73,$96.95,$0.10
20-24,336,$1.15,$386.42,$0.42
25-29,125,$7.83,$978.77,$1.05
30-34,64,$5.79,$370.33,$0.39
35-40,42,$4.70,$197.25,$0.21
40+,17,$7.02,$119.40,$0.12
<10,28,$1.92,$53.75,$0.06


Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89
<10,28,$2.98,$83.46,$4.39


## Top Spenders

In [175]:
Top_Spenders = Pymoli_df["SN"].value_counts().index.tolist()[:5];Top_Spenders

# df = Pymoli_df.set_index("SN");df

df = Pymoli_df.loc[(Pymoli_df["SN"] == Top_Spenders[0]) | (Pymoli_df["SN"] == Top_Spenders[1]) | (Pymoli_df["SN"] == Top_Spenders[2])|(Pymoli_df["SN"] == Top_Spenders[3])|(Pymoli_df["SN"] == Top_Spenders[4]),:]; df

g = df.groupby('SN'); 
Average_Purchase_Price = g["Price"].mean();Average_Purchase_Price
Total_Purchase_Price = g["Price"].sum();Total_Purchase_Price
Purchase_Count = g["SN"].count();Purchase_Count

new = pd.DataFrame(g.count());new

new["Purchase Count"] = Purchase_Count; new
new["Total Purchase Value"] = Total_Purchase_Price;new
new["Average Purchase Price"] = Average_Purchase_Price;new

pd.options.display.float_format = '${:,.2f}'.format

new_DF = new[["Purchase Count","Average Purchase Price","Total Purchase Value"]]; new_DF.sort_values("Total Purchase Value", ascending = False)



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Sondastan54,4,$2.56,$10.24
Hailaphos89,4,$1.47,$5.87


## Most Popular Items

In [190]:
Popular_Items = Pymoli_df["Item Name"].value_counts().index.tolist();Popular_Items
df1 = Pymoli_df.loc[(Pymoli_df["Item Name"] == Popular_Items[0]) | (Pymoli_df["SN"] == Popular_Items[1]) | (Pymoli_df["SN"] == Popular_Items[2])|(Pymoli_df["SN"] == Popular_Items[3])|(Pymoli_df["SN"] == Popular_Items[4]),:]; df1
g1 = df1.groupby(["Item Name"]);g1.count()

Unnamed: 0_level_0,Age,Gender,Item ID,Price,SN
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Final Critic,14,14,14,14,14


## Most Profitable Items

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
