# Heroes Of Pymoli Data Analysis

<ul>
<li> Of the 573 active players, the vast majority are male (81.15%). There is also a smaller but notable proportion of female players (17.45%).</li>
<li> Our peak age demographic falls between 20-24 (45.2%) with secondary groups falling between 15-19 (17.45%) and 25-29 (15.18%).
</li>
<li> Across all major age and gender demographics, the average total spend per player is roughly $4.00 (the average price of a single purchase is about $2.93).</li>
</ul>

In [12]:
import pandas as pd

In [13]:
# Load the raw purchase log; the analysis cells below all start from this frame.
purchase_data_path = "purchase_data.json"
df_heroes = pd.read_json(purchase_data_path)

## Player Count:

In [14]:
# A player may appear on several purchase rows, so count distinct screen names
# ("SN") rather than rows. dropna=False keeps a missing name counted as one
# distinct value.
player_count = df_heroes["SN"].nunique(dropna=False)

# One-row summary table for display.
df_total_number_of_players = pd.DataFrame(
    [[player_count]],
    columns=["Total Players"],
)

df_total_number_of_players.head()


Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total):

In [15]:
# Headline figures computed over every purchase row.
unique_items = df_heroes["Item ID"].nunique(dropna=False)  # distinct items sold
average_price = df_heroes["Price"].mean()                  # mean price per purchase
number_purchases = df_heroes["Item Name"].count()          # rows with an item name
total_revenue = df_heroes["Price"].sum()                   # sum of all prices

# Build the one-row summary; `columns` fixes the display order up front.
df_purchasing_total = pd.DataFrame(
    {
        "Number of Unique Items": [unique_items],
        "Average Price": [average_price],
        "Number of Purchases": [number_purchases],
        "Total Revenue": [total_revenue],
    },
    columns=[
        "Number of Unique Items",
        "Average Price",
        "Number of Purchases",
        "Total Revenue",
    ],
)

# Render the money columns as currency strings (display only).
df_purchasing_total["Average Price"] = df_purchasing_total["Average Price"].map(
    "${:.2f}".format
)
df_purchasing_total["Total Revenue"] = df_purchasing_total["Total Revenue"].map(
    "${:,.2f}".format
)
df_purchasing_total.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


## Gender Demographics

In [16]:
# Keep only the first purchase row per screen name so each player is counted once.
unique_players_df = df_heroes.drop_duplicates(subset=["SN"])

# Number of players per gender.
gender_counts = unique_players_df["Gender"].value_counts().rename("Total Count")

# Start from the counts, then place the percentage share (rounded to two
# decimals) in front of them as the first column.
df_gender = gender_counts.to_frame()
df_gender.insert(
    0,
    "Percentage of Players",
    (gender_counts * 100 / gender_counts.sum()).round(2),
)

df_gender.head()


Unnamed: 0,Percentage of Players,Total Count
Male,81.15,465
Female,17.45,100
Other / Non-Disclosed,1.4,8


## Purchasing Analysis (Gender)

In [17]:
# Split every purchase row by the buyer's gender.
gender_group = df_heroes.groupby("Gender")
gender_prices = gender_group["Price"]

# Purchase count per gender seeds the summary table.
purchase = gender_group["Item Name"].count().rename("Purchase Count")
purchase_df = purchase.to_frame()

# Mean and total spend per gender, rendered as currency strings.
purchase_df["Average Purchase Price"] = gender_prices.mean().map("${:,.2f}".format)
purchase_df["Total Purchase Value"] = gender_prices.sum().map("${:,.2f}".format)

# Normalized total = total spend divided by the number of unique players of
# that gender (df_gender comes from the Gender Demographics cell); the two
# Series are matched on their gender labels.
purchase_df["Normalized Totals"] = (
    gender_prices.sum() / df_gender["Total Count"]
).map("${:,.2f}".format)
purchase_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


## Age Demographics

In [18]:
# Age brackets and their labels. pd.cut treats the edges as right-closed
# intervals by default, i.e. (0, 9], (9, 14], ..., (39, 120].
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
bins = [0, 9, 14, 19, 24, 29, 34, 39, 120]

# Tag each unique player with an age bracket. assign() returns a new frame, so
# this avoids the SettingWithCopyWarning raised by writing a column directly
# onto the frame produced by drop_duplicates().
unique_players_df = unique_players_df.assign(
    **{"Age Group": pd.cut(unique_players_df["Age"], bins, labels=group_names)}
)
group_age = unique_players_df.groupby("Age Group")

# Number of unique players in each bracket.
age_total = group_age["SN"].count().rename("Total Count")

# Add each bracket's share of all players, rounded to two decimals, and show
# the percentage column first.
age_demographics_df = pd.DataFrame(age_total)
age_demographics_df["Percentage of Players"] = (
    age_demographics_df["Total Count"] * 100 / age_demographics_df["Total Count"].sum()
).round(2)
age_demographics_df = age_demographics_df[["Percentage of Players", "Total Count"]]
age_demographics_df.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0_level_0,Percentage of Players,Total Count
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


## Purchasing Analysis (Age)

In [19]:
# Tag every purchase row with the buyer's age bracket (same bins and labels as
# the Age Demographics cell) and split the purchases by bracket.
df_heroes["Age Group"] = pd.cut(df_heroes["Age"], bins, labels=group_names)
p_group = df_heroes.groupby("Age Group")
age_prices = p_group["Price"]

# Purchase count per bracket seeds the summary table.
p_group_count = p_group["Item Name"].count().rename("Purchase Count")
purchasing_age_group_df = p_group_count.to_frame()

# Mean and total spend per bracket, rendered as currency strings.
purchasing_age_group_df["Average Purchase Price"] = age_prices.mean().map("${:.2f}".format)
purchasing_age_group_df["Total Purchase Value"] = age_prices.sum().map("${:.2f}".format)

# Normalized total = total spend divided by the number of unique players in the
# bracket (group_age is built from the de-duplicated player frame).
purchasing_age_group_df["Normalized Totals"] = (
    age_prices.sum() / group_age["SN"].count()
).map("${:.2f}".format)
purchasing_age_group_df.head(10)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,28,$2.98,$83.46,$4.39
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89


## Top Spenders

In [20]:
# One group per player screen name.
spenders_group = df_heroes.groupby("SN")

# Total spend per player; the five largest totals define the table's rows.
spenders_sum = spenders_group["Price"].sum().rename("Total Purchase Value")
spenders_df = spenders_sum.nlargest(5).to_frame()

# Put purchase count and average price in front of the total. Both Series cover
# every player and are matched to the five rows by screen name.
spenders_df.insert(0, "Purchase Count", spenders_group["SN"].count())
spenders_df.insert(
    1,
    "Average Purchase Price",
    spenders_group["Price"].mean().map("${:.2f}".format),
)

# Format the total last so the ranking above used the numeric values.
spenders_df["Total Purchase Value"] = spenders_df["Total Purchase Value"].map(
    "${:.2f}".format
)
spenders_df.head(5)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


## Most Popular Items

In [21]:
# One group per (Item ID, Item Name) pair.
items_group = df_heroes.groupby(["Item ID", "Item Name"])

# Purchases per item; the five highest counts define the table's rows.
items_group_count = items_group["Item ID"].count().rename("Purchase Count")
items_summary_df = items_group_count.nlargest(5).to_frame()

# Per-item price statistics in one pass: "max" is shown as the item's price,
# "sum" as its total purchase value. Both are matched to the five rows by
# their (Item ID, Item Name) index and rendered as currency strings.
item_price_stats = items_group["Price"].agg(["max", "sum"])
items_summary_df["Item Price"] = item_price_stats["max"].map("${:.2f}".format)
items_summary_df["Total Purchase Value"] = item_price_stats["sum"].map("${:.2f}".format)
items_summary_df.head(5)



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
13,Serenity,9,$1.49,$13.41
31,Trickster,9,$2.07,$18.63
34,Retribution Axe,9,$4.14,$37.26


## Most Profitable Items

In [22]:
# One group per (Item ID, Item Name) pair.
items_group = df_heroes.groupby(["Item ID", "Item Name"])

# Total purchase value per item; rank on the numeric totals, then render the
# five largest as currency strings.
items_group_sum = items_group["Price"].sum().rename("Total Purchase Value")
items_profit_df = items_group_sum.nlargest(5).map("${:.2f}".format).to_frame()

# Put purchase count and item price (the maximum price seen for the item) in
# front of the total; both are matched to the five rows by their index.
items_profit_df.insert(0, "Purchase Count", items_group["Item ID"].count())
items_profit_df.insert(
    1,
    "Item Price",
    items_group["Price"].max().map("${:.2f}".format),
)
items_profit_df.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
