# Heroes of Pymoli

In [62]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players


In [63]:
# Slice out a DataFrame of unique customers
customers = purchase_data[["SN","Gender","Age"]].drop_duplicates()
customer_count = customers.shape[0]

# Display
pd.DataFrame({"Total Number of Players" : [customer_count]}).style.hide_index()

Total Number of Players
576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [64]:
# Create a DataFrame of unique items
items = purchase_data[["Item ID", "Price"]].drop_duplicates()

# Create and display a Data Analysis Summary Data Frame
pd.concat([
    items.agg({"Price" : ["count","mean"]}).transpose().rename({
        "count" : "Number of Unique Items",
        "mean" : "Average Purchase Price"}, axis=1), 
    purchase_data.agg({"Price" : ["count","sum"]}).transpose().rename({
        "count" : "Total Number of Purchases",
        "sum" : "Total Revenue"
    }, axis=1)], axis=1).style.format({
        "Number of Unique Items" : "{:.0f}".format,
        "Average Purchase Price" : "${:.2f}".format,
        "Total Number of Purchases" : "{:.0f}".format,
        "Total Revenue" : "${:,.2f}"}).hide_index()

Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
183,$3.04,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [65]:
# Create a DataFrame with the required columns
gender_demographics = pd.concat([
    customers["Gender"].value_counts().rename("Customers"), 
    (customers["Gender"].value_counts()/customers.shape[0]).rename("Percentage")],
    axis=1).rename_axis("Gender").reset_index()

# Display
gender_demographics.style.format({"Percentage" : "{:.0%}".format}).hide_index()

Gender,Customers,Percentage
Male,484,84%
Female,81,14%
Other / Non-Disclosed,11,2%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [66]:
# Pivot on Gender
gender_purchases = pd \
    .pivot_table(purchase_data, values=["Price"], index="Gender", aggfunc=["count", "mean", "sum"]) \
    .droplevel(1, 1)

gender_purchases.rename(
    columns={"count" : "Purchase Count", "mean" : "Average Purchase Price", "sum" : "Total Purchase Value"}, inplace=True)

# Display
gender_purchases.style.format({
    "Average Purchase Price" : "${:.2f}".format,
    "Total Purchase Value" : "${:.2f}".format,
    "Avg by Customer" : "${:.2f}".format})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,$3.20,$361.94
Male,652,$3.02,$1967.64
Other / Non-Disclosed,15,$3.35,$50.19


## Age Demographics

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [67]:
# Add age bins to the purchase_data DataFrame
purchase_data["age_bins"] = pd.cut(purchase_data["Age"], range(5,55,5), right=False)

# Pivot on the age bins
age_purchases = pd.pivot_table(
    purchase_data, values=["Price"], index="age_bins", aggfunc=["count", "mean", "sum"]).droplevel(1,1)

# Manipulate
customer_count_by_age = pd.cut(customers["Age"], range(5,55,5), right=False).value_counts()
age_purchases["Avg by Customer"] = age_purchases["sum"].divide(customer_count_by_age,axis=0)
age_purchases.index.rename("Age Range", inplace = "True")
age_purchases.rename(
    columns={"count" : "Purchase Count", "mean" : "Average Purchase Price", "sum" : "Total Purchase Value"},
    inplace = True)

# Display
age_purchases.style.format({
    "Average Purchase Price" : "${:.2f}".format,
    "Total Purchase Value" : "${:.2f}".format,
    "Avg by Customer" : "${:.2f}".format})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg by Customer
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"[5, 10)",23,$3.35,$77.13,$4.54
"[10, 15)",28,$2.96,$82.78,$3.76
"[15, 20)",136,$3.04,$412.89,$3.86
"[20, 25)",365,$3.05,$1114.06,$4.32
"[25, 30)",101,$2.90,$293.00,$3.81
"[30, 35)",73,$2.93,$214.00,$4.12
"[35, 40)",41,$3.60,$147.67,$4.76
"[40, 45)",12,$3.04,$36.54,$3.32
"[45, 50)",1,$1.70,$1.70,$1.70


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [68]:
#Pivot on the customer
spenders = pd \
    .pivot_table(purchase_data, values="Price", index="SN", aggfunc=["count", "mean", "sum"]) \
    .droplevel(1,1) \
    .sort_values(by="sum", ascending=False) \
    .rename(columns={"count" : "Purchase Count", "mean" : "Average Purchase Price", "sum" : "Total Purchase Value"})

# Display
spenders.head(5).style.format({
    "Average Purchase Price" : "${:.2f}".format,
    "Total Purchase Value" : "${:.2f}".format,
    "Avg by Customer" : "${:.2f}".format})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [69]:
# Pivot on the item
items = pd \
    .pivot_table(purchase_data, values="Price", index=["Item ID","Item Name"], aggfunc=["count", "mean", "sum"]) \
    .droplevel(1, 1) \
    .sort_values(by="count", ascending=False) \
    .rename(columns={"count" : "Purchase Count", "mean" : "Item Price", "sum" : "Total Purchase Value"})

# Display
items.head(5).style.format({
    "Item Price" : "${:.2f}".format,
    "Total Purchase Value" : "${:.2f}".format,
    "Avg by Customer" : "${:.2f}".format})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [70]:
# Re-sort the items
items.sort_values(by="Total Purchase Value", ascending=False, inplace=True)

#Display
items.head(5).style.format({
    "Item Price" : "${:.2f}".format,
    "Total Purchase Value" : "${:.2f}".format,
    "Avg by Customer" : "${:.2f}".format})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80


## Observable Trends
* The Heroes of Pymoli attract more than five times as many male customers than female customers
* Two percent of the customers do not know or declare their gender
* The most important age segment for this game is the 20-24 year-olds