In [1]:
# Setup
import pandas as pd

# Read the raw purchase log into the frame that the analysis cells below query.
purchaseFile = "purchase_data.csv"
pymoliData = pd.read_csv(purchaseFile)
# Show the first rows as the cell output.
pymoliData.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# Player Count
# Players are identified by screen name (SN); the log holds one row per
# purchase, so the number of distinct SN values is the number of players.
uniqueScreenNames = pymoliData["SN"].unique()
playerCount = len(uniqueScreenNames)
playerCount

576

In [4]:
# Purchasing Analysis (Total)
# Headline figures for the whole purchase log, displayed as a one-row table.
uniqueItems = len(pymoliData["Item ID"].unique())
numberOfPurchases = pymoliData["Purchase ID"].count()

# Both money figures are pre-formatted display strings. The average price now
# carries the same leading "$" as the total revenue so the table is consistent.
averagePrice = "${:.2f}".format(pymoliData.Price.mean())
revenue = "${:,.2f}".format(pymoliData.Price.sum())

data = [[uniqueItems, averagePrice, numberOfPurchases, revenue]]
df = pd.DataFrame(
    data,
    columns=["Number of Unique Items", "Average Price", "Number of Purchases", "Total Revenue"],
)
df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.05,780,"$2,379.77"


In [5]:
# Gender Demographics
# Number of distinct players per gender and their share of all players.
# `snGroup` stays at notebook scope because the Top Spenders cell reuses it.
snGroup = pymoliData.groupby("SN")

# One row per distinct (screen name, gender) pair; the per-pair purchase
# tally ("Gender Count") is only a by-product and is discarded right away.
playerGenders = (
    snGroup["Gender"]
    .value_counts()
    .rename("Gender Count")
    .reset_index()[["SN", "Gender"]]
)

genders = (
    playerGenders
    .groupby("Gender")
    .count()
    .rename(columns={"SN": "Total Count"})
)

# Percentage is stored as a two-decimal display string, not a float.
playerShare = (genders["Total Count"] * 100) / playerCount
genders["Percentage of Players"] = playerShare.apply("{:.2f}".format)
genders

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.06
Male,484,84.03
Other / Non-Disclosed,11,1.91


In [36]:
#  Purchasing Analysis (Gender)
# Per-gender purchase count, average price and total spend, plus total spend
# divided by the number of distinct players of that gender (from `genders`).
byGenderGroup = pymoliData.groupby("Gender")

genderCount = byGenderGroup[["Purchase ID"]].count().rename(
    columns={"Purchase ID": "Purchase Count"}
)

meanByGender = byGenderGroup[["Price"]].mean().round(2)
meanByGender.columns = ["Average Purchase Price"]

totalPurchaseValue = byGenderGroup[["Price"]].sum().round(2)
totalPurchaseValue.columns = ["Total Purchase Value"]

# Every piece is indexed by Gender, so each merge simply lines the rows up.
# `genders` contributes the "Total Count" of distinct players per gender.
analysis = genderCount
for piece in (meanByGender, totalPurchaseValue, genders):
    analysis = pd.merge(analysis, piece, on="Gender")

analysis["Avg Total Purchase per Person"] = (
    analysis["Total Purchase Value"] / analysis["Total Count"]
).round(2)

# Keep only the report columns (drops the helper columns merged in from `genders`).
analysis = analysis[
    ["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Avg Total Purchase per Person"]
]

# Turn the money columns into display strings; only the total gets a thousands separator.
for column, template in (
    ("Average Purchase Price", "${:.2f}"),
    ("Total Purchase Value", "${:,.2f}"),
    ("Avg Total Purchase per Person", "${:.2f}"),
):
    analysis[column] = analysis[column].apply(template.format)

analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [67]:
# Age Demographics
# Number of distinct players in each age bracket and their share of all players.
# `bins` and `groupNames` are reused by the Purchasing Analysis (Age) cell.
# pd.cut uses right-closed intervals by default, so the brackets are
# (0, 9], (9, 14], ..., (39, 100]; ages outside that range get no bracket.
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
groupNames = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# One row per distinct (screen name, age) pair so repeat buyers count once;
# the per-pair purchase tally ("AgeCount") is discarded.
byAge = (
    pymoliData.groupby("SN")["Age"]
    .value_counts()
    .rename("AgeCount")
    .reset_index()[["SN", "Age"]]
)
byAge["Age Group"] = pd.cut(byAge["Age"], bins, labels=groupNames)

# "Age Group" is categorical. Pin observed=False explicitly so that a bracket
# with no players still shows up (with a count of 0) regardless of the pandas
# version's default, and so newer pandas does not warn about the implicit default.
groupByAge = byAge.groupby("Age Group", observed=False)

ageDemographics = groupByAge[["Age"]].count()
ageDemographics.columns = ["Total Count"]
ageDemographics["Percentage of Players"] = round(ageDemographics["Total Count"] * 100 / playerCount, 2)

ageDemographics

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


In [87]:
# Purchasing Analysis (Age)
# Per-age-bracket purchase count, average price and total spend, plus total
# spend divided by the number of distinct players in the bracket
# (taken from `ageDemographics`). Reuses `bins` / `groupNames` from the
# Age Demographics cell.

byAge = pymoliData.copy()
byAge["Age Group"] = pd.cut(byAge.Age, bins, labels=groupNames)
# observed=False keeps every bracket in the result, even an empty one,
# independent of the pandas version's default for categorical keys.
byAge = byAge.groupby("Age Group", observed=False)

purchaseCount = byAge[["Purchase ID"]].count()
purchaseCount.columns = ["Purchase Count"]

# Select Price *before* aggregating: mean()/sum() over the whole frame also
# hits the string columns (SN, Gender, Item Name), which raises a TypeError
# for mean() on pandas >= 2.0.
meanPurchasePrice = byAge[["Price"]].mean().round(2)
meanPurchasePrice.columns = ["Average Purchase Price"]

totalPurchasePrice = byAge[["Price"]].sum().round(2)
totalPurchasePrice.columns = ["Total Purchase Value"]

# All pieces are indexed by Age Group; the columns already have their final
# names, so no merge suffixes are involved.
purchasingAnalysisAge = pd.merge(purchaseCount, meanPurchasePrice, on="Age Group")
purchasingAnalysisAge = pd.merge(purchasingAnalysisAge, totalPurchasePrice, on="Age Group")

# Bring in the distinct-player count per bracket just long enough to divide by it.
purchasingAnalysisAge = purchasingAnalysisAge.merge(ageDemographics[["Total Count"]], on="Age Group")
purchasingAnalysisAge["Avg Total Purchase per Person"] = round(
    purchasingAnalysisAge["Total Purchase Value"] / purchasingAnalysisAge["Total Count"], 2
)
purchasingAnalysisAge = purchasingAnalysisAge[
    ["Purchase Count", "Average Purchase Price", "Total Purchase Value", "Avg Total Purchase per Person"]
]

# Money columns become display strings; only the total gets a thousands separator.
purchasingAnalysisAge["Average Purchase Price"] = purchasingAnalysisAge["Average Purchase Price"].apply("${:.2f}".format)
purchasingAnalysisAge["Total Purchase Value"] = purchasingAnalysisAge["Total Purchase Value"].apply("${:,.2f}".format)
purchasingAnalysisAge["Avg Total Purchase per Person"] = purchasingAnalysisAge["Avg Total Purchase per Person"].apply("${:.2f}".format)

purchasingAnalysisAge

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [79]:
# Top Spenders
# The five players with the highest total spend, with their purchase count
# and average purchase price. Uses `snGroup` (purchases grouped by screen
# name) from the Gender Demographics cell.
topSpenders = snGroup[["Price"]].sum()
topSpenders = topSpenders.sort_values(by=["Price"], ascending=False)
topSpenders.columns = ["Total Purchase Value"]
topSpenders = topSpenders.head()

purchaseCount = snGroup[["Price"]].count()
purchaseCount.columns = ["Purchase Count"]

# Select Price *before* taking the mean: a whole-frame mean() also hits the
# string columns (Gender, Item Name) and raises a TypeError on pandas >= 2.0.
avgPurchasePrice = snGroup[["Price"]].mean().round(2)
avgPurchasePrice.columns = ["Avg Purchase Price"]

# Inner merges on the SN index keep the (already sorted) top-five order and
# discard every player outside the top five.
topSpenders = pd.merge(topSpenders, purchaseCount, on="SN")
topSpenders = pd.merge(topSpenders, avgPurchasePrice, on="SN")

topSpenders = topSpenders[["Purchase Count", "Avg Purchase Price", "Total Purchase Value"]]
topSpenders["Avg Purchase Price"] = topSpenders["Avg Purchase Price"].apply("${:.2f}".format)
topSpenders["Total Purchase Value"] = topSpenders["Total Purchase Value"].apply("${:.2f}".format)
topSpenders

Unnamed: 0_level_0,Purchase Count,Avg Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [84]:
# Most Popular Items
# One row per (Item ID, Item Name) with its purchase count, unit price and
# total revenue, ordered by purchase count (highest first).
# `popularItems` keeps numeric values because the Most Profitable Items cell
# re-sorts it; `popularItemsFormatted` is the display copy with "$" strings.
items = pymoliData[["Item ID", "Item Name", "Price"]]
itemGroup = items.groupby(["Item ID", "Item Name"])

# Build all three columns straight from the grouped data so every item yields
# exactly one row. Looking the price up through value_counts() and a merge
# produced one row per distinct (item, price) pair, which would repeat an
# item (with its full count and total) if it was ever sold at two prices.
# "Item Price" is the price on the item's first purchase row.
popularItems = pd.DataFrame(
    {
        "Purchase Count": itemGroup["Item ID"].count(),
        "Item Price": itemGroup["Price"].first(),
        "Total Purchase Value": itemGroup["Price"].sum(),
    }
)
popularItems = popularItems.sort_values(by="Purchase Count", ascending=False)

# Format into a separate frame so the numeric columns stay sortable.
popularItemsFormatted = popularItems.assign(
    **{
        "Item Price": popularItems["Item Price"].apply("${:.2f}".format),
        "Total Purchase Value": popularItems["Total Purchase Value"].apply("${:.2f}".format),
    }
)
popularItemsFormatted.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


In [86]:
# Most Profitable Items
# Same per-item table as Most Popular Items, re-ordered by total revenue.
# sort_values returns a new frame, so `popularItems` keeps its numeric values.
profitable = popularItems.sort_values(by="Total Purchase Value", ascending=False)

# Convert both money columns to "$" display strings after sorting.
for moneyColumn in ("Item Price", "Total Purchase Value"):
    profitable[moneyColumn] = profitable[moneyColumn].apply("${:.2f}".format)

profitable.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80


In [None]:
# Observable Trends
# 1) Males account for the majority of Heroes of Pymoli players
# 2) Players aged 20 to 24 make up almost half of the player base (44.79%) and also account for almost half of all purchases (365 of 780)
# 3) Considering the number of players, no single item can be considered popular: the best-selling item was bought only 12 times across 576 players