In [1]:
#Import Libraries
import pandas as pd
import numpy as np

In [2]:
#Variable containing filePath
inputFile = "Resources/purchase_data.csv"

#Read in Csv File as purchase_data dataframe
purchaseData_df = pd.read_csv(inputFile)
purchaseData_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [3]:
#Get a series that contains how many items each players purchased
playerPurchases_df = purchaseData_df["SN"].value_counts()

#using previous series, get how many unique players there are
totalPlayers = len(playerPurchases_df)

In [4]:
#Get a series on how many of each item sold
itemsSold_df = purchaseData_df["Item Name"].value_counts()

#Get how many items were purchased
totalPurchases = purchaseData_df["Item Name"].count()

#Get the number of unique items sold
uniquePurchases = len(itemsSold_df)

In [5]:
#Get the sum of all of the prices /Total Revenue
totalRevenue = purchaseData_df["Price"].sum()

#get the Average price per item
priceAverage = totalRevenue / totalPurchases
#Formate the price Average into currency
priceAverage = '{:.2f}'.format(priceAverage)

In [6]:
#Display Purchasing Analysis (Total)
print(f"Number of Unique Purchases: {uniquePurchases}\nAverage Purchase Price: ${priceAverage}\nTotal Number of Purchases: {totalPurchases}\nTotal Revenue: ${totalRevenue}")

Number of Unique Purchases: 179
Average Purchase Price: $3.05
Total Number of Purchases: 780
Total Revenue: $2379.77


In [7]:
#get all of the columns to make dataframes for each gender demographic
columns = [
    "Purchase ID",
    "SN",
    "Age",
    "Gender",
    "Item ID",
    "Item Name",
    "Price"
]

In [8]:
#Get Gender Demographics for Male
#Create datafram for only male players
purchaseData_male_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Male", columns]

#Get a series that contains how many items each male player purchased
malePurchases_df = purchaseData_male_df["SN"].value_counts()

#using previous series, get how many unique male players there are
totalMalePlayers = len(malePurchases_df)

In [9]:
#Get Gender Demographics for Female
#Create datafram for only femmale players
purchaseData_female_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Female", columns]

#Get a series that contains how many items each male player purchased
femalePurchases_df = purchaseData_female_df["SN"].value_counts()

#using previous series, get how many unique male players there are
totalFemalePlayers = len(femalePurchases_df)

In [10]:
#Get Gender Demographics for Other / Non-Disclosed
#Create datafram for only male players
purchaseData_other_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Other / Non-Disclosed", columns]

#Get a series that contains how many items each male player purchased
otherPurchases_df = purchaseData_other_df["SN"].value_counts()

#using previous series, get how many unique male players there are
totalOtherPlayers = len(otherPurchases_df)

In [11]:
#Get Percentage of each Gender
percentMale = totalMalePlayers / totalPlayers
percentFemale = totalFemalePlayers / totalPlayers
percentOther = totalOtherPlayers / totalPlayers

#Format Percentages
percentMale = '{:.2f}%'.format(percentMale)
percentFemale = '{:.2f}%'.format(percentFemale)
percentOther = '{:.2f}%'.format(percentOther)

In [12]:
#Output for the percentage and count for the Gender Demographics
print(f"Total Male Players: {totalMalePlayers} ({percentMale})")
print(f"Total Female Players: {totalFemalePlayers} ({percentFemale})")
print(f"Total Other / Non-Disclosed Players: {totalOtherPlayers} ({percentOther})")

Total Male Players: 484 (0.84%)
Total Female Players: 81 (0.14%)
Total Other / Non-Disclosed Players: 11 (0.02%)


In [13]:
#Get the max and min age to set parameters for the bin
maxAge = purchaseData_df["Age"].max()
minAge = purchaseData_df["Age"].min()

#Create a bin list to reference for the Age Demographic
bins = [7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47]

#Create labels for the bins
age_labels = ["7-11", "11-15", "15-19", "19-23", "23-27",
             "27-31", "31-35", "35-39", "39-43", "43-47"]
purchaseData_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


In [14]:
#Change the age Group column to the dataframe
purchaseData_df["Age Group"] = pd.cut(purchaseData_df["Age"], bins, labels=age_labels)

In [15]:
#Create a groupby object based on "Age Group"
purchase_group = purchaseData_df.groupby("Age Group")

In [16]:
#Get the amount of purchases for each age group
totalPurchaseAge = purchase_group["SN"].nunique()
totalPurchaseAge

Age Group
7-11      23
11-15     35
15-19     81
19-23    210
23-27    111
27-31     44
31-35     32
35-39     21
39-43      9
43-47      3
Name: SN, dtype: int64

In [17]:
#Get the Average Purchase Price for each age Group
averageAgePrice = purchase_group["Price"].mean()
averageAgePrice.map("${:.2f}".format)

Age Group
7-11     $3.16
11-15    $2.93
15-19    $3.04
19-23    $3.03
23-27    $3.06
27-31    $2.97
31-35    $2.93
35-39    $3.54
39-43    $3.12
43-47    $2.35
Name: Price, dtype: object

In [18]:
#Get the total purchase value for each age group
totalAgePrice = purchase_group["Price"].sum()
totalAgePrice.map("${:.2f}".format)

Age Group
7-11      $94.86
11-15    $137.81
15-19    $307.24
19-23    $903.84
23-27    $459.54
27-31    $178.05
31-35    $131.66
35-39     $95.64
39-43     $31.18
43-47      $7.06
Name: Price, dtype: object

In [19]:
#Get the Average Purchase total per person for each age group
averagePurchaseTotal = (totalAgePrice / totalPurchaseAge) 
averagePurchaseTotal.map("${:.2f}".format)


Age Group
7-11     $4.12
11-15    $3.94
15-19    $3.79
19-23    $4.30
23-27    $4.14
27-31    $4.05
31-35    $4.11
35-39    $4.55
39-43    $3.46
43-47    $2.35
dtype: object

In [20]:
#Identify the top 5 spenders
topSpenders = purchaseData_df.groupby("SN")

spenderPurchaseCount = topSpenders["Purchase ID"].count()

averageSpenderPurchasePrice = topSpenders["Price"].mean()

totalSpenderPurchasePrice = topSpenders["Price"].sum()

topSpenders_df = pd.DataFrame({ 
    "Purchase Count": spenderPurchaseCount,
    "Average Purchase Price": averageSpenderPurchasePrice,
    "Total Purchase Value": totalSpenderPurchasePrice
})

test = topSpenders_df.sort_values(["Purchase Count"], ascending = False).head()
test



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Iral74,4,3.405,13.62
Idastidru52,4,3.8625,15.45
Asur53,3,2.48,7.44
Inguron55,3,3.703333,11.11


In [27]:
#Identify the 5 most popular items
topItems = purchaseData_df.groupby("Item ID")

itemPurchaseCount = topItems["Purchase ID"].count()

totalItemPurchasePrice = topItems["Price"].sum()

topItems_df = pd.DataFrame({ 
    "Item Name": purchaseData_df["Item Name"],
    "Purchase Count": itemPurchaseCount,
    "Item Price": purchaseData_df["Price"],
    "Total Purchase Value": totalItemPurchasePrice
})

test = topItems_df.sort_values(["Purchase Count"], ascending = False).head()
test



Unnamed: 0,Item Name,Purchase Count,Item Price,Total Purchase Value
92,"Betrayal, Whisper of Grieving Widows",13.0,3.94,59.99
178,"Despair, Favor of Due Diligence",12.0,4.6,50.76
145,Hopeless Ebon Dualblade,9.0,1.33,41.22
132,Fiery Glass Crusader,9.0,4.58,28.99
108,Malificent Bag,9.0,1.75,31.77


In [30]:
#Identify the top 5 most profitable items
topProfits = purchaseData_df.groupby("Price")

averageItemProfit = topItems["Price"].mean()

totalItemProfit = topItems["Price"].sum()

topProfits_df = pd.DataFrame({ 
    "Item Name": purchaseData_df["Item Name"],
    "Purchase Count": itemPurchaseCount,
    "Item Price": purchaseData_df["Price"],
    "Total Purchase Value": totalItemProfit
})

test = topProfits_df.sort_values(["Total Purchase Value"], ascending = False).head()
test


Unnamed: 0,Item Name,Purchase Count,Item Price,Total Purchase Value
92,"Betrayal, Whisper of Grieving Widows",13.0,3.94,59.99
178,"Despair, Favor of Due Diligence",12.0,4.6,50.76
82,Azurewrath,9.0,4.4,44.1
145,Hopeless Ebon Dualblade,9.0,1.33,41.22
103,"Thorn, Satchel of Dark Souls",8.0,1.33,34.8
