### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [2]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase log, relative to the notebook (edit if the data moves)
file_to_load = "Resources/purchase_data.csv"

# Parse the CSV into the DataFrame that the rest of the notebook reads from
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

## Player Count

* Display the total number of players


In [3]:
# Preview the first five rows to see which columns were loaded
purchase_data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
# Count distinct screen names (SN); a name that appears on several rows counts once
total_players = {'Total Players': [purchase_data['SN'].nunique()]}
total_players1 = pd.DataFrame(data=total_players)
total_players1

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [6]:
# Number of distinct Item ID values (missing IDs are not counted)
unique_items = len(purchase_data['Item ID'].dropna().unique())
unique_items

179

In [7]:
# Mean of the Price column across all purchase rows
average_price = purchase_data.loc[:, 'Price'].mean()
average_price

3.050987179487176

In [8]:
# Number of purchases = number of non-null Purchase ID values
total_purchases = purchase_data["Purchase ID"].agg("count")
total_purchases

780

In [10]:
# Total revenue = sum of the Price column
total_revenue = purchase_data.loc[:, 'Price'].sum()
total_revenue

2379.77

In [13]:
# Purchasing analysis (Total): one-row table of the headline figures computed above
purchasing_analysis = {
    "Number of Unique Items": [unique_items],
    "Average Purchase Price": [average_price],
    "Number of Purchases": [total_purchases],
    "Total Revenue": [total_revenue],
}

# Both money columns share one format: dollar sign, thousands separator, two decimals
currency_format = "${:,.2f}"
purchasing_analysis_new = pd.DataFrame(purchasing_analysis).style.format(
    {"Average Purchase Price": currency_format, "Total Revenue": currency_format}
)
purchasing_analysis_new

Unnamed: 0,Number of Unique Items,Average Purchase Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [17]:
# Total number of players: distinct screen names, used as the denominator for the shares
total_count = len(purchase_data['SN'].drop_duplicates())
total_count

576

In [20]:
# Total male players (count & percentage of all players)
# Rows are selected with a boolean mask. The previous
# groupby(['Gender']).get_group(('Male')) passed a plain string -- ('Male') is
# not a tuple -- to a list-keyed groupby, which recent pandas deprecates.
# With the mask, a gender that is absent from the data gives a count of 0
# rather than a KeyError.
male1 = purchase_data[purchase_data['Gender'] == 'Male']
# A player may have several purchases, so count distinct screen names
male2 = len(male1['SN'].unique())
male_percent = round((male2 / total_count) * 100, 2)
print(male2)
print(male_percent)

484
84.03


In [21]:
# Total female players (count & percentage of all players)
# Rows are selected with a boolean mask. The previous
# groupby(['Gender']).get_group(('Female')) passed a plain string -- ('Female')
# is not a tuple -- to a list-keyed groupby, which recent pandas deprecates.
# With the mask, a gender that is absent from the data gives a count of 0
# rather than a KeyError.
female1 = purchase_data[purchase_data['Gender'] == 'Female']
# A player may have several purchases, so count distinct screen names
female2 = len(female1['SN'].unique())
female_percent = round((female2 / total_count) * 100, 2)
print(female2)
print(female_percent)

81
14.06


In [22]:
# Total Other/Non-Disclosed players (count & percentage of all players)
# Rows are selected with a boolean mask. The previous
# groupby(['Gender']).get_group(('Other / Non-Disclosed')) passed a plain
# string -- the parentheses do not make a tuple -- to a list-keyed groupby,
# which recent pandas deprecates. With the mask, a gender that is absent from
# the data gives a count of 0 rather than a KeyError.
other1 = purchase_data[purchase_data['Gender'] == 'Other / Non-Disclosed']
# A player may have several purchases, so count distinct screen names
other2 = len(other1['SN'].unique())
other_percent = round((other2 / total_count) * 100, 2)
print(other2)
print(other_percent)

11
1.91



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [23]:
# Male purchase analysis, computed over the rows in male1
male_prices = male1["Price"]
male_purchase_count = male1['SN'].size
male_total_purchase = male_prices.sum()
male_average_price = male_prices.sum() / len(male_prices)
# Per-person spend divides by male2 (distinct male players), not by purchases
male_average_total = male_total_purchase / male2
# Printed in the order: count, average price, total value, average total per person
for figure in (male_purchase_count, male_average_price, male_total_purchase, male_average_total):
    print(figure)

652
3.0178527607361967
1967.64
4.065371900826446


In [24]:
# Female purchase analysis, computed over the rows in female1
female_prices = female1["Price"]
female_purchase_count = female1['SN'].size
female_total_purchase = female_prices.sum()
female_average_price = female_prices.sum() / len(female_prices)
# Per-person spend divides by female2 (distinct female players), not by purchases
female_average_total = female_total_purchase / female2
# Printed in the order: count, average price, total value, average total per person
for figure in (female_purchase_count, female_average_price, female_total_purchase, female_average_total):
    print(figure)

113
3.203008849557522
361.94
4.468395061728395


In [25]:
# Other/Non-Disclosed purchase analysis, computed over the rows in other1
other_prices = other1["Price"]
other_purchase_count = other1['SN'].size
other_total_purchase = other_prices.sum()
other_average_price = other_prices.sum() / len(other_prices)
# Per-person spend divides by other2 (distinct players in this group), not by purchases
other_average_total = other_total_purchase / other2
# Printed in the order: count, average price, total value, average total per person
for figure in (other_purchase_count, other_average_price, other_total_purchase, other_average_total):
    print(figure)

15
3.3459999999999996
50.19
4.5627272727272725


In [26]:
# Collect the per-gender figures into one table, one row per gender
gender_analysis = {
    "Gender": ["Male", "Female", "Other / Non-Disclosed"],
    "Purchase Count": [male_purchase_count, female_purchase_count, other_purchase_count],
    "Average Purchase Price": [male_average_price, female_average_price, other_average_price],
    "Total Purchase Value": [male_total_purchase, female_total_purchase, other_total_purchase],
    "Average Total Purchase per Person": [male_average_total, female_average_total, other_average_total],
}

# Every money column is shown as dollars with two decimals
dollar_columns = ["Average Purchase Price", "Total Purchase Value", "Average Total Purchase per Person"]
gender_df = (
    pd.DataFrame(gender_analysis)
    .set_index("Gender")
    .style.format({column: "${:.2f}" for column in dollar_columns})
)

gender_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,$3.02,$1967.64,$4.07
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [27]:
# Largest age present in the purchase data
Max_Age = purchase_data.loc[:, "Age"].max()
Max_Age

45

In [28]:
# Bin edges for pd.cut; intervals are right-inclusive: (0, 9], (9, 14], ..., (39, inf].
# The top edge is open-ended so that any age above 39 lands in "40+". The
# previous top edge of 46 only reached one past the oldest age currently in
# the data, so an older player in a refreshed file would have become NaN
# and silently dropped out of every age table.
bins = [0, 9, 14, 19, 24, 29, 34, 39, float("inf")]
age_labels = ["< 10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]
# Adds (or overwrites) the "Age Summary" column on purchase_data itself
purchase_data["Age Summary"] = pd.cut(purchase_data["Age"], bins, labels=age_labels)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Summary
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [29]:
# Unique players, and their share of all players, for each age bin.
# Rows are picked with a boolean mask on "Age Summary". The previous
# groupby(["Age Summary"]).get_group(("< 10")) form passed a plain string to a
# list-keyed groupby (deprecated in recent pandas) and repeated the same
# recipe eight times.
def rows_in_age_bin(age_label):
    """Return the purchase rows whose "Age Summary" equals ``age_label``."""
    return purchase_data[purchase_data["Age Summary"] == age_label]


age_bin_frames = [rows_in_age_bin(label) for label in age_labels]
# A player may have several purchases, so count distinct screen names per bin
age_bin_counts = [len(frame["SN"].unique()) for frame in age_bin_frames]
age_bin_percents = [(count / total_count) * 100 for count in age_bin_counts]

# Keep the numbered names so cells that read Bin1..Bin8, pc1..pc8 and
# Per_Bin1..Per_Bin8 continue to work (one entry per label in age_labels).
Bin1, Bin2, Bin3, Bin4, Bin5, Bin6, Bin7, Bin8 = age_bin_frames
pc1, pc2, pc3, pc4, pc5, pc6, pc7, pc8 = age_bin_counts
(Per_Bin1, Per_Bin2, Per_Bin3, Per_Bin4,
 Per_Bin5, Per_Bin6, Per_Bin7, Per_Bin8) = age_bin_percents

# Same output as before: count then percentage, bin by bin
for count, percent in zip(age_bin_counts, age_bin_percents):
    print(count)
    print(percent)

17
2.951388888888889
22
3.8194444444444446
107
18.57638888888889
258
44.79166666666667
77
13.368055555555555
52
9.027777777777777
31
5.381944444444445
12
2.083333333333333


In [31]:
# Per-bin figures, listed in the same order as age_labels
Player_Bins_Count = [pc1, pc2, pc3, pc4, pc5, pc6, pc7, pc8]
Percent_Bins = [Per_Bin1, Per_Bin2, Per_Bin3, Per_Bin4, Per_Bin5, Per_Bin6, Per_Bin7, Per_Bin8]

# The empty-string column holds the age labels and becomes the table index
Age_Demo = {
    "": age_labels,
    "Total Count": Player_Bins_Count,
    "Percentage Of Players": Percent_Bins,
}
Age_Demo2 = (
    pd.DataFrame(Age_Demo)
    .set_index("")
    .style.format({"Percentage Of Players": "{:,.2f}%"})
)
Age_Demo2

Unnamed: 0,Total Count,Percentage Of Players
,,
< 10,17.0,2.95%
10-14,22.0,3.82%
15-19,107.0,18.58%
20-24,258.0,44.79%
25-29,77.0,13.37%
30-34,52.0,9.03%
35-39,31.0,5.38%
40+,12.0,2.08%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [36]:
def summarize_age_group(age_label):
    """Compute the purchase figures for one age bin.

    Rows are picked with a boolean mask on "Age Summary". The previous
    groupby(["Age Summary"]).get_group((label)) form passed a plain string to
    a list-keyed groupby, which recent pandas deprecates.

    Returns a tuple, in this order:
        rows              -- the purchase rows in the bin
        purchase_count    -- number of rows (purchases)
        total_value       -- sum of Price
        average_price     -- total_value / number of purchases
        unique_buyers     -- number of distinct screen names (SN)
        average_per_buyer -- total_value / unique_buyers
    """
    rows = purchase_data[purchase_data["Age Summary"] == age_label]
    purchase_count = len(rows["SN"])
    total_value = rows["Price"].sum()
    average_price = total_value / len(rows["Price"])
    unique_buyers = len(rows["SN"].unique())
    # Per-person figure divides by distinct buyers, not by purchases
    average_per_buyer = total_value / unique_buyers
    return rows, purchase_count, total_value, average_price, unique_buyers, average_per_buyer


# Unpack into the numbered names that the summary-table cell reads
(Age_Group1, AG_Count1, Age_Group1_Total_Purchase,
 Age_Group1_Avg_Price, Unique_AG_Count1, PP_AvgTot_AG1) = summarize_age_group("< 10")

(Age_Group2, AG_Count2, Age_Group2_Total_Purchase,
 Age_Group2_Avg_Price, Unique_AG_Count2, PP_AvgTot_AG2) = summarize_age_group("10-14")

(Age_Group3, AG_Count3, Age_Group3_Total_Purchase,
 Age_Group3_Avg_Price, Unique_AG_Count3, PP_AvgTot_AG3) = summarize_age_group("15-19")

(Age_Group4, AG_Count4, Age_Group4_Total_Purchase,
 Age_Group4_Avg_Price, Unique_AG_Count4, PP_AvgTot_AG4) = summarize_age_group("20-24")

(Age_Group5, AG_Count5, Age_Group5_Total_Purchase,
 Age_Group5_Avg_Price, Unique_AG_Count5, PP_AvgTot_AG5) = summarize_age_group("25-29")

(Age_Group6, AG_Count6, Age_Group6_Total_Purchase,
 Age_Group6_Avg_Price, Unique_AG_Count6, PP_AvgTot_AG6) = summarize_age_group("30-34")

(Age_Group7, AG_Count7, Age_Group7_Total_Purchase,
 Age_Group7_Avg_Price, Unique_AG_Count7, PP_AvgTot_AG7) = summarize_age_group("35-39")

(Age_Group8, AG_Count8, Age_Group8_Total_Purchase,
 Age_Group8_Avg_Price, Unique_AG_Count8, PP_AvgTot_AG8) = summarize_age_group("40+")

In [37]:
# Per-age-group purchase table, one row per bin.
# The row labels come from age_labels (the labels pd.cut assigned) rather than
# a second hand-typed list: the previous copy spelled the first bin "<10"
# while the bins and the demographics table use "< 10".
Purchase_By_AgeGroup = {
    "": list(age_labels),
    "Purchase Count": [AG_Count1, AG_Count2, AG_Count3, AG_Count4,
                       AG_Count5, AG_Count6, AG_Count7, AG_Count8],
    "Average Purchase Price": [Age_Group1_Avg_Price, Age_Group2_Avg_Price,
                               Age_Group3_Avg_Price, Age_Group4_Avg_Price,
                               Age_Group5_Avg_Price, Age_Group6_Avg_Price,
                               Age_Group7_Avg_Price, Age_Group8_Avg_Price],
    "Total Purchase Value": [Age_Group1_Total_Purchase, Age_Group2_Total_Purchase,
                             Age_Group3_Total_Purchase, Age_Group4_Total_Purchase,
                             Age_Group5_Total_Purchase, Age_Group6_Total_Purchase,
                             Age_Group7_Total_Purchase, Age_Group8_Total_Purchase],
    "Avg Total Purchase per Person": [PP_AvgTot_AG1, PP_AvgTot_AG2, PP_AvgTot_AG3, PP_AvgTot_AG4,
                                      PP_AvgTot_AG5, PP_AvgTot_AG6, PP_AvgTot_AG7, PP_AvgTot_AG8],
}

# The empty-string column becomes the index; money columns are shown as dollars
Purchase_By_AgeGroup2 = pd.DataFrame(Purchase_By_AgeGroup)
Purchase_By_AgeGroup2 = Purchase_By_AgeGroup2.set_index("")
Purchase_By_AgeGroup2 = Purchase_By_AgeGroup2.style.format({
    "Average Purchase Price": "${:.2f}",
    "Total Purchase Value": "${:.2f}",
    "Avg Total Purchase per Person": "${:.2f}",
})

Purchase_By_AgeGroup2

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
,,,,
<10,23.0,$3.35,$77.13,$4.54
10-14,28.0,$2.96,$82.78,$3.76
15-19,136.0,$3.04,$412.89,$3.86
20-24,365.0,$3.05,$1114.06,$4.32
25-29,101.0,$2.90,$293.00,$3.81
30-34,73.0,$2.93,$214.00,$4.12
35-39,41.0,$3.60,$147.67,$4.76
40+,13.0,$2.94,$38.24,$3.19


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [40]:
# Top Spenders
# A single groupby pass produces count, mean and sum of Price per screen name.
# Columns are renamed by name; the previous version ran three separate
# groupbys, joined them, and re-assigned .columns by position after each
# join, which mislabels data if the join order ever changes.
Top_Spenders_df = (
    purchase_data.groupby('SN')['Price']
    .agg(["count", "mean", "sum"])
    .rename(columns={
        "count": "Purchase Count",
        "mean": "Average Purchase Price",
        "sum": "Total Purchase Value",
    })
)

# Five biggest spenders by total purchase value
Top_Spenders_final = Top_Spenders_df.sort_values('Total Purchase Value', ascending=False).head()
Top_Spenders_final.style.format({"Average Purchase Price": "${:.2f}", "Total Purchase Value": "${:.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [41]:
# Most Popular Items
# Group on the item's identity (Item ID + Item Name) and aggregate Price directly.
# The previous version summed every column per name and per ID, then joined
# the two results on the *summed Price*. That is a float-equality join: it
# drops an item whose per-name and per-ID totals differ, and cross-matches
# two items that happen to share a total. It also reused the "Gender" and
# "Age" count columns as scratch space, and relied on groupby().sum()
# dropping non-numeric columns, which pandas 2.x no longer does by default.
item_stats = (
    purchase_data.groupby(["Item ID", "Item Name"])["Price"]
    .agg(["count", "mean", "sum"])
    .rename(columns={
        "count": "Purchase Count",
        "mean": "Item Price",
        "sum": "Total Purchase Value",
    })
)

# Flat view (Clean_df) and (Item ID, Item Name)-indexed view (Final_df);
# the Most Profitable Items cell re-sorts Final_df.
Clean_df = item_stats.reset_index()[["Item ID", "Item Name", "Purchase Count", "Item Price", "Total Purchase Value"]]

Final_df = Clean_df.set_index(['Item ID','Item Name'])
Most_Popular = Final_df.sort_values("Purchase Count", ascending=False).head()
Most_Popular.style.format({"Item Price": "${:.2f}", "Total Purchase Value": "${:.2f}"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [42]:
# Most Profitable Items: the same item table, ranked by total purchase value
Most_Profitable = Final_df.sort_values(by="Total Purchase Value", ascending=False).iloc[:5]
Most_Profitable.style.format({"Item Price": "${:.2f}", "Total Purchase Value": "${:.2f}"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
