In [4]:
# Import dependencies and set-up

import pandas as pd

# Load purchase_data File

purchase_data = "Resources/purchase_data.csv"

# Read purchasing file and store into a Pandas data frame

purchase_data = pd.read_csv(purchase_data)

purchase_data.head()



Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
# Display Total Number of Players from purchase_data by counting "SN"
Total_Players = len(purchase_data["SN"].value_counts())

# Create a data frame of Total Players
Total_Player_Count = pd.DataFrame({"Total Players Count":[Total_Players]})
Total_Player_Count


Unnamed: 0,Total Players Count
0,576


In [6]:
# Run basic calculations to obtain number of unique items

# Unique Items
Unique = len(purchase_data["Item Name"].unique())

# Average Price

Average_Price= purchase_data["Price"].mean()

# Number of Purchases

Number_Purchases = purchase_data["Purchase ID"].count()

Total_Revenue = (purchase_data["Price"].sum())

# Summary of obtained values

summary_table = pd.DataFrame({"Number of Purchases":[Number_Purchases], "Unique Items":[Unique], "Average Price":[Average_Price], "Total Revenue":[Total_Revenue]})


summary_table

Unnamed: 0,Number of Purchases,Unique Items,Average Price,Total Revenue
0,780,179,3.050987,2379.77


In [91]:
# Gender Demographics
Find Total Gender Demographics

Total_Gender_Demographics = purchase_data["Gender"].value_counts()
Gender_Demographic_Percentages = Total_Gender_Demographics / Total_Players * 100
Gender_Demographics = pd.DataFrame({"Total Count": Total_Gender_Demographics, "Percentage of Players": Gender_Demographic_Percentages})

# To round up

Gender_Demographics = Gender_Demographics.round(2)

Gender_Demographics




Unnamed: 0,Total Count,Percentage of Players
Male,652,113.19
Female,113,19.62
Other / Non-Disclosed,15,2.6


In [112]:
# Do Purchasing Analysis by Gender
Purchase_Count = purchase_data.groupby(["Gender"]).count()["Price"]
Purchase_Total = purchase_data.groupby(["Gender"]).sum()["Price"]
Average_Purchase = purchase_data.groupby(["Gender"]).mean()["Price"]
Average_Purchase_Person = Purchase_Total / Gender_Demographics["Total Count"]

Purchasing_Analysis = pd.DataFrame({"Purchase Count": Purchase_Count, "Total Purchase Value": Purchase_Total, "Average Purchase Price": Average_Purchase, "Average Purchase Per Person": Average_Purchase_Person})
Purchasing_Analysis


Unnamed: 0_level_0,Purchase Count,Total Purchase Value,Average Purchase Price,Average Purchase Per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,361.94,3.203009,3.203009
Male,652,1967.64,3.017853,3.017853
Other / Non-Disclosed,15,50.19,3.346,3.346


In [120]:
# Age Demographics
# Create bins for ages
Bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
Group_Names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Categorize players based on age bins
purchase_data["Players Age Group"] = pd.cut(purchase_data["Age"], Bins, labels=Group_Names)
purchase_data

# Group players by age
Age_Group = purchase_data.groupby("Players Age Group")
Total_Count_By_Age = Age_Group["SN"].nunique()

# Find percentage by age
Percent_By_Age = (Total_Count_By_Age / Total_Players) * 100
Percent_By_Age = Percent_By_Age.round(2)

Age_Demographics = pd.DataFrame({"Percentage of Players": Percent_By_Age, "Total Count": Total_Count_By_Age})
Age_Demographics

Unnamed: 0_level_0,Percentage of Players,Total Count
Players Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,2.95,17
10-14,3.82,22
15-19,18.58,107
20-24,44.79,258
25-29,13.37,77
30-34,9.03,52
35-39,5.38,31
40+,2.08,12


In [124]:
# Conduct Purchasing Analysis by Age
# Bin purchase data by age
Bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
Group_Names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

purchase_data["Players Age"] = pd.cut(purchase_data["Age"], Bins, labels=Group_Names)
purchase_data

# Find purchase count, avg. purchase price, avg. purchase total per person
Purchase_Count_By_Age = purchase_data.groupby(["Players Age"]).count()["Price"]
Purchase_Total_By_Age = purchase_data.groupby(["Players Age"]).sum()["Price"]
Average_Purchase_By_Age = purchase_data.groupby(["Players Age"]).mean()["Price"]
Average_Purchase_Per_Person = Purchase_Total_By_Age / Age_Demographics["Total Count"]

Purchasing_Analysis = pd.DataFrame({"Purchase Count": Purchase_Count_By_Age, "Total Purchase": Purchase_Total_By_Age, "Average Purchase": Average_Purchase_By_Age, "Average Purchase Per Person": Average_Purchase_Per_Person})
Purchasing_Analysis

Unnamed: 0_level_0,Purchase Count,Total Purchase,Average Purchase,Average Purchase Per Person
Players Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,77.13,3.353478,4.537059
10-14,28,82.78,2.956429,3.762727
15-19,136,412.89,3.035956,3.858785
20-24,365,1114.06,3.052219,4.318062
25-29,101,293.0,2.90099,3.805195
30-34,73,214.0,2.931507,4.115385
35-39,41,147.67,3.601707,4.763548
40+,13,38.24,2.941538,3.186667


In [128]:
# Find Top Spenders
Top_Users_Count = purchase_data.groupby("SN").count()["Price"]
Top_Users_Total = purchase_data.groupby("SN").sum()["Price"]
Top_Users_Avg = purchase_data.groupby("SN").mean()["Price"]

Top_Spenders = pd.DataFrame({"Purchase Count": Top_Users_Count, "Average Purchase Price": Top_Users_Avg, "Total Purchase Value": Top_Users_Total})
Top_Spenders

Top_Spenders_df = Top_Spenders.sort_values("Purchase Count", ascending=False)
Top_Spenders_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Iral74,4,3.405,13.62
Idastidru52,4,3.8625,15.45
Asur53,3,2.48,7.44
Inguron55,3,3.703333,11.11


In [20]:
# Extract data

Most_Popular_Items= purchase_data.loc[:,["Item ID", "Item Name", "Price"]]

# Obtain values

Total_Purchase_Value = purchase_data.groupby(["Item Name"]).sum()["Price"]

Items_Count = purchase_data.groupby(["Item Name"]).count()["Price"]

Average_Price = purchase_data.groupby(["Item Name"]).mean()["Price"]

Most_Popular_Items = pd.DataFrame({"Total Purchase Value": Total_Purchase_Value, "Item Count": Items_Count, "Item Price": Average_Price})
Most_Popular_Items



Most_Popular_df = Most_Popular_Items.sort_values("Item Count", ascending=False)
Most_Popular_df.head()

Unnamed: 0_level_0,Total Purchase Value,Item Count,Item Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Final Critic,59.99,13,4.614615
"Oathbreaker, Last Hope of the Breaking Storm",50.76,12,4.23
Persuasion,28.99,9,3.221111
Nirvana,44.1,9,4.9
"Extraction, Quickblade Of Trembling Hands",31.77,9,3.53


In [21]:
Most_Popular_Items.sort_values("Total Purchase Value", ascending=False)

Unnamed: 0_level_0,Total Purchase Value,Item Count,Item Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Final Critic,59.99,13,4.614615
"Oathbreaker, Last Hope of the Breaking Storm",50.76,12,4.230000
Nirvana,44.10,9,4.900000
Fiery Glass Crusader,41.22,9,4.580000
Singed Scalpel,34.80,8,4.350000
...,...,...,...
"Flux, Destroyer of Due Diligence",2.12,2,1.060000
Whistling Mithril Warblade,2.00,2,1.000000
Exiled Mithril Longsword,2.00,1,2.000000
Gladiator's Glaive,1.93,1,1.930000
