In [43]:
# Dependencies and Setup
import pandas as pd
import numpy as nm

In [44]:
# Specifying file path
file_path="Resources/purchase_data.csv"

In [45]:
# Read purchase_data file and stores into pandas data frame
purchase_read=pd.read_csv(file_path, delimiter=',')
players_df = pd.DataFrame(purchase_read)
players_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [46]:
# Display of total number of players in a data frame
uniq_players=len(purchase_read["SN"].unique())
total_players=pd.DataFrame({"Total Players":[uniq_players]})
total_players
#total_players.style.set_table_styles({'selector':'th','props':('border','0.5px black solid !important; color: black !important;)})

Unnamed: 0,Total Players
0,576


In [47]:
# Printing number of unique items, average price, total purchase and total revenue
uniq_items=len(purchase_read["Item ID"].unique())
avg_price=round(purchase_read["Price"].mean(),2)
total_purchase=purchase_read["Purchase ID"].count()
total_revenue=purchase_read["Price"].sum()
pd.DataFrame({"Number of Unique Items":[uniq_items], "Average Price":[avg_price], 
              "Total Purchase":[total_purchase], "Total Revenue":[total_revenue]})

Unnamed: 0,Number of Unique Items,Average Price,Total Purchase,Total Revenue
0,179,3.05,780,2379.77


In [48]:
# Gender Demographics
male_players=purchase_read.loc[purchase_read["Gender"]=="Male",:]
male_players_count=len(male_players['SN'].unique())
female_players=purchase_read.loc[purchase_read["Gender"]=="Female",:]
female_players_count=len(female_players['SN'].unique())
other_players=purchase_read.loc[purchase_read["Gender"]=="Other / Non-Disclosed",:]
other_players_count=len(other_players['SN'].unique())
total_players=other_players_count+female_players_count+male_players_count

In [49]:
gender_df=pd.DataFrame({"Total Count":[male_players_count, female_players_count,other_players_count], 
              "Percentage of Players":["{0:.2%}".format(male_players_count/total_players), 
                                      "{0:.2%}".format(female_players_count/total_players),
                                      "{0:.2%}".format(other_players_count/total_players)]})

In [50]:
gender_df.set_index([pd.Index(['Male', 'Female', 'Other / Non-Disclosed'])])

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [51]:
# Purchasing analysis (Gender)
male_purchase_count=len(male_players['Purchase ID'].unique())
female_purchase_count=len(female_players['Purchase ID'].unique())
other_purchase_count=len(other_players['Purchase ID'].unique())
male_tot_purchase=male_players['Price'].sum()
female_tot_purchase=female_players['Price'].sum()
other_tot_purchase=other_players['Price'].sum()

In [52]:
purchase_df=pd.DataFrame({'Purchase Count':[male_purchase_count,female_purchase_count,other_purchase_count],
                         'Average Purchase Price':['${0:.2f}'.format(male_tot_purchase/male_purchase_count), 
                                                   '${0:.2f}'.format(female_tot_purchase/female_purchase_count),
                                                   '${0:.2f}'.format(other_tot_purchase/other_purchase_count)],
                         'Total Purcase Value':[male_tot_purchase,female_tot_purchase,other_tot_purchase],
                          'Avg Total Purchase Per Person':['${0:.2f}'.format(male_tot_purchase/male_players_count), 
                                                   '${0:.2f}'.format(female_tot_purchase/female_players_count),
                                                   '${0:.2f}'.format(other_tot_purchase/other_players_count)]})

In [53]:
purchase_df.set_index([pd.Index(['Male', 'Female', 'Other / Non-Disclosed'])])

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purcase Value,Avg Total Purchase Per Person
Male,652,$3.02,1967.64,$4.07
Female,113,$3.20,361.94,$4.47
Other / Non-Disclosed,15,$3.35,50.19,$4.56


In [54]:
# Age demographics
# age_demo=purchase_read.sort_values(["Age"])
age_bins=[0,8.9,13.9,18.9,23.9,28.9,33.9,38.9,100]
age_group=["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]
age_demo["Total Count"]=pd.cut(age_demo["Age"], age_bins, labels=age_group,
                                    include_lowest=False, right=False )
age_demo

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Total Count
33,33,Haillyrgue51,7,Male,44,Bonecarvin Battle Axe,2.38,<10
778,778,Sisur91,7,Male,92,Final Critic,4.19,<10
446,446,Chanossast57,7,Female,119,"Stormbringer, Dark Blade of Ending Misery",4.32,<10
515,515,Haillyrgue51,7,Male,40,Second Chance,2.52,<10
658,658,Quilassa66,7,Female,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,<10
...,...,...,...,...,...,...,...,...
557,557,Frichaya88,42,Male,8,"Purgatory, Gem of Regret",3.93,40+
674,674,Aeral68,43,Male,77,"Piety, Guardian of Riddles",4.00,40+
248,248,Isursuir31,44,Male,137,"Aetherius, Boon of the Blessed",3.39,40+
728,728,Chanosiaya39,44,Male,93,Apocalyptic Battlescythe,1.97,40+


In [55]:
demo=age_demo.groupby('Total Count')
age_demo_count=pd.DataFrame(demo["Age"].value_counts())
age_demo_count


Unnamed: 0_level_0,Unnamed: 1_level_0,Age
Total Count,Age,Unnamed: 2_level_1
<10,7,9
<10,8,8
10-14,10,9
10-14,11,7
10-14,9,6
10-14,12,6
10-14,13,4
15-19,15,35
15-19,16,30
15-19,18,26
