In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from IPython.display import display

In [2]:
# Relative path of the purchase data CSV that the notebook loads
file_path = "Resources/purchase_data.csv"

In [3]:
# Load the comma-separated purchase file found at `file_path` into a DataFrame
purchase_read = pd.read_csv(file_path, sep=",")

# Wrap the loaded data in a second DataFrame object and preview its first rows
players_df = pd.DataFrame(purchase_read)
players_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
# Total number of players = number of distinct values in the "SN" column
distinct_sn_values = purchase_read["SN"].unique()
uniq_players = len(distinct_sn_values)

# One-row frame so the figure renders as a table
total_players = pd.DataFrame({"Total Players": [uniq_players]})
total_players

Unnamed: 0,Total Players
0,576


In [5]:
# Purchasing summary: distinct items, mean price, purchase count and total revenue
price_column = purchase_read["Price"]

uniq_items = len(purchase_read["Item ID"].unique())      # distinct "Item ID" values
avg_price = round(price_column.mean(), 2)                # mean price, rounded to 2 decimals
total_purchase = purchase_read["Purchase ID"].count()    # non-null "Purchase ID" entries
total_revenue = price_column.sum()                       # sum of all prices

# Single-row table holding the four summary figures (displayed as the cell output)
pd.DataFrame(
    {
        "Number of Unique Items": [uniq_items],
        "Average Price": [avg_price],
        "Total Purchase": [total_purchase],
        "Total Revenue": [total_revenue],
    }
)

Unnamed: 0,Number of Unique Items,Average Price,Total Purchase,Total Revenue
0,179,3.05,780,2379.77


In [6]:
# Gender Demographics
# Split the purchase rows by the value found in the "Gender" column
gender_values = purchase_read["Gender"]
male_players = purchase_read[gender_values == "Male"]
female_players = purchase_read[gender_values == "Female"]
other_players = purchase_read[gender_values == "Other / Non-Disclosed"]

# Players per gender = number of distinct "SN" values within each subset
male_players_count, female_players_count, other_players_count = (
    len(subset["SN"].unique())
    for subset in (male_players, female_players, other_players)
)

# Total players count across the three categories.
# NOTE(review): this rebinds `total_players` (a DataFrame in an earlier cell) to a plain integer.
total_players = male_players_count + female_players_count + other_players_count

In [7]:
# Per-gender player counts, ordered Male, Female, Other / Non-Disclosed
gender_counts = [male_players_count, female_players_count, other_players_count]

# New data frame with the raw counts and each count's share of all players,
# the share being pre-formatted as a percentage string with two decimals
gender_df = pd.DataFrame(
    {
        "Total Count": gender_counts,
        "Percentage of Players": [
            f"{count / total_players:.2%}" for count in gender_counts
        ],
    }
)

In [8]:
# Label the rows with the gender categories using set_index.
# The relabelled frame is only displayed; gender_df itself is not reassigned.
gender_row_labels = pd.Index(["Male", "Female", "Other / Non-Disclosed"])
gender_df.set_index([gender_row_labels])

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [9]:
# Purchasing analysis (Gender)
# Purchases per gender = number of distinct "Purchase ID" values in each subset
male_purchase_count, female_purchase_count, other_purchase_count = (
    len(subset["Purchase ID"].unique())
    for subset in (male_players, female_players, other_players)
)

# Amount spent per gender = sum of the "Price" column in each subset
male_tot_purchase, female_tot_purchase, other_tot_purchase = (
    subset["Price"].sum()
    for subset in (male_players, female_players, other_players)
)

In [10]:
# Per-gender inputs, each ordered Male, Female, Other / Non-Disclosed
purchase_counts = [male_purchase_count, female_purchase_count, other_purchase_count]
purchase_totals = [male_tot_purchase, female_tot_purchase, other_tot_purchase]
player_counts = [male_players_count, female_players_count, other_players_count]

# New data frame for the purchasing-by-gender table.
# The two averages are pre-formatted as "$x.xx" strings; the total stays numeric.
# NOTE(review): the 'Total Purcase Value' label contains a typo ("Purcase"); it is kept
# unchanged so the column name matches the recorded output.
purchase_df = pd.DataFrame(
    {
        'Purchase Count': purchase_counts,
        # total spent divided by number of purchases
        'Average Purchase Price': [
            f"${total / count:.2f}"
            for total, count in zip(purchase_totals, purchase_counts)
        ],
        'Total Purcase Value': purchase_totals,
        # total spent divided by number of distinct players
        'Avg Total Purchase Per Person': [
            f"${total / players:.2f}"
            for total, players in zip(purchase_totals, player_counts)
        ],
    }
)

In [11]:
# Label the rows with the gender categories for the displayed output.
# The relabelled frame is only displayed; purchase_df itself is not reassigned.
purchase_row_labels = pd.Index(["Male", "Female", "Other / Non-Disclosed"])
purchase_df.set_index([purchase_row_labels])

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purcase Value,Avg Total Purchase Per Person
Male,652,$3.02,1967.64,$4.07
Female,113,$3.20,361.94,$4.47
Other / Non-Disclosed,15,$3.35,50.19,$4.56


In [12]:
# Age Demographics
# Bin edges for pd.cut; with its default right-inclusive intervals these are
# (0, 9], (9, 14], (14, 19], (19, 24], (24, 29], (29, 34], (34, 39], (39, 100]
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
# One label per interval, in the same order as the bin edges
age_group = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Tag every row with its age bracket; the new column is named ' ' (a single space)
purchase_read[' '] = pd.cut(purchase_read.Age, bins, labels=age_group)

# Keep only the first row for each "SN" so every player is counted once.
# NOTE(review): this rebinds purchase_read to the de-duplicated frame, so the full
# per-purchase rows are no longer reachable under that name after this cell runs.
purchase_read = purchase_read.drop_duplicates(['SN'])

# Players per bracket (non-null ages). Row order does not affect a count, so no sort
# is needed; observed=False keeps brackets with no players in the table as 0.
players_per_bracket = purchase_read.groupby(' ', observed=False)['Age'].count()

# Denominator is taken from the data (one row per player after de-duplication)
# instead of a hard-coded player total, so the percentages stay correct if the data changes.
player_total = len(purchase_read)

# Final table: count and share of players in each age bracket
df = pd.DataFrame({
    "Count": players_per_bracket,
    'Percentage of Players': players_per_bracket / player_total,
})
df.style.format({'Percentage of Players': "{0:.2%}"})  # render the share as a percentage

Unnamed: 0,Count,Percentage of Players
,,
<10,17.0,2.95%
10-14,22.0,3.82%
15-19,107.0,18.58%
20-24,258.0,44.79%
25-29,77.0,13.37%
30-34,52.0,9.03%
35-39,31.0,5.38%
40+,12.0,2.08%
