In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from IPython.display import display

In [2]:
# Relative location of the purchase CSV that the notebook loads
file_path = "Resources/purchase_data.csv"

In [35]:
# Load the purchase CSV into a DataFrame (comma is already read_csv's default delimiter).
purchase_read = pd.read_csv(file_path)
# read_csv returns a fresh DataFrame, so a single copy is enough to keep
# players_df independent of purchase_read when columns are added to it later.
players_df = purchase_read.copy()
# Preview the first rows as the cell output.
players_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [36]:
# Display the total number of players in a data frame.
# Players are counted by distinct screen name (SN), so repeat buyers count once.
uniq_players = len(players_df["SN"].unique())
# Named total_players_df because the Gender Demographics cell further down binds
# `total_players` to an integer; sharing one name for a frame and a number makes
# the value depend on which cell ran last.
total_players_df = pd.DataFrame({"Total Players": [uniq_players]})
total_players_df

Unnamed: 0,Total Players
0,576


In [37]:
# Purchasing totals: distinct items, mean price, number of purchases, revenue
total_revenue = players_df["Price"].sum()
total_purchase = players_df["Purchase ID"].count()
avg_price = round(players_df["Price"].mean(), 2)
uniq_items = len(players_df["Item ID"].unique())

# One-row summary table; as the last expression it becomes the cell output
pd.DataFrame(
    {
        "Number of Unique Items": [uniq_items],
        "Average Price": [avg_price],
        "Total Purchase": [total_purchase],
        "Total Revenue": [total_revenue],
    }
)

Unnamed: 0,Number of Unique Items,Average Price,Total Purchase,Total Revenue
0,179,3.05,780,2379.77


In [38]:
# Gender Demographics
# Split the purchase rows into one frame per gender label (boolean-mask selection).
gender = players_df["Gender"]
male_players = players_df[gender == "Male"]
female_players = players_df[gender == "Female"]
other_players = players_df[gender == "Other / Non-Disclosed"]

# A screen name (SN) can appear on several purchase rows, so count distinct SN values.
male_players_count = len(male_players["SN"].unique())
female_players_count = len(female_players["SN"].unique())
other_players_count = len(other_players["SN"].unique())

# Total players is the sum of the three per-gender player counts.
total_players = male_players_count + female_players_count + other_players_count

In [39]:
# Summary table, one row per gender in the order Male, Female, Other / Non-Disclosed.
# Percentages are stored as pre-formatted strings (share of total_players).
gender_df = pd.DataFrame(
    {
        "Total Count": [male_players_count, female_players_count, other_players_count],
        "Percentage of Players": [
            "{0:.2%}".format(count / total_players)
            for count in (male_players_count, female_players_count, other_players_count)
        ],
    }
)

In [40]:
# Show the table with gender names as row labels; set_index returns a new frame,
# so gender_df itself is left unchanged.
gender_df.set_index(pd.Index(['Male', 'Female', 'Other / Non-Disclosed']))

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [41]:
# Purchasing analysis (Gender)
male_purchase_count=len(male_players['Purchase ID'].unique())
female_purchase_count=len(female_players['Purchase ID'].unique())
other_purchase_count=len(other_players['Purchase ID'].unique())
male_tot_purchase=male_players['Price'].sum()
female_tot_purchase=female_players['Price'].sum()
other_tot_purchase=other_players['Price'].sum()

In [42]:
# purchasing analysis dataframe(Gender)
# Purchasing analysis dataframe (Gender): one row per gender in the order
# Male, Female, Other / Non-Disclosed.
# - Average Purchase Price: total spend divided by the number of purchases.
# - Avg Total Purchase Per Person: total spend divided by the number of distinct players.
# Both averages are stored as '$'-prefixed strings; the total stays numeric.
purchase_df = pd.DataFrame({
    'Purchase Count': [male_purchase_count, female_purchase_count, other_purchase_count],
    'Average Purchase Price': [
        '${0:.2f}'.format(male_tot_purchase / male_purchase_count),
        '${0:.2f}'.format(female_tot_purchase / female_purchase_count),
        '${0:.2f}'.format(other_tot_purchase / other_purchase_count),
    ],
    # Header spelling corrected (was 'Total Purcase Value').
    'Total Purchase Value': [male_tot_purchase, female_tot_purchase, other_tot_purchase],
    'Avg Total Purchase Per Person': [
        '${0:.2f}'.format(male_tot_purchase / male_players_count),
        '${0:.2f}'.format(female_tot_purchase / female_players_count),
        '${0:.2f}'.format(other_tot_purchase / other_players_count),
    ],
})

In [43]:
# Show the table with gender names as row labels; set_index returns a new frame,
# so purchase_df itself is left unchanged.
purchase_df.set_index(pd.Index(['Male', 'Female', 'Other / Non-Disclosed']))

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purcase Value,Avg Total Purchase Per Person
Male,652,$3.02,1967.64,$4.07
Female,113,$3.20,361.94,$4.47
Other / Non-Disclosed,15,$3.35,50.19,$4.56


In [44]:
# Age Demographics
# Bin edges for grouping ages; pd.cut's default right-closed intervals give
# (0, 9], (9, 14], ..., (39, 100].
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
# Labels, one per interval, in the same order as the bin edges
age_group = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
# Tag every purchase row with its age range. This adds a column to players_df
# itself; the age purchasing-analysis cell groups on the same column.
players_df['Age Ranges'] = pd.cut(players_df['Age'], bins, labels=age_group)
# Keep one row per screen name so each player is counted once
age_demo = players_df.drop_duplicates(['SN'])
# Players per age range. observed=False is spelled out so every age bucket is
# listed (even an empty one) regardless of the pandas default for categorical keys.
age_counts = age_demo.groupby('Age Ranges', observed=False)['Age'].count()
# Share of players per range, divided by the actual number of unique players
# rather than a hard-coded 576 so it stays correct if the data changes.
df = pd.DataFrame({"Count": age_counts,
                   'Percentage of Players': age_counts / len(age_demo)})
# Render the share as a percentage with two decimals
df.style.format({'Percentage of Players': "{0:.2%}"})

Unnamed: 0_level_0,Count,Percentage of Players
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


In [45]:
# Purchasing analysis by age range.
# Relies on the 'Age Ranges' column that the Age Demographics cell adds to players_df.
# Sort once and reuse the result instead of re-sorting for every aggregate.
players_by_age = players_df.sort_values('Age')

# Non-null row counts per age range for every column; the Age column is used as
# the purchase count. observed=False keeps every age bucket in the result.
age_ranges = players_by_age.groupby('Age Ranges', observed=False).count()
purchase_count = age_ranges['Age']

# Average and total purchase price per age range
pur_analysis = players_by_age.groupby('Age Ranges', observed=False)
avg_purchase_price = pur_analysis['Price'].mean().round(2)
tot_purchase_price = pur_analysis['Price'].sum()

# Average total purchase per person:
# keep one row per screen name, then count the players in each age range
tot_uniq_person = players_df.drop_duplicates(['SN'])
uniq_person_grp = tot_uniq_person.sort_values('Age').groupby('Age Ranges', observed=False)
range_person_count = uniq_person_grp['Purchase ID'].count()
# Total spend in the range divided by the number of distinct players in it
avg_purchase_per_person = (tot_purchase_price / range_person_count).round(2)

In [46]:
# Purchase analysis (age) dataframe: one row per age range, built from the
# per-range series computed in the previous cell
purchase_analysis = pd.DataFrame(
    {
        "Purchase Count": age_ranges["Age"],
        "Average Purchase Price($)": avg_purchase_price,
        "Total Purchase Value": tot_purchase_price,
        "Avg Total Purchase Per Person($)": avg_purchase_per_person,
    }
)
purchase_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price($),Total Purchase Value,Avg Total Purchase Per Person($)
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,3.35,77.13,4.54
10-14,28,2.96,82.78,3.76
15-19,136,3.04,412.89,3.86
20-24,365,3.05,1114.06,4.32
25-29,101,2.9,293.0,3.81
30-34,73,2.93,214.0,4.12
35-39,41,3.6,147.67,4.76
40+,13,2.94,38.24,3.19
