In [1]:
# Dependencies and Setup
import pandas as pd

# Relative location of the purchase log (adjust if the data lives elsewhere)
data_path = 'Resources/purchase_data.csv'

# Load the raw purchase records into a DataFrame named purchase_df
purchase_df = pd.read_csv(filepath_or_buffer=data_path)


## Player Count

In [2]:
# Distinct screen names (SN) found in the purchase log
players = purchase_df['SN'].unique()

# Head count wrapped in a one-element list so it can seed a one-row DataFrame
pl = [len(players)]

# One-row summary table with a single 'Total players' column
ply = pd.DataFrame({'Total players': pl})
ply

Unnamed: 0,Total players
0,576


## Purchasing Analysis

In [3]:
# Distinct item names in the purchase log and how many there are.
# NOTE(review): items are counted by name here, while the item tables further
# down key on 'Item ID' -- confirm which definition of "unique item" is wanted.
items = purchase_df['Item Name'].unique()
unique_items = len(items)

# Every remaining figure is derived from the 'Price' column
price_column = purchase_df['Price']

# Mean price over all purchase rows
avg_price = price_column.mean()

# One row per purchase, so the row count is the purchase count
n_purchase = len(price_column)

# Sum of all prices paid
t_revenue = price_column.sum()

In [4]:
# One-row summary of the purchasing totals computed in the previous cell
pa_df = pd.DataFrame({
    'Number of Unique Items': [unique_items],
    'Average Price': [avg_price],
    'Number of Purchases': [n_purchase],
    'Total Revenue': [t_revenue],
})

# Render the monetary columns as currency strings (display only)
pa_df = pa_df.assign(**{
    'Average Price': pa_df['Average Price'].map('${:.2f}'.format),
    'Total Revenue': pa_df['Total Revenue'].map('${:,.2f}'.format),
})
pa_df.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

In [5]:
# Columns needed for the demographic breakdowns, reduced to one row per player.
# A player appears once per purchase in the raw log, so only the first row for
# each screen name (SN) is kept. The explicit copy makes this an independent
# frame, so later column assignments do not touch purchase_df.
players_gender_df = (
    purchase_df.loc[:, ['SN', 'Age', 'Gender']]
    .drop_duplicates(subset='SN', keep='first')
    .copy()
)

# Players per gender. The Series is named explicitly instead of relying on the
# column name value_counts() happens to produce: that name is the source column
# ('Gender') before pandas 2.0 and 'count' from 2.0 onwards.
gender_df = (
    players_gender_df['Gender']
    .value_counts()
    .rename('Total Count')
    .rename_axis(None)
    .to_frame()
)

# Share of all players per gender, rendered as a percentage string
gender_df['Percentage of Players'] = (
    gender_df['Total Count'] / gender_df['Total Count'].sum() * 100
).map('{:.2f}%'.format)
gender_df.head()

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


## Purchasing Analysis (Gender)

####  - Male values

In [6]:
# Purchase rows made by players whose gender is recorded as 'Male'
male_purchase = purchase_df[purchase_df['Gender'] == 'Male']
male_prices = male_purchase['Price']

# Number of purchases (non-null prices)
mpc = male_prices.count()

# Average price paid per purchase
mavp = male_prices.mean()

# Total value of all purchases
mtp = male_prices.sum()

# Average total spend per player: sum each player's purchases, then average the sums
pp = male_purchase[['SN', 'Price']]
mpp_df = pp.groupby('SN').sum()
mpp = mpp_df['Price'].mean()


#### - Female values

In [7]:
# Purchase rows made by players whose gender is recorded as 'Female'
female_purchase = purchase_df[purchase_df['Gender'] == 'Female']
female_prices = female_purchase['Price']

# Number of purchases (non-null prices)
fpc = female_prices.count()

# Average price paid per purchase
favp = female_prices.mean()

# Total value of all purchases
ftp = female_prices.sum()

# Average total spend per player: sum each player's purchases, then average the sums
pp1 = female_purchase[['SN', 'Price']]
fpp_df = pp1.groupby('SN').sum()
fpp = fpp_df['Price'].mean()

#### - Other / Non-Disclosed values

In [8]:
# Purchase rows made by players whose gender is recorded as 'Other / Non-Disclosed'
other_purchase = purchase_df[purchase_df['Gender'] == 'Other / Non-Disclosed']
other_prices = other_purchase['Price']

# Number of purchases (non-null prices)
opc = other_prices.count()

# Average price paid per purchase
oavp = other_prices.mean()

# Total value of all purchases
otp = other_prices.sum()

# Average total spend per player: sum each player's purchases, then average the sums
pp2 = other_purchase[['SN', 'Price']]
opp_df = pp2.groupby('SN').sum()
opp = opp_df['Price'].mean()


In [9]:
# Purchasing analysis by gender: one row per gender, built from the per-gender
# scalars computed in the three preceding cells (m* = male, f* = female,
# o* = other / non-disclosed).
# Column labels are spelled out correctly ('Purchase Count',
# 'Average Purchase Price') so they match the labels used by the age table.
gender_df = pd.DataFrame(
    {
        'Purchase Count': [mpc, fpc, opc],
        'Average Purchase Price': [mavp, favp, oavp],
        'Total Purchase Value': [mtp, ftp, otp],
        'Avg Total Purchase per Person': [mpp, fpp, opp],
    },
    index=['Male', 'Female', 'Other / Non-Disclosed'],
)

# Render the monetary columns as currency strings (display only);
# only the total gets a thousands separator.
gender_df['Average Purchase Price'] = gender_df['Average Purchase Price'].map('${:.2f}'.format)
gender_df['Total Purchase Value'] = gender_df['Total Purchase Value'].map('${:,.2f}'.format)
gender_df['Avg Total Purchase per Person'] = gender_df['Avg Total Purchase per Person'].map('${:.2f}'.format)
gender_df

Unnamed: 0,Purhcase Count,Average Purchae Price,Total Purchase Value,Avg Total Purchase per Person
Male,652,$3.02,"$1,967.64",$4.07
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

In [10]:
# Bin edges and labels for the age groups. With include_lowest=True the first
# bin also contains its left edge; ages above the last edge (100) fall outside
# every bin and become NaN.
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
age = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Tag every player (one row per SN) with an age group. assign() rebinds the
# name to a new frame rather than writing a column into a frame derived from
# a slice.
players_gender_df = players_gender_df.assign(
    age_range=pd.cut(players_gender_df['Age'], bins, labels=age, include_lowest=True)
)

# Players per age group, in bin order. The Series is named explicitly because
# the column name produced by value_counts() differs between pandas versions
# ('age_range' before 2.0, 'count' from 2.0 onwards).
ppa = (
    players_gender_df['age_range']
    .value_counts(sort=False)
    .rename('Total Count')
    .rename_axis(None)
    .to_frame()
)

# Share of all players per age group. players_gender_df holds one row per
# player, so its length is the player total (a scalar, not a list).
ppa['Percentage of Players'] = (
    ppa['Total Count'] / len(players_gender_df) * 100
).map('{:.2f}%'.format)

ppa

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


## Purchasing Analysis (Age)


In [11]:
# Bin edges and labels for the age groups (include_lowest=True puts the left
# edge of the first bin inside it)
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
age = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Purchase log with an added 'age_range' column. assign() returns a new frame,
# so the raw purchase_df is left untouched; a plain `purchase_age_df = purchase_df`
# would only create a second name for the same object and the new column would
# be written into the raw data as well.
purchase_age_df = purchase_df.assign(
    age_range=pd.cut(purchase_df['Age'], bins, labels=age, include_lowest=True)
)

In [12]:
# Purchases per age group, in bin order, as a two-column frame
purchase_count = (
    purchase_age_df['age_range']
    .value_counts(sort=False)
    .rename_axis('age_range')
    .reset_index(name='purchase count')
)

# Average price and total value per age group. 'age_range' is categorical;
# observed=False is stated explicitly so empty age groups stay in the result.
paa_age = purchase_age_df.groupby('age_range', observed=False)
avg_pp = paa_age[['Price']].mean()
tot_pv = paa_age[['Price']].sum()

# Average total spend per player within each age group.
# Step 1 sums each player's purchases. observed=True restricts the result to
# (age group, player) pairs that actually occur; without it a categorical key
# yields every age group x player combination, and pandas versions that fill
# the non-occurring pairs with 0 would drag the averages down.
# Step 2 averages those per-player totals per age group. 'Price' is selected
# explicitly so the string column 'SN' is never passed to mean().
ppp_arange = purchase_age_df[['age_range', 'SN', 'Price']]
av_ppp = (
    ppp_arange
    .groupby(['age_range', 'SN'], observed=True)[['Price']]
    .sum()
    .reset_index()
    .groupby('age_range', observed=False)[['Price']]
    .mean()
)

# Combine the four per-age-group results. Each 'Price' column is given its final
# label before merging, so no merge-generated suffixes are involved.
paa_df = (
    purchase_count
    .merge(avg_pp.rename(columns={'Price': 'Average Purchase Price'}), on='age_range')
    .merge(tot_pv.rename(columns={'Price': 'Total Purchase Value'}), on='age_range')
    .merge(av_ppp.rename(columns={'Price': 'Avg Total Purchase per Person'}), on='age_range')
    .rename(columns={'age_range': 'Age Ranges', 'purchase count': 'Purchase Count'})
)

# Render the monetary columns as currency strings (display only)
paa_df['Average Purchase Price'] = paa_df['Average Purchase Price'].map('${:.2f}'.format)
paa_df['Total Purchase Value'] = paa_df['Total Purchase Value'].map('${:,.2f}'.format)
paa_df['Avg Total Purchase per Person'] = paa_df['Avg Total Purchase per Person'].map('${:.2f}'.format)
paa_df

Unnamed: 0,Age Ranges,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,<10,23,$3.35,$77.13,$4.54
1,10-14,28,$2.96,$82.78,$3.76
2,15-19,136,$3.04,$412.89,$3.86
3,20-24,365,$3.05,"$1,114.06",$4.32
4,25-29,101,$2.90,$293.00,$3.81
5,30-34,73,$2.93,$214.00,$4.12
6,35-39,41,$3.60,$147.67,$4.76
7,40+,13,$2.94,$38.24,$3.19


## Top Spenders

In [13]:
# Only the player name and the price paid are needed to rank spenders
top_sp = purchase_df[['SN', 'Price']]

# Purchases per player, as a two-column frame
pur_count = top_sp['SN'].value_counts().rename_axis('SN').reset_index(name='purchase count')

# Average and total spend per player
spend_by_player = top_sp.groupby('SN')
avg_pur_p = spend_by_player.mean()
tot_pur_p = spend_by_player.sum()

# Combine the three per-player results. The second merge sees two 'Price'
# columns and labels them Price_x (average) and Price_y (total).
top_spender_df = pur_count.merge(avg_pur_p, on='SN').merge(tot_pur_p, on='SN')
top_spender_df = top_spender_df.rename(columns={
    'purchase count': 'Purchase Count',
    'Price_x': 'Average Purchase Price',
    'Price_y': 'Total Purchase Value',
})

# Highest total spend first; sorting happens while the values are still numeric
top_spender_sorted = top_spender_df.sort_values(by=['Total Purchase Value'], ascending=False)

# Render the monetary columns as currency strings (display only)
for money_col in ('Average Purchase Price', 'Total Purchase Value'):
    top_spender_sorted[money_col] = top_spender_sorted[money_col].map('${:.2f}'.format)
top_spender_sorted.head()

Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
0,Lisosia93,5,$3.79,$18.96
1,Idastidru52,4,$3.86,$15.45
3,Chamjask73,3,$4.61,$13.83
2,Iral74,4,$3.40,$13.62
13,Iskadarya95,3,$4.37,$13.10


## Most Popular Items

In [14]:
# Only the item identifiers and the price paid are needed to rank items
most_popular = purchase_df[['Item ID', 'Item Name', 'Price']]

# Purchases per Item ID, as a two-column frame
item_pur_count = (
    most_popular['Item ID']
    .value_counts()
    .rename_axis('Item ID')
    .reset_index(name='purchase count')
)

# Average price and total value per (Item ID, Item Name) pair
item_groups = most_popular.groupby(['Item ID', 'Item Name'])
mp_item_price = item_groups.mean().reset_index()
mp_item_pvalue = item_groups.sum().reset_index()

# Combine into one per-item summary. Price and value were grouped by both
# 'Item ID' and 'Item Name', so they are re-joined on both keys; joining on
# 'Item ID' alone would cross-multiply rows if an ID ever carried two names.
# Columns get their final labels before merging, so there are no _x/_y
# duplicates to drop afterwards.
mpi = (
    mp_item_price.rename(columns={'Price': 'Item Price'})
    .merge(item_pur_count.rename(columns={'purchase count': 'Purchase Count'}), on='Item ID')
    .merge(mp_item_pvalue.rename(columns={'Price': 'Total Purchase Value'}),
           on=['Item ID', 'Item Name'])
    .loc[:, ['Item ID', 'Item Name', 'Purchase Count', 'Item Price', 'Total Purchase Value']]
)

# Most purchased items first. mpi itself stays numeric; only the sorted copy
# below is turned into display strings.
mpopular = mpi.sort_values(by=['Purchase Count'], ascending=False)

# Render the monetary columns as currency strings (display only)
for money_col in ('Item Price', 'Total Purchase Value'):
    mpopular[money_col] = mpopular[money_col].map('${:.2f}'.format)

mpopular.head()

Unnamed: 0,Item ID,Item Name,Purchase Count,Item Price,Total Purchase Value
90,92,Final Critic,13,$4.61,$59.99
174,178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
141,145,Fiery Glass Crusader,9,$4.58,$41.22
129,132,Persuasion,9,$3.22,$28.99
105,108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


## Most Profitable Items

In [15]:
# Rank the per-item summary by total purchase value, highest first. Sorting is
# done on mpi's numeric values; formatting is applied to the sorted copy only.
mprofit = mpi.sort_values(by=['Total Purchase Value'], ascending=False)

# Render the monetary columns as currency strings (display only)
mprofit = mprofit.assign(**{
    'Item Price': mprofit['Item Price'].map('${:.2f}'.format),
    'Total Purchase Value': mprofit['Total Purchase Value'].map('${:.2f}'.format),
})
mprofit.head()

Unnamed: 0,Item ID,Item Name,Purchase Count,Item Price,Total Purchase Value
90,92,Final Critic,13,$4.61,$59.99
174,178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
80,82,Nirvana,9,$4.90,$44.10
141,145,Fiery Glass Crusader,9,$4.58,$41.22
100,103,Singed Scalpel,8,$4.35,$34.80
