In [94]:
import pandas as pd
import numpy as np

In [47]:
# Player Count

purchase = pd.read_csv('purchase_data.csv')
purchase.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [48]:
## Total Number of Players

total_players = len(purchase.SN.unique())
total_players

576

In [162]:
# Purchasing Analysis (Total)

## Number of Unique Items
unique_items = len(purchase['Item Name'].unique())
unique_items

## Average Purchase Price
avg_price = purchase.Price.mean()
round(avg_price,2)

## Total Number of Purchases
total_purchases = len(purchase['Purchase ID'].unique())
total_purchases

## Total Revenue
total_revenue = purchase.Price.sum()
total_revenue

purchase_analysis = {'Unique Items': [unique_items],
                     'Average Purchase Price': [round(avg_price,2)],
                     'Total Number of Purchases': [total_purchases],
                     'Total Revenue': [total_revenue]}
purchase_analysis = pd.DataFrame(purchase_analysis, columns=['Unique Items', 
                                                             'Average Purchase Price', 
                                                             'Total Number of Purchases', 
                                                             'Total Revenue'])
purchase_analysis


Unnamed: 0,Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


In [215]:
# Gender Demographics

### Filter by unique IDs
genders = ['Female', 'Male', 'Other / Non-Disclosed']
gender_grouped = purchase.groupby(['Gender'])['SN'].unique()

## Percentage and Count of Female Players
## Percentage and Count of Male Players
## Percentage and Count of Other / Non-Disclosed Players
for i in genders:
    gender_grouped[i] = len(gender_grouped[i])

gender_grouped = pd.DataFrame(gender_grouped)
gender_grouped['Percentage'] = gender_grouped['SN']/total_players *100
# gender_grouped.columns = gender_grouped['Count', 'Percentage']
gender_grouped

Unnamed: 0_level_0,SN,Percentage
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.0625
Male,484,84.0278
Other / Non-Disclosed,11,1.90972


In [210]:
# Purchasing Analysis (Gender)

# The below each broken by gender

## Purchase Count
## Average Purchase Price
## Total Purchase Value
genders_df = purchase[['Gender', 'Price']]

summary_gender = genders_df.groupby(['Gender']).agg([np.count_nonzero, np.mean, np.sum])
summary_gender.columns = summary_gender.columns.droplevel()
summary_gender.columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']

summary_gender


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113.0,3.203009,361.94
Male,652.0,3.017853,1967.64
Other / Non-Disclosed,15.0,3.346,50.19


In [211]:
## Average Purchase Total per Person by Gender
purchase.groupby(['Gender', 'SN'])['Price'].mean()


Gender                 SN           
Female                 Adastirin33      4.480
                       Aerithllora36    4.320
                       Aethedru70       3.540
                       Aidain51         3.450
                       Aiduesu86        4.480
                                        ...  
Other / Non-Disclosed  Lirtim36         1.330
                       Maluncil97       2.640
                       Rairith81        2.220
                       Siarithria38     3.455
                       Sundim98         4.750
Name: Price, Length: 576, dtype: float64

In [218]:
# Age Demographics

# The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)
bins = [0, 10, 15, 20, 25, 30, 35, 40, 45]

group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44']

purchase["Age Group"] = pd.cut(purchase["Age"], bins, labels=group_names)
age_grouped = purchase.groupby(['Age Group'])['SN'].unique()
for i in group_names:
    age_grouped[i] = len(age_grouped[i])
age_grouped = pd.DataFrame(age_grouped)
age_grouped['Percentage'] = age_grouped['SN']/total_players *100
# gender_grouped.columns = gender_grouped['Count', 'Percentage']
age_grouped

Unnamed: 0_level_0,SN,Percentage
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,24,4.16667
10-14,41,7.11806
15-19,150,26.0417
20-24,232,40.2778
25-29,59,10.2431
30-34,37,6.42361
35-39,26,4.51389
40-44,7,1.21528


In [224]:
## Purchase Count
## Average Purchase Price
## Total Purchase Value
age_df = purchase[["Age Group", 'Price']]

summary_age = age_df.groupby("Age Group").agg([np.count_nonzero, np.mean, np.sum])
summary_age.columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']

summary_age

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,32.0,3.405,108.96
10-14,54.0,2.9,156.6
15-19,200.0,3.1078,621.56
20-24,325.0,3.020431,981.64
25-29,77.0,2.875584,221.42
30-34,52.0,2.994423,155.71
35-39,33.0,3.404545,112.35
40-44,7.0,3.075714,21.53


In [233]:
## Average Purchase Total per Person by Age Group
age_df = purchase[['SN', "Age Group", 'Price']]
age_df.groupby(["Age Group", 'SN']).agg([np.mean])

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
Age Group,SN,Unnamed: 2_level_2
<10,Adairialis76,
<10,Adastirin33,
<10,Aeda94,
<10,Aela59,
<10,Aelaria33,
...,...,...
40-44,Yathecal82,
40-44,Yathedeu43,
40-44,Yoishirrala98,
40-44,Zhisrisu83,


In [226]:
# Top Spenders

# Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
top_5_spenders = purchase.groupby('SN')['Price'].sum().nlargest(5)
top_5_spenders = top_5_spenders.index.values.tolist()
spenders_df = purchase[purchase.SN.isin(top_5_spenders)]
spenders_df = spenders_df[['SN', 'Price']]

spenders_df.head()


Unnamed: 0,SN,Price
74,Lisosia93,4.64
120,Lisosia93,3.81
128,Iral74,4.14
148,Iskadarya95,4.03
222,Chamjask73,4.23


In [227]:
## SN
## Purchase Count
## Average Purchase Price
## Total Purchase Value
summary_spenders = spenders_df.groupby('SN').agg([np.count_nonzero, np.mean, np.sum])
summary_spenders.columns = summary_spenders.columns.droplevel()
summary_spenders.columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']

summary_spenders


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Chamjask73,3.0,4.61,13.83
Idastidru52,4.0,3.8625,15.45
Iral74,4.0,3.405,13.62
Iskadarya95,3.0,4.366667,13.1
Lisosia93,5.0,3.792,18.96


In [228]:
# Most Popular Items

# Identify the 5 most popular items by purchase count, then list (in a table):
top_5_items = purchase.groupby('Item ID')['Item Name'].count().nlargest(5)
top_5_items = top_5_items.index.values.tolist()
items_df = purchase[purchase['Item ID'].isin(top_5_items)]
items_df = items_df[['Item ID', 'Item Name', 'Price']]

items_df.head()


Unnamed: 0,Item ID,Item Name,Price
0,108,"Extraction, Quickblade Of Trembling Hands",3.53
18,82,Nirvana,4.9
25,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23
56,108,"Extraction, Quickblade Of Trembling Hands",3.53
61,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23


In [229]:
## Item ID
## Item Name
## Purchase Count
## Item Price
## Total Purchase Value

summary_items = items_df.groupby(['Item ID', 'Item Name']).agg([np.count_nonzero, np.mean, np.sum])
summary_items.columns = summary_items.columns.droplevel()
summary_items = summary_items.reset_index()
summary_items.columns = ['Item ID', 'Item Name', 'Purchase Count', 'Item Price', 'Total Purchase Value']

summary_items


Unnamed: 0,Item ID,Item Name,Purchase Count,Item Price,Total Purchase Value
0,19,"Pursuit, Cudgel of Necromancy",8.0,1.02,8.16
1,82,Nirvana,9.0,4.9,44.1
2,108,"Extraction, Quickblade Of Trembling Hands",9.0,3.53,31.77
3,145,Fiery Glass Crusader,9.0,4.58,41.22
4,178,"Oathbreaker, Last Hope of the Breaking Storm",12.0,4.23,50.76


In [230]:
# Most Profitable Items

# Identify the 5 most profitable items by total purchase value, then list (in a table):
top_5_profits = purchase.groupby('Item ID')['Price'].sum().nlargest(5)
top_5_profits = top_5_profits.index.values.tolist()
profits_df = purchase[purchase['Item ID'].isin(top_5_profits)]
profits_df = profits_df[['Item ID', 'Item Name', 'Price']]

profits_df.head()


Unnamed: 0,Item ID,Item Name,Price
2,92,Final Critic,4.88
18,82,Nirvana,4.9
25,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23
61,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23
62,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23


In [231]:
## Item ID
## Item Name
## Purchase Count
## Item Price
## Total Purchase Value

summary_profits = profits_df.groupby(['Item ID', 'Item Name']).agg([np.count_nonzero, np.mean, np.sum])
summary_profits = summary_profits.reset_index()
summary_profits.columns = ['Item ID', 'Item Name', 'Purchase Count', 'Item Price', 'Total Purchase Value']

summary_profits


Unnamed: 0,Item ID,Item Name,Purchase Count,Item Price,Total Purchase Value
0,82,Nirvana,9.0,4.9,44.1
1,92,Final Critic,8.0,4.88,39.04
2,103,Singed Scalpel,8.0,4.35,34.8
3,145,Fiery Glass Crusader,9.0,4.58,41.22
4,178,"Oathbreaker, Last Hope of the Breaking Storm",12.0,4.23,50.76
