In [1]:
# Dependencies and Setup
import pandas as pd
from IPython.display import HTML

styles = [
    dict(selector="th", props=[("font-size", "150%"),
                               ("text-align", "center")]),
    dict(selector="caption", props=[("caption-side", "bottom")])
]

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

styles = [
    dict(selector="th", props=[("font-size", "150%"),
                               ("text-align", "center")]),
    dict(selector="caption", props=[("caption-side", "bottom")])
]

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count
Display the total number of players

In [2]:
totalPlayers = len(purchase_data['SN'].unique())
playerCount = {'Total Players': [len(purchase_data['SN'].unique())]}


playerCount_df = pd.DataFrame(data=playerCount)
html = (playerCount_df.style.hide_index().set_table_styles(styles))
html



Total Players
576


In [6]:
#Retrieve the Item ID, Item Name, and Item Price columns
item_sales_df = purchase_data[["Item ID", "Item Name", "Price"]]
item_sales_df.head()

Unnamed: 0,Item ID,Item Name,Price
0,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,143,Frenzied Scimitar,1.56
2,92,Final Critic,4.88
3,100,Blindscythe,3.27
4,131,Fury,1.44


In [7]:
#Run basic calculations to obtain number of unique items, average price, etc.

uniqueItems = len(item_sales_df['Item ID'].unique())
averagePrice = item_sales_df['Price'].mean()
purchaseCount = item_sales_df['Item ID'].count()
totalRevenue = item_sales_df['Price'].sum()
results_df = pd.DataFrame({'Number of Unique Items':[uniqueItems],'Average Price':[averagePrice], 'Number of Purchases':[purchaseCount], 'Total Revenue':[totalRevenue]})

results_df['Average Price'] = results_df['Average Price'].map('${:,.2f}'.format)
results_df['Total Revenue'] = results_df['Total Revenue'].map('${:,.2f}'.format)



styles = [
    dict(selector="th", props=[("font-size", "150%"),
                               ("text-align", "center")]),
    dict(selector="caption", props=[("caption-side", "bottom")])
]
html = (results_df.style.hide_index().set_table_styles(styles))
html

Number of Unique Items,Average Price,Number of Purchases,Total Revenue
183,$3.05,780,"$2,379.77"


## Gender Demographics
* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [8]:
user_gender_df = purchase_data[['SN', 'Gender']].copy().drop_duplicates(['SN','Gender'])
maleTotal = user_gender_df.Gender.str.contains("Male").sum()
femaleTotal = user_gender_df.Gender.str.contains("Female").sum()
otherTotal = totalPlayers - (maleTotal + femaleTotal)
malePercentage = (maleTotal/totalPlayers)
femalePercentage = (femaleTotal/totalPlayers) 
otherPercentage = (otherTotal/totalPlayers) 


# initialise data of lists. 
data = {'Total Count':[maleTotal, femaleTotal, otherTotal], 'Percentage of Players':["{0:.2%}".format(malePercentage), "{0:.2%}".format(femalePercentage), "{0:.2%}".format(otherPercentage)]} 
  
# Creates pandas DataFrame. 
gender_df = pd.DataFrame(data, index =['Male', 'Female', 'Other / Non-Disclosed']) 
  
# print the data 
html = (gender_df.style.hide_index().set_table_styles(styles))
html


Total Count,Percentage of Players
484,84.03%
81,14.06%
11,1.91%


## Purchasing Analysis (Gender)
* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
* Create a summary data frame to hold the results
* Optional: give the displayed data cleaner formatting
* Display the summary data frame

In [9]:
#Get a dataframe of grouped by gender    
genderPurchases_df = purchase_data.groupby(["Gender"]).sum()
price_s = genderPurchases_df['Price']
#Format the price column
genderPurchases_df['Total Purchase Value'] = genderPurchases_df['Price'].map('${:,.2f}'.format).rename(columns={'Price': 'Total Purchase Value'})

#genderPurchases_df
#Get a series of total purchases by gender and add to the dataframe as Purchase Count
genderPurchases_df["Purchase Count"] = purchase_data.groupby(["Gender"]).count()["Price"]
#genderPurchases_df
# Get a series of mean purchase price by gender and add to the dataframe 
genderPurchases_df["Average Purchase Price"] = purchase_data.groupby(["Gender"]).mean()["Price"].map("${:.2f}".format)
genderPurchases_df["Avg Total Purchase per Person"] = price_s /genderPurchases_df["Purchase Count"]
genderPurchases_df["Avg Total Purchase per Person"] = genderPurchases_df["Avg Total Purchase per Person"].map("${:.2f}".format)

genderPurchases_df.drop(columns=['Age', 'Price','Item ID', 'Purchase ID' ], axis=1, inplace=True)
genderPurchases_df = genderPurchases_df[['Purchase Count','Average Purchase Price','Total Purchase Value','Avg Total Purchase per Person']]
genderPurchases_df
html = (genderPurchases_df.style.set_table_styles(styles))
html

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$3.20
Male,652,$3.02,"$1,967.64",$3.02
Other / Non-Disclosed,15,$3.35,$50.19,$3.35


## Age Demographics

* Establish bins for ages
* Categorize the existing players using the age bins. Hint: use pd.cut()
* Calculate the numbers and percentages by age group
* Create a summary data frame to hold the results
* Optional: round the percentage column to two decimal points
* Display Age Demographics Table

In [34]:
# Define bins 
bins = [0, 9, 14, 19, 24, 29, 34, 39, 999]
# Create labels for these bins
group_labels = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']

age_df  = purchase_data.copy().drop_duplicates(['SN','Age'])
age_df["Age Group"] = pd.cut(age_df["Age"], bins, labels=group_labels)

# Create a GroupBy object based upon "View Group"
age_group = age_df.groupby( [ "Age Group"] , as_index=False).count()
# Rename Columns
age_group = age_group[["Age Group", "SN"]].rename(columns={'SN': 'Total Count'}) 

# Get the Summation Column
age_group["Percentage of Players"] =age_group['Total Count'] / age_group['Total Count'].sum()

html = (age_group.style.hide_index().set_table_styles(styles))
html

Age Group,Total Count,Percentage of Players
<10,17,0.0295139
10-14,22,0.0381944
15-19,107,0.185764
20-24,258,0.447917
25-29,77,0.133681
30-34,52,0.0902778
35-39,31,0.0538194
40+,12,0.0208333


In [11]:



# # Create a GroupBy object based upon "View Group"
# age_data = purchase_data.groupby(["Age"]).sum()

# # Find how many rows fall into each bin
# age_data.head()
age_df  = purchase_data.copy().drop_duplicates(['SN','Age'])
age_df["Age Group"] = pd.cut(age_df["Age"], bins, labels=group_labels)


age_df =age_df.groupby(["Age Group"])
age_count = age_df.size()
age_avg_price = age_df["Price"].mean().map("${:.2f}".format)
age_tot_value = age_df["Price"].sum().map("${:.2f}".format)

age_demographics = pd.DataFrame({"Purchase Count":age_count,
                             "Average Purchase Price": age_avg_price,
                             "Total Purchase Value": age_tot_value})

age_demographics = age_demographics[["Purchase Count","Average Purchase Price", "Total Purchase Value" ]]


html = (age_demographics.style.set_table_styles(styles))
html

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,17,$3.39,$57.63
10-14,22,$3.07,$67.64
15-19,107,$3.10,$331.88
20-24,258,$3.06,$790.39
25-29,77,$2.91,$223.93
30-34,52,$2.92,$151.92
35-39,31,$3.51,$108.81
40+,12,$3.04,$36.45
