In [1]:
#import pandas and load data into df
import pandas as pd
import numpy as np

# Relative location of the purchase records CSV.
file = "Resources/purchase_data.csv"

# Read the CSV into a DataFrame and preview its first ten rows.
purchases_df = pd.read_csv(filepath_or_buffer=file)
purchases_df.head(n=10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [2]:
#support functions
def column_formatter(format_dict, df):
    """
    Apply string format templates to selected DataFrame columns.

    format_dict maps a column name to a ``str.format`` template
    (for example ``"${:,.2f}"``); every value in that column is replaced
    by the template applied to it.

    The frame is modified in place: the formatted columns overwrite the
    originals, and the same ``df`` object is returned for display.
    """
    for column in format_dict:
        template = format_dict[column]
        # Bound str.format is called once per cell value.
        formatted = df[column].map(template.format)
        df[column] = formatted
    return df

In [3]:
# Total players: number of distinct SN values in the purchase data.
players_df = pd.DataFrame(
    [purchases_df["SN"].nunique()],
    columns=["Total Players"],
)
players_df

Unnamed: 0,Total Players
0,576


In [4]:
# Purchases overview: headline figures computed over every purchase row.
total_purchases = len(purchases_df)
unique_items = purchases_df["Item ID"].nunique()
total_rev = purchases_df["Price"].sum()
mean_purchase_price = purchases_df["Price"].mean()

# Single-row summary table; keys are the displayed column headers.
purchase_overview_df = pd.DataFrame({
    "Unique Items Purchases": [unique_items],
    "Average Price per Purchase": [mean_purchase_price],
    "Total Number of Purchases": [total_purchases],
    "Total Revenue": [total_rev],
})

# Both monetary columns share the same currency template.
format_map = dict.fromkeys(["Average Price per Purchase", "Total Revenue"], "${:,.2f}")

column_formatter(format_map, purchase_overview_df)

Unnamed: 0,Unique Items Purchases,Average Price per Purchase,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [5]:
#Gender demographics
# Group purchases by gender; this groupby object is reused by the purchasing analysis.
gender_group = purchases_df.groupby("Gender")
# Distinct SN values per gender, so a player with several purchases counts once.
gender_count = gender_group["SN"].nunique()
# Build one row per gender actually present in the data rather than hard-coding
# three positional lookups (integer keys on a label-indexed Series are deprecated
# in pandas, and a fixed row count breaks if the categories change).
gender_breakdown_df = gender_count.rename("Count").rename_axis("Gender").reset_index()
# Share of each gender relative to the sum of the per-gender counts.
gender_breakdown_df["Percentage"] = (
    gender_breakdown_df["Count"] / gender_breakdown_df["Count"].sum() * 100
)
format_map = {"Percentage" : "{:.2f}%"}
# column_formatter overwrites the column in place and returns the frame for display.
column_formatter(format_map, gender_breakdown_df)

Unnamed: 0,Gender,Count,Percentage
0,Female,81,14.06%
1,Male,484,84.03%
2,Other / Non-Disclosed,11,1.91%


In [6]:
#Purchasing Analysis by gender
# Start from the per-gender player counts; the percentage column is not needed here.
gender_analysis_df = gender_breakdown_df.drop(columns=["Percentage"])
# Attach the price aggregates by gender label instead of by row position, so the
# figures stay on the right row even if the two frames are ordered differently.
gender_analysis_df["Average Purchase"] = gender_analysis_df["Gender"].map(gender_group["Price"].mean())
gender_analysis_df["Total Purchase Value"] = gender_analysis_df["Gender"].map(gender_group["Price"].sum())
# Total spend divided by the unique player count, referenced by column name
# rather than column position.
gender_analysis_df["Average Total Purchase per Person"] = (
    gender_analysis_df["Total Purchase Value"] / gender_analysis_df["Count"]
)
format_map = {"Average Purchase":"${:,.2f}","Total Purchase Value": "${:,.2f}","Average Total Purchase per Person":"${:,.2f}"}
# column_formatter overwrites the columns in place and returns the frame for display.
column_formatter(format_map,gender_analysis_df)

Unnamed: 0,Gender,Count,Average Purchase,Total Purchase Value,Average Total Purchase per Person
0,Female,81,$3.20,$361.94,$4.47
1,Male,484,$3.02,"$1,967.64",$4.07
2,Other / Non-Disclosed,11,$3.35,$50.19,$4.56
