In [312]:
import pandas as pd
sourceCSV = "Resources/purchase_data.csv"
purchaseData_df = pd.read_csv(sourceCSV)

In [313]:
playerCount = len(purchaseData_df["SN"].unique()) #counting the number of unique values from this column inside the data frame
playerCount_df = pd.DataFrame([playerCount], columns=["Total Players"]) #creating new data frame for displaying count of total unique players
playerCount_df.style.hide_index()

Total Players
576


In [314]:
uniqueItemsCount = len(purchaseData_df["Item Name"].unique()) #getting the number of unique items
purchaseCount = len(purchaseData_df["Price"]) #getting the length of the Price column to add up the number of sales
totalRevenue = sum(purchaseData_df["Price"]) #adding together the contents of the Price column to get the total revenue
avgPrice = totalRevenue / purchaseCount #dividing the total revenue by the number of sales to get the average price per sale

totalPurchaseAnalysis_df = pd.DataFrame(#adding column names and values to the data frame as a dictionary of lists 
    {
    "Unique Items": [uniqueItemsCount],
    "Average Price": [avgPrice],
    "Number of Purchases": [purchaseCount],
    "Total Revenue": [totalRevenue]
    }
)
totalPurchaseAnalysis_df["Average Price"] = totalPurchaseAnalysis_df["Average Price"].astype(float).map("${:,.2f}".format) #formatting to have a dollar sign
totalPurchaseAnalysis_df["Total Revenue"] = totalPurchaseAnalysis_df["Total Revenue"].astype(float).map("${:,.2f}".format) #in front and be two decimal places
totalPurchaseAnalysis_df.style.hide_index()

Unique Items,Average Price,Number of Purchases,Total Revenue
179,$3.05,780,"$2,379.77"


In [315]:
uniqueUsers_df = pd.DataFrame(purchaseData_df)
uniqueUsers_df["SN"] = pd.DataFrame(purchaseData_df["SN"].unique()) #adding null entries for duplicate/repeat players
uniqueUsers_df = uniqueUsers_df.dropna(subset=["SN"]) #dropping all null entries for "SN" column from data frame

uniqueMaleCount = len(uniqueUsers_df.loc[uniqueUsers_df["Gender"] == "Male"]) #getting the number of males from the unique users data frame
uniqueFemaleCount = len(uniqueUsers_df.loc[uniqueUsers_df["Gender"] == "Female"]) #getting the number of females from the unique users data frame
uniqueTotalCount = len(uniqueUsers_df["Gender"])
uniqueOtherCount = uniqueTotalCount - (uniqueMaleCount + uniqueFemaleCount) #getting the number of other because the value must not be male or female so calculating the difference

uniqueMalePercent = (uniqueMaleCount / uniqueTotalCount) * 100
uniqueFemalePercent = (uniqueFemaleCount / uniqueTotalCount) * 100
uniqueOtherPercent = (uniqueOtherCount / uniqueTotalCount) * 100

gender_df = pd.DataFrame(
    {
    "Total Count": [uniqueMaleCount, uniqueFemaleCount, uniqueOtherCount], #populating the data frame columns with the above values
    "Percentage of Players": [uniqueMalePercent, uniqueFemalePercent, uniqueOtherPercent] 
    },
    index=["Male", "Female", "Other"] #giving names to the index values
)

gender_df["Percentage of Players"] = gender_df["Percentage of Players"].astype(float).map("{:,.2f}%".format) #formatting to have a percent sign

gender_df

Unnamed: 0,Total Count,Percentage of Players
Male,476,82.64%
Female,88,15.28%
Other,12,2.08%


In [316]:
males_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Male"]
females_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Female"]
other_df = purchaseData_df.loc[purchaseData_df["Gender"] == "Other / Non-Disclosed"]

avgMalePurchasePrice = totalMalePurchases / len(males_df) #total amount spent per gender divided by number of purchases per gender to get average purchase price
avgFemalePurchasePrice = totalFemalePurchases / len(females_df) 
avgOtherPurchasePrice = totalOtherPurchases / len(other_df) 

totalMalePurchases = sum(males_df["Price"]) #total purchases summed together for all of each gender
totalFemalePurchases = sum(females_df["Price"])
totalOtherPurchases = sum(other_df["Price"])

avgMalePurchasePerPerson = totalMalePurchases / uniqueMaleCount #total amount spent per unique player of each gender divided by number of purchases per gender to get average purchase price
avgFemalePurchasePerPerson = totalFemalePurchases / uniqueFemaleCount
avgOtherPurchasePerPerson = totalOtherPurchases / uniqueOtherCount

genderAnalysis_df = pd.DataFrame(
    {
    "Purchase Count": [len(males_df), len(females_df), len(other_df)], #populating the data frame columns with the above values
    "Avg Purchase": [avgMalePurchasePrice, avgFemalePurchasePrice, avgOtherPurchasePrice],
    "Total Purchase": [totalMalePurchases, totalFemalePurchases, totalOtherPurchases],
    "Avg Purchase per Person": [avgMalePurchasePerPerson, avgFemalePurchasePerPerson, avgOtherPurchasePerPerson]
    },
    index=["Male", "Female", "Other"] #giving names to the index values
)

genderAnalysis_df["Avg Purchase"] = genderAnalysis_df["Avg Purchase"].astype(float).map("${:,.2f}".format) #formatting to have a dollar sign
genderAnalysis_df["Total Purchase"] = genderAnalysis_df["Total Purchase"].astype(float).map("${:,.2f}".format)
genderAnalysis_df["Avg Purchase per Person"] = genderAnalysis_df["Avg Purchase per Person"].astype(float).map("${:,.2f}".format)

genderAnalysis_df

Unnamed: 0,Purchase Count,Avg Purchase,Total Purchase,Avg Purchase per Person
Male,652,$3.02,"$1,967.64",$4.13
Female,113,$3.20,$361.94,$4.11
Other,15,$3.35,$50.19,$4.18
