### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data



## Player Count

* Display the total number of players


In [None]:
totalNumPlayers = purchase_data.SN.value_counts().count()
totalNumPlayers 

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [None]:
numUniqueItems = purchase_data["Item ID"].value_counts().count()
numUniqueItems

In [None]:
avgPrice = purchase_data["Price"].mean()
avgPrice

In [None]:
numPurchases = purchase_data["Purchase ID"].count()
numPurchases

In [None]:
totalRevenue = purchase_data["Price"].sum()
totalRevenue    

In [None]:
purchasingTotalSummary_df = pd.DataFrame({
    "Number of Unique Items":[numUniqueItems],
    "Average Price": [f"${round(avgPrice,2)}"],
    "Number of Purchases":[numPurchases],
    "Total Revenue": [f"${round(totalRevenue,2)}"]
    })
purchasingTotalSummary_df

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [None]:
# Filter purchases completed by Male players
malePurchases = purchase_data.loc[purchase_data["Gender"] == "Male"]

#Count of Male players
numMalePlayers = len(malePurchases["SN"].unique())
numMalePlayers


In [None]:
# Filter purchases completed by Female players
femalePurchases = purchase_data.loc[purchase_data["Gender"] == "Female"]

#Count of Male players
numFemalePlayers = len(femalePurchases["SN"].unique())
numFemalePlayers
femalePurchases




In [None]:
#Percentage of Male players
percentFemalePlayers = round(numFemalePlayers / totalNumPlayers * 100,2)
percentFemalePlayers

In [None]:
# Filter purchases completed by Other / Non-Disclosed players
otherGenderPurchases = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed"]

#Count of Other / Non-Disclosed players
numOtherGenderPlayers = len(otherGenderPurchases["SN"].unique())
numOtherGenderPlayers


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
#Prepare Data sets by Column
genderCol = ["Female", "Male", "Other / Non-Disclosed"]
purchaseCountCol = [femalePurchases["Purchase ID"].count(), malePurchases["Purchase ID"].count(), otherGenderPurchases["Purchase ID"].count()]
avgPurchasePriceCol = [femalePurchases["Price"].mean(), malePurchases["Price"].mean(), otherGenderPurchases["Price"].mean()]
totalPurchaseValueCol = [femalePurchases["Price"].sum(), malePurchases["Price"].sum(), otherGenderPurchases["Price"].sum()]
avgTotalPurchasePerPersonCol = [femalePurchases["Price"].sum()/numFemalePlayers, malePurchases["Price"].sum()/numMalePlayers, otherGenderPurchases["Price"].sum()/numOtherGenderPlayers]

#Create Gender Purchase Analysis Summary Dataframe
genderPurchaseAnalysisSummary = pd.DataFrame({"Gender":genderCol,"Purchase Count":purchaseCountCol, "Average Purchase Price": avgPurchasePriceCol, "Total Purchase Value": totalPurchaseValueCol, "Avg Total Purchase per Person": avgTotalPurchasePerPersonCol})

#Apply accounting number format style
genderPurchaseAnalysisSummary["Average Purchase Price"] = genderPurchaseAnalysisSummary["Average Purchase Price"].astype(float).map("${:,.2f}".format)
genderPurchaseAnalysisSummary["Total Purchase Value"] = genderPurchaseAnalysisSummary["Total Purchase Value"].astype(float).map("${:,.2f}".format)
genderPurchaseAnalysisSummary["Avg Total Purchase per Person"] = genderPurchaseAnalysisSummary["Avg Total Purchase per Person"].astype(float).map("${:,.2f}".format)

#Set Index to Gender
genderPurchaseAnalysisSummary.set_index("Gender")


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [None]:
# Establish bins for ages
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 100]
ageGroup_names = ["<10", "10-14","15-19","20-24","25-29","30-34","35-39","40+"]


In [None]:
# Categorize the existing players using the age bins. Hint: use pd.cut()
purchase_data_age = purchase_data
purchase_data_age["Age Ranges"] = pd.cut(purchase_data_age["Age"], bins, labels=ageGroup_names, include_lowest=True)
# Calculate the numbers and percentages by age group
purchase_data_age_group = purchase_data_age.groupby("Age Ranges")
purchase_data_age_groupPlayerCount = purchase_data_age_group["SN"].unique()
playerCountCol = [ len(x) for x in purchase_data_age_groupPlayerCount ]
print(playerCountCol)
playerCountPercentCol = [ f'{round(x/totalNumPlayers * 100,2)}%' for x in playerCountCol ]
print(playerCountPercentCol)

ageDemographicsSummary = pd.DataFrame({
    "Age Range": ageGroup_names,
    "Total Counts": playerCountCol,
    "Percentage of Players": playerCountPercentCol
})
ageDemographicsSummary.set_index("Age Range")








## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Bin the purchase_data data frame by age
purchase_data_age_group = purchase_data_age.groupby("Age Ranges")
purchaseCountCol = purchase_data_age_group["Purchase ID"].count()
print(purchaseCountCol)

In [None]:
averagePurchasePriceCol = purchase_data_age_group["Price"].mean()
averagePurchasePriceColfmt = averagePurchasePriceCol.map("${:,.2f}".format)
print(averagePurchasePriceColfmt)


In [None]:
totalPurchaseValueCol = purchase_data_age_group["Price"].sum()
# totalPurchaseValueCol = [x for x in totalPurchaseValueCol]
totalPurchaseValueColfmt = totalPurchaseValueCol.map("${:,.2f}".format)
print(totalPurchaseValueCol)

In [221]:
purchaseAnalysisSummary = pd.DataFrame({
    "Purchase Count": purchaseCountCol,
    "Average Purchase Price": averagePurchasePriceCol,
    "Total Purchase Value": totalPurchaseValueCol
})
purchaseAnalysisSummary


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,23,3.353478,77.13
10-14,28,2.956429,82.78
15-19,136,3.035956,412.89
20-24,365,3.052219,1114.06
25-29,101,2.90099,293.0
30-34,73,2.931507,214.0
35-39,41,3.601707,147.67
40+,13,2.941538,38.24


In [224]:
purchaseAnalysisSummary["Avg Total Purchase per Person"] = ""
avgPurchasePerPersonCol = purchaseAnalysisSummary["Total Purchase Value"] / playerCountCol

purchaseAnalysisSummaryfmt["Avg Total Purchase per Person"] = avgPurchasePerPersonCol.map("${:,.2f}".format)
purchaseAnalysisSummaryfmt["Average Purchase Price"] = averagePurchasePriceCol.map("${:,.2f}".format)
purchaseAnalysisSummaryfmt["Total Purchase Value"] = totalPurchaseValueCol.map("${:,.2f}".format)

purchaseAnalysisSummaryfmt



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Ranges,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [242]:
purchase_data_TopSpenders = purchase_data
topSpenderGroup = purchase_data_TopSpenders.groupby("SN").count()
topSpenderGroup.sort_values("Purchase ID")
# topSpenderPurchaseCountCol = topSpenderGroup["Purchase ID"]


Unnamed: 0_level_0,Purchase ID,Age,Gender,Item ID,Item Name,Price,Age Ranges
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Adairialis76,1,1,1,1,1,1,1
Lisovynya38,1,1,1,1,1,1,1
Lisotesta51,1,1,1,1,1,1,1
Lisossanya98,1,1,1,1,1,1,1
Lisossala30,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...
Iri67,3,3,3,3,3,3,3
Aina42,3,3,3,3,3,3,3
Idastidru52,4,4,4,4,4,4,4
Iral74,4,4,4,4,4,4,4


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

