### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head(40)


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


## Player Count

* Display the total number of players


In [2]:
TotRows = purchase_data["Price"].count()
print ("Total rows = ", TotRows)
print(" ")
TotPlayers = len(purchase_data["SN"].value_counts())
print ("Total Players = ", TotPlayers)


Total rows =  780
 
Total Players =  576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
UniqItms = len(purchase_data["Item ID"].value_counts())
AvePrc = purchase_data["Price"].mean()
ItemsPurch = purchase_data["Price"].count()
TotRev = purchase_data["Price"].sum()
#
print (
    f" UniqItms= {UniqItms} AvePrc= ${AvePrc:,.2f} ItemsPurch= {ItemsPurch}  TotRev= ${TotRev:,.2f} "
      )
# Convert the summary data into a DataFrame
SummaryData_df = pd.DataFrame({"Number of Unique Items":[UniqItms],
                               "Average Price":[AvePrc],
                               "Number of Purchases":[ItemsPurch],
                               "Total Revenue":[TotRev]
                              })
SummaryData_df.head()


 UniqItms= 183 AvePrc= $3.05 ItemsPurch= 780  TotRev= $2,379.77 


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.050987,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players
purchase_data["Gender" = "Male"].count()

* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:

CntMale = purchase_data["Gender"].value_counts()['Male']
CntFemale = purchase_data["Gender"].value_counts()['Female']
CntOther = purchase_data["Gender"].value_counts()['Other / Non-Disclosed']
CntTot = CntMale + CntFemale + CntOther
print(
    f" Total: {CntTot}\n Male: {CntMale}\n Female: {CntFemale}\n Non_specfic: {CntOther}")
print(" ")
PctMale = (CntMale / CntTot) * 100
PctFemale = (CntFemale / CntTot) * 100
PctOther = (CntOther / CntTot) * 100
print(
    f" % Male: {PctMale}\n % Female: {PctFemale}\n % Non_specifc: {PctOther}")

 Total: 780
 Male: 652
 Female: 113
 Non_specfic: 15
 
 % Male: 83.58974358974359
 % Female: 14.487179487179489
 % Non_specifc: 1.9230769230769231



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
OnlyMale = purchase_data.loc[purchase_data["Gender"] == "Male", :] 
SumMale = OnlyMale["Price"].sum()
AveMale = OnlyMale["Price"].mean()
CntMaleSN = len(OnlyMale["SN"].unique())
AvePerMale = SumMale/CntMaleSN
print(
    f" Male Total: {CntMale}\n Male AvePrice: ${AveMale:,.2f}\n Male TotPrice: ${SumMale:,.2f}\n Male AvePerPerson: ${AvePerMale:,.2f}")
print(" ")
OnlyFemale = purchase_data.loc[purchase_data["Gender"] == "Female", :] 
SumFemale = OnlyFemale["Price"].sum()
AveFemale = OnlyFemale["Price"].mean()
CntFemaleSN = len(OnlyFemale["SN"].unique())
AvePerFemale = SumFemale/CntFemaleSN
print(
    f" Female Total: {CntFemale}\n Female AvePrice: ${AveFemale:,.2f}\n Female TotPrice: ${SumFemale:,.2f}\n Female AvePerPerson: ${AvePerFemale:,.2f}")
print(" ")
OnlyOth = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed", :] 
SumOth = OnlyOth["Price"].sum()
AveOth = OnlyOth["Price"].mean()
CntOthSN = len(OnlyOth["SN"].unique())
AvePerOth = SumOth/CntOthSN
print(
    f" Other Total: {CntOther}\n Other AvePrice: ${AveOth:,.2f}\n Other TotPrice: ${SumOth:,.2f}\n Other AvePerPerson: ${AvePerOth:,.2f}")
print(" ")
# Create a new frame consolodating above calculations

# Creating a Pandas DataFrame by passing in a LIST OF DICTIONARIES
# Each value in the list is a dictionary
# Imagine that each dictionary represents a row of data in our eventual purchase count, avg. purchase price, avg. purchase total per persontaframe
# Each dictionary should have the same keys, since these keys dictate the column headers of our dataframe
#                        purchase count, avg. purchase price, avg. purchase total per person
my_list = [{"Gender": "Male", "Purchase Count": CntMale, "Average Price Paid": AveMale, "Total Purchase Value": SumMale, "Average Purchase PerPerson": AvePerMale},
           {"Gender": "Female", "Purchase Count": CntFemale, "Average Price Paid": AveFemale, "Total Purchase Value": SumFemale, "Average Purchase PerPerson": AvePerFemale},
           {"Gender": "Other / Non-Disclosed", "Purchase Count": CntOther, "Average Price Paid": AveOth, "Total Purchase Value": SumOth, "Average Purchase PerPerson": AvePerOth}]
ConsolPdDf = pd.DataFrame(my_list)
ConsolPdDf

 Male Total: 652
 Male AvePrice: $3.02
 Male TotPrice: $1,967.64
 Male AvePerPerson: $4.07
 
 Female Total: 113
 Female AvePrice: $3.20
 Female TotPrice: $361.94
 Female AvePerPerson: $4.47
 
 Other Total: 15
 Other AvePrice: $3.35
 Other TotPrice: $50.19
 Other AvePerPerson: $4.56
 


Unnamed: 0,Gender,Purchase Count,Average Price Paid,Total Purchase Value,Average Purchase PerPerson
0,Male,652,3.017853,1967.64,4.065372
1,Female,113,3.203009,361.94,4.468395
2,Other / Non-Disclosed,15,3.346,50.19,4.562727


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [74]:
purchase_data_pd = pd.DataFrame(purchase_data)

# ----ByPrchr_Data_pd is all the data I need including the purchasers with multiple items---
ByPrchr_Data_pd = purchase_data_pd.loc[:,['SN', 'Age', 'Item ID', 'Item Name', 'Price']]

ByPrchr_Price_df = ByPrchr_Data_pd.groupby(["SN"]).sum()["Price"]

# ----ByPrchr_Data_pd is all the data I need by purchasers without the summed price data---
ByPrchr_Uniq_pd = ByPrchr_Data_pd.drop_duplicates(subset='SN')
#      drop unneeded Price Column
ByPrchr_Uniq_pd = ByPrchr_Uniq_pd.drop(columns='Price')

# ----------------sort the uniq's by SN then append...
ByPrchr_UniqSorted_pd = ByPrchr_Uniq_pd.sort_values(by=['SN'])
# --------------------Merge two dataframes using an inner join
#  merge_table = pd.merge(_pd, _pd, on="")
ByPrchr_UniqSorted_pd = pd.merge(ByPrchr_UniqSorted_pd, ByPrchr_Price_df, on='SN')

In [75]:
# Create bins and bin labels for the age column
Aged_df = pd.DataFrame(ByPrchr_UniqSorted_pd,columns=["Age", "SN", "Price"])

Age_bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
Age_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [76]:
# Bin the Age column
# cut() returns a Pandas Series containing each of the binned column's values 
#                                    translated into their corresponding bins
# We can append our bins to Aged_df
Aged_df["Level"] = pd.cut(Aged_df["Age"], Age_bins, labels=Age_labels)


In [77]:
#   Group the Aged data by the Age Level we've created...

# group the purchasers by count of purchases
grouped_AgedByLvlCnt_df = Aged_df.groupby(["Level"]).count()['SN']
grouped_AgedByLvlDol_df = Aged_df.groupby(["Level"]).sum()['Price']
grouped_AgedByLvlCPct_df = grouped_AgedByLvlCnt_df / TotPlayers

# Merge the tables 
ByPrchr_UniqSortedCompl_pd = pd.merge(grouped_AgedByLvlCnt_df, grouped_AgedByLvlCPct_df, on='Level')

# rename the columns
ByPrchrRen_pd = ByPrchr_UniqSortedCompl_pd.rename(columns={"SN_x":"Total Count", "SN_y":"Percentage of Players"})
ByPrchrRen_pd.style.format({'Total Count': "{:.0f}",'Percentage of Players': "{:.2%}"})
                      

Unnamed: 0_level_0,Total Count,Percentage of Players
Level,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [69]:
# ----these values are accumulated by Age Level without regard to purchaser  -----
RawPurch_pd = pd.DataFrame(purchase_data)
RawAged_data_pd = RawPurch_pd
RawAged_data_pd["Level"] = pd.cut(RawPurch_pd["Age"], Age_bins, labels=Age_labels)
# -------------------------------------------Purchase Count
grouped_RawAgedCnt_pd = pd.DataFrame(RawAged_data_pd.groupby(["Level"]).count()['Item ID'])
grouped_RawAgedCnt = RawAged_data_pd.groupby(["Level"]).count()['Item ID']

# group the purchasers by age level ----------Total Purchase Value
grouped_RawAgedDol_pd = pd.DataFrame(RawAged_data_pd.groupby(["Level"]).sum()['Price'])
grouped_RawAgedDol = RawAged_data_pd.groupby(["Level"]).sum()['Price']

# ---------------------------------------------Average Purchase Price
grouped_RawAgedAve_pd = pd.DataFrame(grouped_RawAgedDol / grouped_RawAgedCnt)

# ---------------------------------------------Avg Total Purchase per Person
grouped_AgedByLvlPrctg_pd = pd.DataFrame(grouped_AgedByLvlDol_df / grouped_AgedByLvlCnt_df)

# Merge the tables 
grouped_RawCompl_pd = pd.DataFrame() #=========creates a new dataframe that's empty
grouped_RawCompl_pd = pd.merge(grouped_RawAgedCnt_pd, grouped_RawAgedAve_pd, on='Level')
grouped_RawCompl_pd = pd.merge(grouped_RawCompl_pd, TotRawAgedDol_pd, on='Level')
grouped_RawCompl_pd = pd.merge(grouped_RawCompl_pd, TotAgedByLvlPrctg_pd, on='Level')

grouped_RawCompl_pd = grouped_RawCompl_pd.rename(columns={"Item ID":"Purchase Count"})
grouped_RawCompl_pd = grouped_RawCompl_pd.rename(columns={"0_x":"Average Purchase Price"})
grouped_RawCompl_pd = grouped_RawCompl_pd.rename(columns={"Price":"Total Purchase Value"})
grouped_RawCompl_pd = grouped_RawCompl_pd.rename(columns={"0_y":"Avg Total Purchase per Person"})

grouped_RawCompl_pd.style.format({'Average Purchase Price': "${:.2f}",
                                  'Total Purchase Value': "${:.2f}",
                                  'Avg Total Purchase per Person': "${:.2f}"
                                 })

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Level,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,$1114.06,$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
#purchase_data
print (purchase_data)

UniqPurchasers = len(purchase_data["SN"].unique())
print (UniqPurchasers)

#
TopPurch = purchase_data
#TopSpndrs = purchase_data(purchase_data["SN"].unique().count() > 2)

# To sort from highest to lowest, ascending=False must be passed in
#TopPurch = purchase_data.sort_values("SN", ascending=False)
TopPurch = purchase_data.drop(["Purchase ID","Age","Gender","Item ID","Item Name","Level"], axis=1)

TopPurch = TopPurch.set_index("SN")
TopPurch = TopPurch.sort_values("SN")
print (TopPurch)

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

