In [38]:
import pandas as pd


# Location of the raw purchase records, relative to the working directory.
csvPath = "./Resources/purchase_data.csv"
itemPD_df = pd.read_csv(filepath_or_buffer=csvPath)

#Player count~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The same screen name ("SN") can appear on several purchase rows, so the
# player total is the number of distinct screen names, not the number of rows.
playerCount = itemPD_df["SN"].nunique(dropna=True)

#print(playerCount)

#Purchasing  Analysis (total)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Overall purchasing summary, built in a single step. Mixing integer and
# floating-point entries yields a float Series, so the counts are stored as
# floats alongside the monetary values.
pAnalysis = pd.Series({
    # distinct values in the "Item Name" column
    "Number of Unique Items": itemPD_df["Item Name"].nunique(),
    # arithmetic mean of the "Price" column
    "Average Purchase Price": itemPD_df["Price"].mean(),
    # number of non-missing "Price" entries (one per purchase row when no
    # price is missing)
    "Total Number of Purchases": itemPD_df["Price"].count(),
    # sum of the "Price" column
    "Total Revenue": itemPD_df["Price"].sum(),
})

#Gender Demographics~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Keep one row per player (first row seen for each screen name) so that a
# player with several purchases is counted only once.
gDemoDupless_df = itemPD_df.drop_duplicates(subset="SN")

# Pairs of (label prefix used in gDemo, value as written in the "Gender" column).
# The data set spells the third category with spaces around the slash,
# while the Series labels do not.
genderLabels = (
    ("Male", "Male"),
    ("Female", "Female"),
    ("Other/Non-Disclosed", "Other / Non-Disclosed"),
)

# For each gender: number of unique players and their share of all players.
# Building the labels from one prefix guarantees that the "... Players" and
# "... Player Percentage" entries are named consistently (a hand-typed label
# previously left "Other/Non-Disclosed Player Percentage" stuck at 0).
genderStats = {}
for labelPrefix, genderValue in genderLabels:
    genderPlayers = (gDemoDupless_df["Gender"] == genderValue).sum()
    genderStats[labelPrefix + " Players"] = genderPlayers
    genderStats[labelPrefix + " Player Percentage"] = genderPlayers / playerCount * 100

# Counts and percentages share one float Series; creating it with its final
# dtype avoids assigning floats into an integer placeholder.
gDemo = pd.Series(genderStats, dtype="float64")


#Purchasing Analysis(Gender)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Purchasing summary per gender, indexed by gender label.
#
# The table is assembled from fully computed rows instead of filling an
# all-zero integer placeholder cell by cell: writing float results into integer
# columns is deprecated in recent pandas releases, and the per-gender logic is
# identical so it is written once.
gPA_rows = []
for rowLabel, genderValue in (
    ("Female", "Female"),
    ("Male", "Male"),
    # the data set writes this category with spaces around the slash
    ("Other/Non-Disclosed", "Other / Non-Disclosed"),
):
    isGender = itemPD_df["Gender"] == genderValue
    # number of purchase rows for this gender
    purchaseCount = isGender.sum()
    # total of the "Price" column over those rows
    totalValue = itemPD_df.loc[isGender, "Price"].sum()
    gPA_rows.append({
        "Gender": rowLabel,
        "Purchase Count": purchaseCount,
        "Average Purchase Price": totalValue / purchaseCount,
        "Total Purchase Value": totalValue,
        # spend divided by the number of unique players of this gender
        "Avg Total Purchase per Person": totalValue / gDemo[rowLabel + " Players"],
    })

gPA_df = pd.DataFrame(
    gPA_rows,
    columns=[
        "Gender",
        "Purchase Count",
        "Average Purchase Price",
        "Total Purchase Value",
        "Avg Total Purchase per Person",
    ],
).set_index("Gender")

#Age Demographics~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Bin edges for the age ranges. pd.cut treats each bin as right-inclusive, so
# the edges 0, 9, 14, ... give (0, 9], (9, 14], ... and 999 acts as an
# open-ended upper limit for the last group.
bins = [0, 9, 14, 19, 24, 29, 34, 39, 999]

# One label per bin, in the same order as the intervals above.
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Tag every purchase row with its age group (adds a column to itemPD_df).
itemPD_df["Binned Age"] = pd.cut(itemPD_df["Age"], bins, labels=group_names)

# "Binned Age" is categorical. observed=False is stated explicitly so that age
# groups with no players still appear (with a count of 0) regardless of the
# pandas version's default for grouping on categoricals.
ageGrouped_df = itemPD_df.groupby("Binned Age", observed=False)

# Unique players per age group (a player with several purchases counts once).
ageCountUnique_df = ageGrouped_df["SN"].nunique()

# Share of all players in each age group, as a percentage with two decimals.
agePercentageTotal_df = (ageCountUnique_df / playerCount * 100).round(2)

ageDisplay_df = pd.DataFrame({
    "Total Count": ageCountUnique_df,
    "Percentage of Players": agePercentageTotal_df,
})

print(ageDisplay_df)


#Purchasing Analysis (Age)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#Top Spenders~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Total amount spent by each screen name, kept as a one-column DataFrame.
topPurchaseGroup = itemPD_df.groupby(by="SN")[["Price"]].sum()
# Biggest spenders first. NOTE: despite its name, top5spenders holds every
# player; only the expression below limits what is shown to five rows.
top5spenders = topPurchaseGroup.sort_values(by="Price", ascending=False)
top5spenders.head(5)

#Most Popular Items~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#Most Profitable Items~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


            Total Count  Percentage of Players
Binned Age                                    
<10                  17                   2.95
10-14                22                   3.82
15-19               107                  18.58
20-24               258                  44.79
25-29                77                  13.37
30-34                52                   9.03
35-39                31                   5.38
40+                  12                   2.08


Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Lisosia93,18.96
Idastidru52,15.45
Chamjask73,13.83
Iral74,13.62
Iskadarya95,13.1
