### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [74]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [84]:
#players_t = purchase_data.SN.nunique()
#players_t
# Display the total number of players
player_demographics = purchase_data.loc[:, ["Gender", "SN", "Age"]]
player_demographics = player_demographics.drop_duplicates()
num_players = player_demographics.count()[0]
num_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [81]:
#Create table variables

#How many unique items are there?
u_items = len(purchase_data["Item ID"].unique())
u_items

#How many transactions were there?
n_trans = purchase_data["Purchase ID"].count()
n_trans

#What was the total value of the transactions
t_trans = purchase_data["Price"].sum()
t_trans = "${:,.2f}".format(t_trans)

#Determine the average price
ave_price = purchase_data["Price"].mean()
ave_price

#Create dataframe
Purchasing_Analysis_Total = pd.DataFrame({"# of Unique Items": [u_items],
                                         "Price (Ave)":[ave_price],
                                         "# of Transactions":[n_trans],
                                         "Total Sales":[t_trans]})

output = Purchasing_Analysis_Total.round(2)
output

Unnamed: 0,# of Unique Items,Price (Ave),# of Transactions,Total Sales
0,179,3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [95]:
# Who is playing this game? 
# Already counted totals in[9] "players_t"
gender_df = player_demographics["Gender"].value_counts()
gender_df

# Determine the percent for each gender
gender_per = (gender_df/num_players)*100
gender_perc = gender_per.round(1)
gender_perc

# New dataframe
gender_sum = pd.DataFrame({"gender_df":gender_df,"gender_perc":gender_perc})
gender_sum

#Add the percent of each gender into the gender df
#gender_df["Percent Players"] = gender_perc
#gender_df["Percent Players"] = gender_df["Percent Players"].map("{:,.1f}%".format)
#gender_df

# Rename the Gender column
#gender_df = gender_df.rename({"Gender":"Players by Gender"})#gender_df
#gender_df

Unnamed: 0,gender_df,gender_perc
Male,484,84.0
Female,81,14.1
Other / Non-Disclosed,11,1.9



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [100]:
# Create a df to capture data by gender
gender_analysis_df = purchase_data.groupby(["Gender"])

# Determine the number of purchases by gender
purch_count = gender_analysis_df["Gender"].count()
purch_count

# Total purchase by gender
t_purch = gender_analysis_df["Price"].sum()
t_purch

# Ave purchase price by gender
ave_purch = t_purch/purch_count

# Normalize data
purch_norm = gender_analysis_df["SN"].nunique()
purch_gender_norm = t_purch/purch_norm

# Print df
gender_org_df = pd.DataFrame({"Purchase Count": purch_count,
                            "Purchase Price (Ave)":ave_purch,
                            "Total Purchases by Gender":t_purch,
                            "Normalized Total Purchase":purch_gender_norm})

gender_org_df["Purchase Count"] = gender_org_df["Purchase Count"].map("{:,.2f}%".format)

gender_org_df


Unnamed: 0_level_0,Purchase Count,Purchase Price (Ave),Total Purchases by Gender,Normalized Total Purchase
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113.00%,3.203009,361.94,4.468395
Male,652.00%,3.017853,1967.64,4.065372
Other / Non-Disclosed,15.00%,3.346,50.19,4.562727


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [107]:
# Establish bins for ages
#age_bins = [0, 11.90, 18.90, 22.90, 26.90, 30.90, 39.90, 99999]
#group_names = ["<12", "12-18", "19-22", "22-26", "27-30", "31-40", "40+"]
age_bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
print(player_demographics)
#database for age Demo
player_demographics["Player Age Demographics"]= pd.cut(player_demographics["Age"], age_bins, labels=group_names)
#demographic_group =player_demographics.groupby("Player Age Demographics")

# Determine the total count in each age group
age_df = player_demographics["Player Age Demographics"].value_counts()
age_df

# Determine the percent of each age group
age_perc = (age_df/num_players)*100
age_perc

#Add the percent of each age group into the age df
#age_df["Percent of Total Players"] = age_perc
#age_df["Percent of Total Players"] = age_df["Percent of Total Players"].map("{:,.2f}%".format)
#age_df

# Rename the age group column
#age_df = age_df.rename(columns={"Player Age Demographics":"Total Players by each age group"})
#age_df

     Gender             SN  Age Player Age Demographics
0      Male        Lisim78   20                   20-24
1      Male    Lisovynya38   40                     40+
2      Male     Ithergue48   24                   20-24
3      Male  Chamassasya86   24                   20-24
4      Male      Iskosia90   23                   20-24
..      ...            ...  ...                     ...
773    Male         Hala31   21                   20-24
774    Male     Jiskjask80   11                   10-14
775  Female     Aethedru70   21                   20-24
777    Male     Yathecal72   20                   20-24
778    Male        Sisur91    7                     <10

[576 rows x 4 columns]


20-24    44.791667
15-19    18.576389
25-29    13.368056
30-34     9.027778
35-39     5.381944
10-14     3.819444
<10       2.951389
40+       2.083333
Name: Player Age Demographics, dtype: float64

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [118]:
#create new df
purchase_data["Purchase Analysis by Age"]= pd.cut(purchase_data["Age"], age_bins, labels=group_names)

# total purchases by age group
age_group_df = purchase_data.groupby(["Purchase Analysis by Age"]).sum()["Price"]
age_group_df

# total price paid per group
age_group_tot_price = purchase_data.groupby(["Purchase Analysis by Age"]).mean()["Price"]
age_group_tot_price



# Ave price paid per item
#age_group_ave_price = age_group_tot_price/age_group_df
#age_group_ave_price = demographic_group["Price"].mean()
#age_group_ave_price = age_group_ave_price.round(2)
#age_group_ave_price

# Ave spend per player
#age_group_players = demographic_group["SN"].nunique()
#age_group_players
#age_group_player_spend = age_group_tot_price/age_group_players
#age_group_player_spend

#Print Age df
#age_group_df["Purchase Count"] = age_group_df
#age_group_df["Total Purchases"] = age_group_tot_price
#age_group_df["Purchase Price per Item (Ave)"] = age_group_ave_price
#age_group_df["Average Spend per Player"] = age_group_player_spend
#age_group_df

Purchase Analysis by Age
<10      3.353478
10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
30-34    2.931507
35-39    3.601707
40+      2.941538
Name: Price, dtype: float64

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [122]:
# Create a df to determine the top spender
top_spender_df = purchase_data.groupby("SN").sum()["Price"].rename("Total Spend")
top_spender_df

# How much did each player spend?
#t_player_spend = top_spender_df["Price"].sum()
#t_player_spend = t_player_spend.map("${:,.2f}".format)
#t_player_spend

# Average purchase price per user
#ave_pur_player = top_spender_df["Price"].mean()
#ave_pur_player_f = ave_pur_player.map("${:,.2f}".format)
#ave_pur_player_f

#Print top spenders df
#top_spender_df["Total Spend"] = t_player_spend
#top_spender_df["Ave Spend per Item"] = ave_pur_player_f
#age_group_df["Purchase Price per Item (Ave)"] = age_group_ave_price
#age_group_df["Average Spend per Player"] = age_group_player_spend
#top_spender_df.head()

#Create new data frome


# Need to orgainze and have total purchasing be descending
#top_spender_df_d = top_spender_df_d.sort_values("Total Spend",ascending=False)
#top_spender_df_d

SN
Adairialis76     2.28
Adastirin33      4.48
Aeda94           4.91
Aela59           4.32
Aelaria33        1.79
                 ... 
Yathecal82       6.22
Yathedeu43       6.02
Yoishirrala98    4.58
Zhisrisu83       7.89
Zontibe81        8.03
Name: Total Spend, Length: 576, dtype: float64

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [73]:
# what items sold most frequently
most_freq_df = purchase_data.groupby(['Item ID', 'Item Name'])
most_freq_df.count()

# Average item price is equal to item price
price_item = most_freq_df["Price"].mean()
price_item

# df is now organized by item- total price
t_price_item = most_freq_df["Price"].sum()
t_price_item = t_price_item.map("${:,.2f}".format)
t_price_item

# create df to output
most_freq_df["Item Price"] = price_item
most_freq_df["Total Spent for Item"] = t_price_item
most_freq_df



TypeError: 'DataFrameGroupBy' object does not support item assignment

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

