### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [2]:

players = purchase_data["SN"].drop_duplicates()
players1 = players.count()
print("Total Players")
print("---------------")
players1

Total Players
---------------


576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
items = len(purchase_data["Item ID"].unique())
items

purchases = purchase_data["Purchase ID"].count()
purchases

net_income = purchase_data["Price"].sum()
net_income

average_price = purchase_data["Price"].mean()
average_price

price = net_income/purchases
price

purchasing_df = pd.DataFrame([{"# of Items": items, "Average Price": average_price, "# of Purchases": purchases, "Net Income": net_income}])
purchasing_df = purchasing_df[["# of Items", "Average Price", "# of Purchases", "Net Income"]]
purchasing_df = purchasing_df.round(2)

purchasing_df


Unnamed: 0,# of Items,Average Price,# of Purchases,Net Income
0,179,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
demo = purchase_data.loc[:, ["Gender", "SN", "Age"]]
demo = demo.drop_duplicates()


genders = demo["Gender"].value_counts()
gender_percent = genders/players1*100
genders
demographics = pd.DataFrame({"Percentage": gender_percent, "Totals": genders})

demographics = demographics.round(2)
demographics = demographics.dropna()
demographics

Unnamed: 0,Percentage,Totals
Male,84.03,484
Female,14.06,81
Other / Non-Disclosed,1.91,11



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
purchase_total = purchase_data.groupby(["Gender"]).count()["Price"]
avg_purch_price = purchase_data.groupby(["Gender"]).mean()["Price"]
total_purchase_value = purchase_data.groupby(["Gender"]).sum()["Price"]
totals = total_purchase_value/genders

summary_purchasing = pd.DataFrame({"Purchase Total":purchase_total, "Avg Price/Purchase": avg_purch_price, "Total Purchase Amount": total_purchase_value, "Avg Total Purchase per Person": totals})

summary_purchasing = summary_purchasing[["Purchase Total", "Avg Price/Purchase", "Total Purchase Amount", "Avg Total Purchase per Person"]]
summary_purchasing = summary_purchasing.round(2)
summary_purchasing

Unnamed: 0_level_0,Purchase Total,Avg Price/Purchase,Total Purchase Amount,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [6]:
bins= [0,10,15,20,25,30,35,40,999]
group_names=['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']

unique_heroes=demo.loc[:,["SN","Age"]]
unique_heroes["Age Ranges"] = pd.cut(unique_heroes["Age"], bins, labels = group_names)
unique_heroes

age_demographics_totals = unique_heroes["Age Ranges"].value_counts()
age_demographics_percents = (age_demographics_totals / players * 100)
age_demographics_percents.round(2)
age_demographics = pd.DataFrame({"Percentage of Total": age_demographics_percents, "Age Group Total": age_demographics_totals})

age_demographics.sort_index()

TypeError: loop of ufunc does not support argument 0 of type float which has no callable rint method

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:

user_count=purchase_data.groupby(["SN"]).count()["Price"]
user_average=purchase_data.groupby(["SN"]).mean()["Price"].map("${0:,.2f}".format)
user_total=purchase_data.groupby(["SN"]).sum()["Price"].map("${0:,.2f}".format)

user_summary=pd.DataFrame({"Purchase Count":user_count,
                          "Average Purchase":user_average,
                          "Total Purchase Value":user_total})
user_summary.sort_values("Total Purchase Value",ascending=False).round(2).head()

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [14]:
id = purchase_data.groupby(purchase_data['Item ID'])
#item_id.head()
items = id['Item ID'].unique().str[0]
name = id['Item Name'].unique().str[0]
purchase_count = id['Age'].count()
price = id['Price'].unique().str[0]
purchase_total = id['Price'].sum()

item_summary =pd.DataFrame({'Item ID':items,'Item Name':name,'Item Price':price,'Item Count':purchase_count,'Total Purchase':purchase_total})


#item_summary = item_summary.set_index('Item ID')
item_summary = item_summary.sort_values('Item Count', ascending=False)
item_summary_df = item_summary[['Item Name','Item Count','Item Price','Total Purchase']]
item_summary_df.head()

Unnamed: 0_level_0,Item Name,Item Count,Item Price,Total Purchase
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.88,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.19,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [17]:
most_profit=item_summary.sort_values('Total Purchase', ascending=False)
most_profit.head()

Unnamed: 0_level_0,Item ID,Item Name,Item Price,Item Count,Total Purchase
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
92,92,Final Critic,4.88,13,59.99
178,178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,12,50.76
82,82,Nirvana,4.9,9,44.1
145,145,Fiery Glass Crusader,4.58,9,41.22
103,103,Singed Scalpel,4.35,8,34.8
