### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
# convert to database and look at data
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [None]:
# review the data types in columns
purchase_data_df.dtypes

In [3]:
# Rename the SN column to Total Players and identify the total number of unique players
playerdata_df = purchase_data_df.rename(columns={"SN": "Total_Players"})
Total_Players = playerdata_df.groupby("Total_Players")["Total_Players"].nunique()
Total_Players.count()
tp_count = Total_Players.count()

In [4]:
#Create a new data frame referencing the total number of unique players
total_players_df = pd.DataFrame({
    "Total Players":[Total_Players.count()]})
total_players_df

Unnamed: 0,Total Players
0,576


In [5]:
#Identify the number of unique Item IDs in the data set
unique_items = purchase_data_df.groupby("Item ID")["Item ID"].nunique()
unique = unique_items.count()

#Calculate the total number of purchases
purchases = purchase_data_df["Purchase ID"].count()

#Calculate the total revenue
total_revenue = purchase_data_df["Price"].sum()


# Calculate the average price of the items from the total revenue and number of purchase
average_price = ((total_revenue)/(purchases))


In [6]:
#Create a new dictionary with purchasing analysis
purchasing_analysis_df = pd.DataFrame({
    "Number of Unique Items":[unique],
    "Average Price":[average_price], 
    "Number of Purchases":[purchases], 
    "Total Revenue":[total_revenue]})
purchasing_analysis_df


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.050987,780,2379.77


In [7]:
#Clean the formatting of the database
#file_df["avg_cost"] = file_df["avg_cost"].map("${:.2f}".format)
purchasing_analysis_df["Average Price"] = purchasing_analysis_df["Average Price"].map("${:.2f}".format)
purchasing_analysis_df["Total Revenue"] = purchasing_analysis_df["Total Revenue"].map("${:,.2f}".format)
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [8]:
#Clean data so that only unique players are present 
x = playerdata_df.duplicated(['Total_Players', 'Gender'])
playerdata_df.loc[x, ['Total_Players', 'Gender']] = ''
playerdata_df.tail()

Unnamed: 0,Purchase ID,Total_Players,Age,Gender,Item ID,Item Name,Price
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,,21,,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19
779,779,,24,,50,Dawn,4.6


In [9]:
#Filter data by gender
Male = playerdata_df.loc[playerdata_df["Gender"] == "Male"]
Male_Total = Male.count()

Female = playerdata_df.loc[playerdata_df["Gender"] == "Female"]
Female_Total = Female.count()

Other = playerdata_df.loc[playerdata_df["Gender"] == "Other / Non-Disclosed"]
Other_Total = Other.count()

Gender = Male_Total + Female_Total + Other_Total
Gender

Purchase ID      576
Total_Players    576
Age              576
Gender           576
Item ID          576
Item Name        576
Price            576
dtype: int64

In [10]:
male_percent = ((Male_Total)/(Gender)*100).map("{:,.2f}%".format)
female_percent = ((Female_Total)/(Gender)*100).map("{:,.2f}%".format)
other_percent = ((Other_Total)/(Gender)*100).map("{:,.2f}%".format)

In [11]:
gender_analysis_df = pd.DataFrame({
    "" : ["Male", "Female", "Other / Non-Disclosed"],
    "Total Count":[Male_Total, Female_Total, Other_Total],
    "Percentage of Players":[male_percent, female_percent, other_percent]})

gender_analysis_df

Unnamed: 0,Unnamed: 1,Total Count,Percentage of Players
0,Male,Purchase ID 484 Total_Players 484 Age ...,Purchase ID 84.03% Total_Players 84.03...
1,Female,Purchase ID 81 Total_Players 81 Age ...,Purchase ID 14.06% Total_Players 14.06...
2,Other / Non-Disclosed,Purchase ID 11 Total_Players 11 Age ...,Purchase ID 1.91% Total_Players 1.91% ...



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [12]:
#Find the Purchase Count
purchase_df = purchase_data_df.groupby(["Gender"])
purchase_count = purchase_df["Purchase ID"].count()
purchase_count

#state_duration = grouped_usa_df["duration (seconds)"].sum()

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [13]:
purchase_df.head(5)


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58
15,15,Lisassa64,21,Female,98,"Deadline, Voice Of Subtlety",2.89
18,18,Reunasu60,22,Female,82,Nirvana,4.9
22,22,Siarithria38,38,Other / Non-Disclosed,24,Warped Fetish,3.81
38,38,Reulae52,10,Female,116,Renewed Skeletal Katana,4.18


In [14]:
#Find the Average Purchase Price
purchase_price = purchase_df["Price"].sum()
avg_purchase_price = ((purchase_price)/(purchase_count)).map("${:,.2f}".format)
avg_purchase_price

Gender
Female                   $3.20
Male                     $3.02
Other / Non-Disclosed    $3.35
dtype: object

In [15]:
#Find the Total Purchase Value
purchase_value = purchase_df["Price"].sum()
total_purchase_value = purchase_value.map("${:,.2f}".format)
total_purchase_value


Gender
Female                     $361.94
Male                     $1,967.64
Other / Non-Disclosed       $50.19
Name: Price, dtype: object

In [16]:
#Find purchase per person
#Avg_PPP = (purchase_value)/(tp_count)
#Avg_PPP
#Avg Total Purchase per Person

In [17]:
#construct Database
Purchase_Analysis_Gender = pd.DataFrame({
    "Purchase Count" : [purchase_count],
    "Average Purchase Price":[avg_purchase_price],
    "Total Purchase Value":[total_purchase_value],
    "Avg Total Purchase per Person": ["n/a"]})

Purchase_Analysis_Gender


Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
0,Gender Female 113 Male ...,Gender Female $3.20 Male ...,Gender Female $361.94 Male...,


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [18]:
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [24]:
#Establish bins for age demographics
bins = [0, 9, 14, 19, 24, 29, 34, 39, 40]
labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

purchase_data_df["Player Group"] = pd.cut(purchase_data_df["Age"], bins, labels=labels, include_lowest=True)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Player Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,20-24
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,20-24
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,20-24
778,778,Sisur91,7,Male,92,Final Critic,4.19,<10


In [None]:
#Calculate the numbers and percentages by age group

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

