### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
csv_path = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(csv_path)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,101,Final Critic,4.19


* Display the total number of players


In [2]:
player_count = purchase_data_df["SN"].nunique()
print("Player Count =", player_count)



Player Count = 576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
unique_item_count =  purchase_data_df["Item Name"].nunique()
unique_purchase_count = purchase_data_df ["Purchase ID"].nunique()
unique_sale_item = purchase_data_df["Item ID"].nunique()
average_price = purchase_data_df["Price"].mean()
print("Unique Item Count =", unique_item_count)
print("Unique Purchase Count=", unique_purchase_count)
print("Unique Sale Item =", unique_sale_item)
print("Average Price =", average_price)



Unique Item Count = 179
Unique Purchase Count= 780
Unique Sale Item = 183
Average Price = 3.050987179487176


In [4]:
# create a new summary table for the above calculations
summary_table_df = pd.DataFrame({"Item Count": [unique_item_count]
                          ,"Purchase Count": [unique_purchase_count]
                          ," Item Count": [unique_sale_item]
                          ,"Average Price": [average_price]})
summary_table_df



Unnamed: 0,Item Count,Purchase Count,Item Count.1,Average Price
0,179,780,183,3.050987


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [5]:
Percent_gender_count = (purchase_data_df["Gender"].value_counts())/100
print(Percent_gender_count)



Male                     6.52
Female                   1.13
Other / Non-Disclosed    0.15
Name: Gender, dtype: float64



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [6]:
purchase_analysis_df = purchase_data_df.groupby(['Gender'])
print(purchase_analysis_df)

purchase_analysis_df.count()



<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1224b2b50>


Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,113,113,113,113,113,113
Male,652,652,652,652,652,652
Other / Non-Disclosed,15,15,15,15,15,15


In [7]:
purchase_count = purchase_analysis_df["Purchase ID"].count()
avg_purchase_price = purchase_analysis_df["Price"].mean()
#avg_purchase_per_person = purchase_data_df["Price"].sum()/ purchase_data_df["SN"]
print(purchase_count)
print(avg_purchase_price)
#print(avg_purchase_per_person)

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64
Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [8]:
#know the range of age in the data set
print("Min Age =", purchase_data_df["Age"].min())
print("Max Age=", purchase_data_df["Age"].max())


Min Age = 7
Max Age= 45


In [9]:
#Calculate the numbers and percentages by age group
# Create a goroupby object
group_percent =purchase_data_df["Age"].groupby(purchase_data_df["SN"])
group_percent

print(group_percent)

<pandas.core.groupby.generic.SeriesGroupBy object at 0x12256a210>


In [10]:
# Create the bins in which Data will be held
bins = [0, 9, 14, 19, 24, 29, 34, 39, 48]
# Create the group names for the bins
age_group =  ["0 to 9", "10 to 14", "15 to 19", "20 to 24", "25 to 29", "30 to 34", "35 to 39", "40 to 48"]


purchase_data_df["Age"] = pd.cut(purchase_data_df["Age"], bins, labels=age_group)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20 to 24,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40 to 48,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,20 to 24,Male,92,Final Critic,4.88
3,3,Chamassasya86,20 to 24,Male,100,Blindscythe,3.27
4,4,Iskosia90,20 to 24,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,20 to 24,Female,60,Wolf,3.54
776,776,Iral74,20 to 24,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20 to 24,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,0 to 9,Male,101,Final Critic,4.19


In [19]:
#count the values by age groups
total_age_count = purchase_data_df["Age"].value_counts()

# percentage
age_percentage = (total_age_count/player_count)*100
print("age_percentage:", age_percentage)


age_percentage: 20 to 24    63.368056
15 to 19    23.611111
25 to 29    17.534722
30 to 34    12.673611
35 to 39     7.118056
10 to 14     4.861111
0 to 9       3.993056
40 to 48     2.256944
Name: Age, dtype: float64


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
#purchase_age_analysis


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

