### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,101,Final Critic,4.19


## Player Count

* Display the total number of players


In [2]:
#Find unique players (use SN [Screen Name])

print("Total Number of Players")
totalPlayers=purchase_data_df.SN.nunique(dropna=False)
print(totalPlayers)

Total Number of Players
576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Count unique values in column 'Item ID' of the dataframe

uniqueValues = purchase_data_df['Item ID'].nunique()
 
print('Number of Unique Items')
print(uniqueValues)

#Find average price (mean of Price column)

avg_price = purchase_data_df["Price"].mean()
print('Average Price')
print(avg_price)

#Find number of purchases (count of rows)

print('Total Purchases')
totalPurchases = purchase_data_df["Purchase ID"].count()
print(totalPurchases)

#Find total revenue (sum for Price column)

print('Total Revenue')
totalRevenue = purchase_data_df["Price"].sum()
print(totalRevenue)

Number of Unique Items
183
Average Price
3.050987179487176
Total Purchases
780
Total Revenue
2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
#Get counts for Male, Female, Other
#Keep in mind that players may have made multiple purchases- make sure only to count unique individuals
#Percentage~ Male/Female/Other divided by total then multiply by 100

print('Males')
males_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male", :]
uniqueMales = males_df['SN'].nunique()
print(uniqueMales)

pctMales=(uniqueMales/totalPlayers)*100
print(pctMales)

print('Females')
females_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female", :]
uniqueFems = females_df['SN'].nunique()
print(uniqueFems)

pctFems=(uniqueFems/totalPlayers)*100
print(pctFems)

print('Others/Non-Disclosed')
other_nondisclosed_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Other / Non-Disclosed", :]
uniqueOthers = other_nondisclosed_df['SN'].nunique()
print(uniqueOthers)

pctOthers=(uniqueOthers/totalPlayers)*100
print(pctOthers)

Males
484
84.02777777777779
Females
81
14.0625
Others/Non-Disclosed
11
1.9097222222222223



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [16]:
#Retrieve purchase count: group genders by Purchase ID

grouped_males_df = males_df.groupby("Purchase ID")
male_purchases = grouped_males_df["Purchase ID"].sum()
print('Purchase Count: Males')
print(len(male_purchases))

grouped_females_df = females_df.groupby("Purchase ID")
female_purchases = grouped_females_df["Purchase ID"].sum()
print('Purchase Count: Females')
print(len(female_purchases))

grouped_other_df = other_nondisclosed_df.groupby("Purchase ID")
other_purchases = grouped_other_df["Purchase ID"].sum()
print('Purchase Count: Other / Non-Disclosed')
print(len(other_purchases))


#NEED HELP WITH THIS SECTION

#Retrieve avg. purchase price: group genders by Price
grouped_males_avg_df = males_df.groupby("Price")
male_avg_purchase = grouped_males_avg_df["Price"].mean()
print('Average Purchase Price: Males')
print(male_avg_purchase)

grouped_females_avg_df = females_df.groupby("Price")
female_avg_purchase = grouped_females_avg_df["Price"].mean()
print('Average Purchase Price: Females')
print(female_avg_purchase)

grouped_other_avg_df = other_nondisclosed_df.groupby("Price")
other_avg_purchases = grouped_other_df["Price"].mean()
print('Average Purchase Price: Other / Non-Disclosed')
print(other_avg_purchases)



Purchase Count: Males
652
Purchase Count: Females
113
Purchase Count: Other / Non-Disclosed
15
Average Purchase Price: Males
Price
1.00    1.00
1.01    1.01
1.02    1.02
1.03    1.03
1.06    1.06
        ... 
4.90    4.90
4.91    4.91
4.93    4.93
4.94    4.94
4.99    4.99
Name: Price, Length: 144, dtype: float64
Average Purchase Price: Females
Price
1.00    1.00
1.01    1.01
1.02    1.02
1.09    1.09
1.16    1.16
        ... 
4.75    4.75
4.76    4.76
4.84    4.84
4.88    4.88
4.90    4.90
Name: Price, Length: 79, dtype: float64
Average Purchase Price: Other / Non-Disclosed
Purchase ID
9      3.58
22     3.81
82     4.40
111    4.75
228    3.39
237    3.55
242    3.94
291    3.45
350    2.22
401    1.33
484    3.94
549    3.10
629    2.18
637    3.45
747    3.10
Name: Price, dtype: float64


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

