### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [2]:
number_of_players = purchase_data["SN"].nunique()
number_of_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Number of unique items
number_of_items = purchase_data["Item ID"].nunique()

# Average price of items 
average_price = purchase_data["Price"].mean()

# Number of Purchases
number_of_purchases = purchase_data["Purchase ID"].count()

# Revenue
revenue = average_price * number_of_purchases

# Create a new DataFrame to display the summary 
summary_df = pd.DataFrame({'Number of Unique Items':number_of_items, 
                           'Average Price':average_price.round(2), 
                           'Number of Purchases':number_of_purchases, 
                           'Total Revenue':revenue.round(2)}, index=[0])
summary_df.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
# Data frame to work on gender demographics
unique_df = purchase_data.drop_duplicates(subset='SN')
unique_df.head()

# New Data Frame for Gender Demographics
columns = ['Total Count', 'Percentage Players']
gender_group = unique_df.groupby("Gender")

# Group the genders get the total Count 
gender_df = pd.DataFrame(gender_group['Gender'].value_counts())
gender_df = gender_df.rename(columns={"Gender":"Total Count"})

# Get the Percentage of the genders
gender_df['Percentage of Players'] = (gender_group['Gender'].value_counts()/number_of_players)*100
gender_df['Percentage of Players'] = gender_df['Percentage of Players'].map("{:,.2f}%".format)
#gender_df = gender_df.loc[:,~gender_df.columns.duplicated()]
#gender_df.sort_values(by=['Total Count'],ascending=False)
gender_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Count,Percentage of Players
Gender,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,Female,81,14.06%
Male,Male,484,84.03%
Other / Non-Disclosed,Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
# Obtain Purchase count, average purchase price, avg. purchase total per person
#purchase_count = gender_df
#purchase_count = purchase_data.loc[purchase_data['Gender']=='Male']].count()

# Purchase count, avg. purchase price,
grp = purchase_data.groupby('Gender').agg({'Price':['count','mean','sum','size']})
'''.rename(columns={'count':'Purchase Count','mean': 'Average Purchase Price','sum': 'Total Purchase Value'})'''
print(grp)

print()

'''print(purchase_data.groupby('Gender').agg(Purchase_Count=('Price','count'),
                                         Average_Purchase_Price=('Price','mean'),
                                         Total_Purchase_Value=('Price','sum')))
'''
# Average purchase total per person
#perp = purchase_data.groupby('SN').agg({'Price':['sum']})
#perp = purchase_data.groupby('Gender').Price.sum()
#print(perp.apply(lambda x: x.describe()))
#perp = purchase_data.loc(purchase_data.groupby(['Gender','SN'])['Price'].idxmin())
perp = purchase_data.groupby(['Gender'])[['Price']].sum()/2
#print(perp.head())
perp.head()
#ts = purchase_data.loc[purchase_data['Gender']]
#print(purchase_data.groupby('Gender')perp.agg({'Price':['sum']}))

                      Price                        
                      count      mean      sum size
Gender                                             
Female                  113  3.203009   361.94  113
Male                    652  3.017853  1967.64  652
Other / Non-Disclosed    15  3.346000    50.19   15



Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Female,180.97
Male,983.82
Other / Non-Disclosed,25.095


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [20]:
# DataFrame to work for this part
age_demo_df = pd.DataFrame(unique_df)
age_demo_df
# Create bins for ages
bins = [0,9,14,19,24,29,34,39,150]

# Create labels for these bins
group_labels = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']

# Slice the data and place it into bins
age_demo_df['Age View'] = pd.cut(purchase_data['Age'], bins, labels = group_labels)
age_demo_df.head()


age_demo_group = pd.DataFrame(age_demo_df.groupby('Age View'))
age_demo_group['Age'].value_counts()
age_demo_group['Percentage'] = age_demo_group['Age'].value_counts()/number_of_players

# New Data Frame for Age Demographics
columns = ['Total Count', 'Percentage Players']
age_group = unique_df.groupby("Age")

# Group the genders get the total Count 
age_df = pd.DataFrame(age_group['Age'].value_counts())
age_df = age_df.rename(columns={"Gender":"Total Count"})

# Get the Percentage of the genders
#gender_df['Percentage of Players'] = (gender_group['Gender'].value_counts()/number_of_players)*100
#gender_df['Percentage of Players'] = gender_df['Percentage of Players'].map("{:,.2f}%".format)
#age_demo_group[['Age']].apply(lambda x: x/x.sum()*100)


KeyError: 'Age'

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

