### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import pandas as pd
import numpy as np

In [None]:
# Relative path of the purchase CSV that is read into a DataFrame below
file_to_load = "Resources/purchase_data.csv"

In [None]:
# Load the purchase CSV into the DataFrame used by every later cell
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

In [None]:
# Preview the first ten rows of the purchase data
purchase_data.head(n=10)

In [None]:
# Preview the last ten rows of the purchase data
purchase_data.tail(n=10)

In [None]:
# Display pandas' descriptive statistics for the purchase data
purchase_data.describe()

In [None]:
# Print pandas' structural summary of the frame (DataFrame.info)
purchase_data.info()

## Player Count

In [None]:
# Count players as the number of distinct values in the "SN" column
player_count = purchase_data["SN"].nunique()

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [None]:
# Headline figures computed over the whole purchase table.
unique_items = purchase_data["Item ID"].nunique()
# mean must be *called*: the bare attribute is a bound method, not a number,
# and would end up as an object inside the summary frame.
average_price = purchase_data["Price"].mean()
number_of_purchases = len(purchase_data)

revenue = purchase_data["Price"].sum()

summary_row = [unique_items, average_price, number_of_purchases, revenue]

# Build a one-row summary frame with labelled columns so the displayed
# values are self-explanatory (and each column keeps its own dtype).
pd.DataFrame(
    [summary_row],
    columns=[
        "Number of Unique Items",
        "Average Price",
        "Number of Purchases",
        "Total Revenue",
    ],
)

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [None]:
# Keep one row per distinct (SN, Age, Gender) combination so that players
# with several purchases are counted once; the index is rebuilt from 0.
player_columns = ["SN", "Age", "Gender"]
player_data = purchase_data.loc[:, player_columns]
player_data = player_data.drop_duplicates()
player_data = player_data.reset_index(drop=True)
player_data.head()

In [None]:
# Number of de-duplicated player rows per gender
player_data.groupby(by="Gender").size()

In [None]:
# Gender demographics: player count per gender and its share of all players
gender_counts = player_data.groupby("Gender").size()
player_summary = gender_counts.to_frame(name="Count")
total_players = len(player_data)
player_summary["Percentage"] = player_summary["Count"].div(total_players).mul(100)
player_summary


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person, etc., by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Group the purchases by gender once and derive each statistic from it
purchases_by_gender = purchase_data.groupby("Gender")
prices_by_gender = purchases_by_gender["Price"]

gender_purchase_count = purchases_by_gender.size()
gender_price_average = prices_by_gender.mean()
gender_price_sum = prices_by_gender.sum()

# Quick look at the three series side by side (one row per gender)
pd.DataFrame([gender_purchase_count, gender_price_average, gender_price_sum]).T

In [None]:
# Per-gender purchasing summary: purchase count, mean price, total spend,
# and the average total spend per distinct player of that gender.
gender_purchase_summary = pd.DataFrame([gender_purchase_count, gender_price_average, gender_price_sum]).T
gender_purchase_summary.columns = ["Total Purchases", "Average Purchases", "Sum Purchases"]
# The per-player average divides each gender's total spend by the number of
# distinct players (SN values) of that gender, not by the overall player
# count. The original also referenced an undefined name here.
players_per_gender = purchase_data.groupby("Gender")["SN"].nunique()
gender_purchase_summary['Per Player Average Purchase'] = gender_price_sum / players_per_gender
gender_purchase_summary

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal places


* Display Age Demographics Table


In [None]:
# Age bin edges: 0, then 9..39 in steps of five, then a catch-all upper bound
bins = [0] + list(range(9, 40, 5)) + [9000]

# One label per bin: "<10", the five-year bands "10-14" .. "35-39", and "40+"
group_names = ["<10"] + [f"{low}-{low + 4}" for low in range(10, 40, 5)] + ["40+"]


In [None]:
# Label every player with an age bracket. include_lowest=True makes the
# first bin closed on the left so an age equal to the lowest edge is binned.
# (The keyword was misspelled, which raises a TypeError in pd.cut.)
player_data["AgeGroup"] = pd.cut(player_data["Age"], bins, labels=group_names, include_lowest=True)
# Call head() on the correct frame so the preview is actually displayed
player_data.head()

In [None]:
# Age demographics: player count per age bracket and its share of all players
age_group_counts = player_data.groupby("AgeGroup").size()
player_summary2 = age_group_counts.to_frame(name="Count")
n_players = len(player_data)
player_summary2["Percentage"] = player_summary2["Count"].div(n_players).mul(100)
player_summary2

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [None]:
# Label every purchase with the buyer's age bracket. include_lowest=True
# makes the first bin closed on the left. (The keyword was misspelled,
# which raises a TypeError in pd.cut.)
purchase_data["AgeGroup"] = pd.cut(purchase_data["Age"], bins, labels=group_names, include_lowest=True)
# head must be called to display the preview rather than the bound method
purchase_data.head()

In [None]:
# Group the purchases by age bracket once and derive each statistic from it
purchases_by_age = purchase_data.groupby("AgeGroup")
prices_by_age = purchases_by_age["Price"]

age_purchase_count = purchases_by_age.size()
age_price_average = prices_by_age.mean()
age_price_sum = prices_by_age.sum()

In [None]:
# Per-age-bracket purchasing summary: purchase count, mean price, total
# spend, and the average total spend per distinct player in the bracket.
age_purchase_summary = pd.DataFrame([age_purchase_count, age_price_average, age_price_sum]).T
age_purchase_summary.columns = ["Total Purchases", "Average Purchases", "Sum Purchases"]
# Divide by a Series of distinct players (SN values) per bracket. Dividing
# by the whole player_summary2 DataFrame does not yield a single column and
# the assignment fails.
players_per_age_group = purchase_data.groupby("AgeGroup")["SN"].nunique()
age_purchase_summary['Per Player Average Purchase'] = age_price_sum / players_per_age_group
age_purchase_summary

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# Group the purchases by player (SN) once and derive each statistic from it
purchases_by_sn = purchase_data.groupby("SN")
prices_by_sn = purchases_by_sn["Price"]

sn_purchase_count = purchases_by_sn.size()
sn_price_average = prices_by_sn.mean()
sn_price_sum = prices_by_sn.sum()

In [None]:
# Per-player summary: one row per SN with purchase count, mean price and total spend
sn_series = [sn_purchase_count, sn_price_average, sn_price_sum]
sn_purchase_summary = pd.DataFrame(sn_series).T
sn_purchase_summary.columns = ["Total Purchases", "Average Purchases", "Sum Purchases"]

# Preview the highest spenders first
sn_purchase_summary.sort_values("Sum Purchases", ascending=False).head(n=5)

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [None]:
# Restrict the purchase data to the item-related columns
item_columns = ["Item ID", "Item Name", "Price"]
item_data = purchase_data.loc[:, item_columns]
item_data.head()

In [None]:
# Group the purchases by (Item ID, Item Name) once and derive each statistic from it
purchases_by_item = purchase_data.groupby(["Item ID", "Item Name"])
prices_by_item = purchases_by_item["Price"]

item_purchase_count = purchases_by_item.size()
item_price_average = prices_by_item.mean()
item_price_sum = prices_by_item.sum()

In [None]:
# Per-item summary: one row per item with purchase count, mean price and total value
item_series = [item_purchase_count, item_price_average, item_price_sum]
item_purchase_summary = pd.DataFrame(item_series).T
item_purchase_summary.columns = ["Total Purchases", "Average Purchases", "Sum Purchases"]

# Preview the most frequently purchased items first
item_purchase_summary.sort_values("Total Purchases", ascending=False).head(n=5)

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [None]:
# Preview the items with the largest total purchase value first
item_purchase_summary.sort_values("Sum Purchases", ascending=False).head(n=5)