### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import csv
# Relative path to the purchase records; adjust it if the CSV lives elsewhere.
file_to_load = "Resources/purchase_data.csv"

# Parse the CSV into a DataFrame and preview its first rows.
purchase_data = pd.read_csv(file_to_load)
purchase_data.head()


## Player Count

* Display the total number of players


In [2]:
# nunique() counts every distinct screen name ("SN") once, no matter how many
# rows it appears on, so this is the number of individual players.
total_players = purchase_data['SN'].nunique()
print(total_players)

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Number of distinct items that appear in the purchase data.
unique_items = purchase_data['Item ID'].nunique()

# Keep one row per item. De-duplicate on "Item ID" rather than indexing with
# .loc[<item ids>]: .loc would interpret the IDs as row labels and return
# whichever rows happen to carry those labels, not one row per item.
unique_df = purchase_data.drop_duplicates(subset="Item ID")

# NOTE(review): this averages each item's price once (first occurrence kept).
# If the report should instead show the average price paid per purchase, use
# purchase_data["Price"].mean() -- confirm which figure is intended.
avg_price = unique_df["Price"].mean()

# Revenue and purchase count are taken over every row of the data.
total_revenue = purchase_data["Price"].sum()
total_purchases = purchase_data["Purchase ID"].count()

# Single-row summary table.
summary_df = pd.DataFrame(
    [[unique_items, avg_price, total_revenue, total_purchases]],
    index=['Value'],
    columns=['Uniques', 'Average Price $', 'Total Revenue $', 'Total Purchases'],
)
summary_df

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
# A screen name can appear on several rows, so keep one row per player before
# tallying genders; counting raw rows would count purchases, not players.
players = purchase_data.drop_duplicates(subset="SN")
gender_counts = players["Gender"].value_counts()

# Share of all unique players, kept numeric here; formatting is applied below.
percentage_of_players = (gender_counts / purchase_data["SN"].nunique()) * 100

# Name the columns explicitly instead of relying on the column name that
# value_counts() produces, which differs between pandas versions.
gender_df = pd.DataFrame({
    "Gender": gender_counts,
    "Percentage of Players": percentage_of_players.map("{:,.2f}%".format),
})

# Put the percentage first, then give the count column a descriptive name.
order_gender_df = gender_df[["Percentage of Players", "Gender"]]
fin_gender_df = order_gender_df.rename(columns={"Gender": "Total Count"})
fin_gender_df


## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
# Group every purchase row by the buyer's gender.
gender_groups = purchase_data.groupby("Gender")

# Total spent and number of purchases per gender.
total_value = gender_groups["Price"].sum()
total_count = gender_groups["Price"].count()
total_cleaned = total_value.map("${:,.2f}".format)

# Average price of a single purchase per gender.
average_gender = gender_groups["Price"].mean()
average_gender_clean = average_gender.map("${:,.2f}".format)

# Average total spent per person: divide by the number of distinct players of
# each gender. Dividing by the purchase count would only reproduce the mean
# purchase price computed above.
players_per_gender = gender_groups["SN"].nunique()
normalized_gender = total_value / players_per_gender

# Summary table with display formatting applied to the money columns.
gender_summary_df = pd.DataFrame({
    "Purchase Count": total_count,
    "Average Purchase Price": average_gender_clean,
    "Total Purchase Value": total_cleaned,
    "Avg Total Purchase per Person": normalized_gender.map("${:,.2f}".format),
})
gender_summary_df

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [6]:
# pd.cut bins are right-inclusive by default, so these edges give the
# intervals (0, 9], (9, 15], (15, 20], ... (40, 100]. With a first edge of 10
# an age of exactly 10 would land under "<10" and "10-15" would start at 11.
# Assumes "Age" holds whole numbers -- TODO confirm.
age_bins = [0, 9, 15, 20, 25, 30, 35, 40, 100]
group_names = ["<10", "10-15", "16-20", "21-25", "26-30", "31-35", "36-40", "40+"]

# Adds an "Age Bins" column to purchase_data itself; ages outside the bin
# range are left as NaN by pd.cut.
purchase_data["Age Bins"] = pd.cut(purchase_data["Age"], age_bins, labels=group_names)

# Grouped view of the purchases by age bin; the age purchasing analysis cell
# reuses this object. observed=False keeps empty bins in the result and makes
# the behaviour independent of the pandas default.
age_groups_df = purchase_data.groupby("Age Bins", observed=False)

# Count each player once per age group (distinct screen names), then express
# that count as a share of all unique players.
age_player_counts = age_groups_df["SN"].nunique()
age_summary = pd.DataFrame({
    "Total Count": age_player_counts,
    "Percentage of Players": ((age_player_counts / total_players) * 100).map("{:,.2f}%".format),
})
age_summary

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [7]:
# age_groups_df is the groupby over "Age Bins" built in the age demographics cell.
purchase_count = age_groups_df["Age"].count()
average_purchase = age_groups_df["Price"].mean()
total_value = age_groups_df["Price"].sum()

# Per-person average must use the number of distinct players in each age
# group; dividing by the overall player count would understate every group.
# An age group with no players yields NaN (0 / 0).
players_per_age_group = age_groups_df["SN"].nunique()
average_per_person = total_value / players_per_age_group

age_summary_df = pd.DataFrame({
    "Purchase count": purchase_count,
    "Average Purchase Price $": average_purchase,
    "Total Purchase value $": total_value,
    "Average Purchase Total per person $": average_per_person,
})
age_summary_df

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [8]:
# Aggregate every purchase by the player's screen name.
top_spenders = purchase_data.groupby("SN")
purchase_count = top_spenders["SN"].count()
average_purchase = top_spenders["Price"].mean()
total_bought = top_spenders["Price"].sum()

# Rank players by how much they spent in total, highest first. Sorting by
# "SN" would only order the screen names alphabetically.
whale_df = pd.DataFrame({
    "Purchase count": purchase_count,
    "Average Purchase Price": average_purchase,
    "Total Purchase value $": total_bought,
}).sort_values(by="Total Purchase value $", ascending=False)
whale_df.head()

## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [9]:
# Keep only the columns needed for the per-item statistics.
popular_df = purchase_data[["Item ID", "Item Name", "Price"]]

# A groupby on its own computes nothing, so aggregate it: number of purchases,
# mean price and total value for each (Item ID, Item Name) pair.
item_groups = popular_df.groupby(["Item ID", "Item Name"])
popular_summary_df = (
    item_groups["Price"]
    .agg(["count", "mean", "sum"])
    .rename(columns={
        "count": "Purchase Count",
        "mean": "Item Price",
        "sum": "Total Purchase Value",
    })
    # Most frequently purchased items first.
    .sort_values(by="Purchase Count", ascending=False)
)
popular_summary_df.head()

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

