### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
from IPython.display import HTML

# Relative path to the PyMoli purchase data used by every analysis cell below
PyMoli_data = "Resources/purchase_data.csv"

# Load the purchase records into a pandas DataFrame
PyMoli_df = pd.read_csv(PyMoli_data)

# Preview the first rows to confirm the file loaded as expected
PyMoli_df.head()


## Player Count

* Display the total number of players


In [2]:
# Total players = number of distinct values in the 'SN' column
# (a player who made several purchases appears on several rows).
player_count = PyMoli_df['SN'].nunique()

# One-row summary frame so the figure renders as a table
player_count_df = pd.DataFrame({'Total Players': [player_count]})

# Table styling: left-aligned cells, slightly larger header text
styles = [
    {"selector": "td", "props": [("font-size", "110%"), ("text-align", "left")]},
    {"selector": "th", "props": [("font-size", "125%"), ("text-align", "left")]},
]
html = player_count_df.style.set_table_styles(styles)
html




Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
# Headline purchase metrics computed over every row of the purchase data.
# NOTE(review): distinct items are counted by 'Item Name'; the notebook's later
# instructions also refer to an 'Item ID' column -- confirm that names and IDs
# map one-to-one, otherwise this figure undercounts.
unique_items = PyMoli_df['Item Name'].nunique()
average_price = PyMoli_df['Price'].mean()
quantity_purchases = PyMoli_df['Purchase ID'].count()
total_revenue = PyMoli_df['Price'].sum()

# Create a single-row summary data frame to hold the results
dataDict = {
    'Total Unique Items': [unique_items],
    'Average Price': [average_price],
    'Quantity of Purchases': [quantity_purchases],
    'Total Revenue': [total_revenue],
}
purchasing_analysis_df = pd.DataFrame(dataDict)

# Improve readability with currency formatting.
# This turns the numeric columns into display strings, so do any further
# arithmetic on the scalar variables above rather than on this frame.
for currency_col in ('Average Price', 'Total Revenue'):
    purchasing_analysis_df[currency_col] = purchasing_analysis_df[currency_col].map("${:.2f}".format)

# Display the summary data frame with left-aligned cells
styles = [
    {"selector": "td", "props": [("font-size", "110%"), ("text-align", "left")]},
]
html = purchasing_analysis_df.style.set_table_styles(styles)
html

Unnamed: 0,Total Unique Items,Average Price,Quantity of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


## Gender Demographics

* Hint: use `DataFrame.drop_duplicates(subset=None, keep='first', inplace=False)` to remove repeat purchase rows so that each player is counted only once
* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed




In [4]:
# Get a dataframe of just names and genders, reduced to one row per player.
# De-duplicate on 'SN' alone: player_count (the percentage denominator) is the
# number of distinct 'SN' values, so counting distinct (SN, Gender) pairs would
# double-count any player recorded under more than one gender and push the
# percentages above 100%.
SN_Gender_df = PyMoli_df[["SN", "Gender"]]
SN_Gender_unique_df = SN_Gender_df.drop_duplicates(subset="SN", keep="first")

# Get groupby object to create the output dataframe
gender_df = SN_Gender_unique_df.groupby('Gender')

# Player count per gender, plus its share of all players.
# player_count comes from the Player Count cell, which must run first.
gender_demo_df = gender_df.count()
percentage_of_players = gender_demo_df["SN"] * 100 / player_count
gender_demo_df["Percentage of Players"] = percentage_of_players.map("{:.2f}%".format)

# Largest group first; 'SN' now holds counts, so give it a descriptive label
gender_demo_df = (
    gender_demo_df
    .sort_values('SN', ascending=False)
    .rename(columns={"SN": "Total Count"})
)

# Table styling: left-aligned cells, slightly larger header text
styles = [
    {"selector": "td", "props": [("font-size", "110%"), ("text-align", "left")]},
    {"selector": "th", "props": [("font-size", "120%"), ("text-align", "left")]},
]
html = gender_demo_df.style.set_table_styles(styles)
html


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
# Group every purchase row by the buyer's gender.
# (Name kept as-is for compatibility; note this is a GroupBy object, not a DataFrame.)
purchase_analysis_df = PyMoli_df.groupby('Gender')

# Purchase count, average purchase price and total purchase value by gender
purchases_by_gender = purchase_analysis_df['Purchase ID'].count()
average_purchase_price_by_gender = purchase_analysis_df['Price'].mean()
total_revenues_by_gender = purchase_analysis_df['Price'].sum()

# Avg total purchase per person by gender: revenue divided by the number of
# players of that gender. This reads gender_demo_df['Total Count'] from the
# Gender Demographics cell, so that cell must run first; the division aligns
# the two Series on their shared Gender index.
average_purchase_total_by_gender = total_revenues_by_gender / gender_demo_df['Total Count']

# Create a summary data frame to hold the results: one concat of all four
# Series, then descriptive column labels, then most purchases first.
new_cols = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Avg Total Purchase per Person']
purchasing_analysis_by_gender_df = pd.concat(
    [
        purchases_by_gender,
        average_purchase_price_by_gender,
        total_revenues_by_gender,
        average_purchase_total_by_gender,
    ],
    axis=1,
)
purchasing_analysis_by_gender_df.columns = new_cols
purchasing_analysis_by_gender_df = purchasing_analysis_by_gender_df.sort_values('Purchase Count', ascending=False)

# Improve readability with currency formatting (every column except the count).
# This converts those columns to display strings.
for currency_col in new_cols[1:]:
    purchasing_analysis_by_gender_df[currency_col] = purchasing_analysis_by_gender_df[currency_col].map("${:.2f}".format)

# Display the summary data frame with left-aligned cells and headers
styles = [
    {"selector": "td", "props": [("font-size", "100%"), ("text-align", "left")]},
    {"selector": "th", "props": [("font-size", "110%"), ("text-align", "left")]},
]
html = purchasing_analysis_by_gender_df.style.set_table_styles(styles)
html


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,652,$3.02,$1967.64,$4.07
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

