# Pymoli Summary
* The majority of HoP players are male. Female players, however, tend to spend $0.18 more on average than their male counterparts.
* Players between the ages of 20 and 24 are Pymoli's largest source of revenue.


In [None]:
# Import dependencies 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Location of the purchase records CSV (relative path, read by the load cell below)
file = 'purchase_data.csv'

In [None]:
# Read the purchase records into a DataFrame and preview its first five rows
data_df = pd.read_csv(filepath_or_buffer=file)
data_df.head(n=5)

In [None]:
# Total HoP players: number of distinct values in the 'SN' column
# (missing values are not counted)
player_count = data_df['SN'].nunique()
player_count

In [None]:
# Purchasing analysis (total): store-wide headline figures in a one-row table
unique_item_count = data_df['Item ID'].nunique()
avg_purchase_price = '${:,.2f}'.format(data_df['Price'].mean())
# Count purchases by column label, the same way the per-gender and per-age
# cells do. The earlier `data_df.count()[0]` used an integer key on a
# label-indexed Series, which depends on positional fallback that pandas has
# deprecated.
total_purchases = data_df['Purchase ID'].count()
total_revenue = '${:,.2f}'.format(data_df['Price'].sum())

# Scalars need an explicit index so pandas can build a single-row frame.
purchasing_analysis_df = pd.DataFrame(
    {
        'Number of Unique Items': unique_item_count,
        'Average Purchase Price': avg_purchase_price,
        'Total Purchases': total_purchases,
        'Total Revenue': total_revenue,
    },
    index=['Values'],
)
purchasing_analysis_df


In [None]:
# Gender Demographics: distinct players per gender and their share of all players
gender = data_df.groupby('Gender')['SN'].unique()

# Each entry of `gender` is an array of that gender's distinct 'SN' values,
# so its size is the head count for the gender.
males, females, non_disclosed = (
    gender[label].size for label in ('Male', 'Female', 'Other / Non-Disclosed')
)
totals = [males, females, non_disclosed]

# Share of the overall player count, rendered with two decimals and a % sign.
percentages = [f'{100 * total / player_count:.2f}%' for total in totals]

demographic_dict = {
    'Percentage of Players': percentages,
    'Total': totals,
}

demographics_df = pd.DataFrame(
    demographic_dict,
    index=['Males', 'Females', 'Other/Non-Disclosed'],
)
demographics_df

In [None]:
# Purchasing Analysis by Gender: purchase count, average price, total value,
# and total value per distinct player for each gender
as_dollars = '${:,.2f}'.format

# Boolean row masks, one per gender label as it appears in the data.
male_rows = data_df['Gender'] == 'Male'
female_rows = data_df['Gender'] == 'Female'
non_disclosed_rows = data_df['Gender'] == 'Other / Non-Disclosed'

male_purchase_count = data_df.loc[male_rows, 'Purchase ID'].count()
male_total_purchase_value = data_df.loc[male_rows, 'Price'].sum()
male_avg_purchase_price = male_total_purchase_value / male_purchase_count
# Per-person figures divide by the distinct-player counts from the
# Gender Demographics cell (males / females / non_disclosed).
male_avg_purchase_total_per_person = male_total_purchase_value / males

female_purchase_count = data_df.loc[female_rows, 'Purchase ID'].count()
female_total_purchase_value = data_df.loc[female_rows, 'Price'].sum()
female_avg_purchase_price = female_total_purchase_value / female_purchase_count
female_avg_purchase_total_per_person = female_total_purchase_value / females

non_disclosed_purchase_count = data_df.loc[non_disclosed_rows, 'Purchase ID'].count()
non_disclosed_total_purchase_value = data_df.loc[non_disclosed_rows, 'Price'].sum()
non_disclosed_avg_purchase_price = non_disclosed_total_purchase_value / non_disclosed_purchase_count
non_disclosed_avg_purchase_total_per_person = non_disclosed_total_purchase_value / non_disclosed

purchase_dict = {
    'Number of Purchases': [
        male_purchase_count,
        female_purchase_count,
        non_disclosed_purchase_count,
    ],
    'Average Purchase Price': [
        as_dollars(value)
        for value in (
            male_avg_purchase_price,
            female_avg_purchase_price,
            non_disclosed_avg_purchase_price,
        )
    ],
    'Total Purchase Value': [
        as_dollars(value)
        for value in (
            male_total_purchase_value,
            female_total_purchase_value,
            non_disclosed_total_purchase_value,
        )
    ],
    'Average Purchase Total per Person by Gender': [
        as_dollars(value)
        for value in (
            male_avg_purchase_total_per_person,
            female_avg_purchase_total_per_person,
            non_disclosed_avg_purchase_total_per_person,
        )
    ],
}

gender_purchasing_df = pd.DataFrame(
    purchase_dict,
    index=['Males', 'Females', 'Other/Non-Disclosed'],
)
gender_purchasing_df

In [None]:
# Age Demographics: distinct players per age bracket and their share of all players
# Bin edges sit just below the next whole number so that, with right-closed
# intervals, whole-number ages land in the bracket their label names
# (e.g. 10..14 fall in (9.9, 14.9]).
# NOTE(review): assumes 'Age' holds whole numbers greater than 0 - confirm.
bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
data_df['Age Group'] = pd.cut(data_df['Age'], bins, right=True, labels=age_groups)

# One row per player (first purchase kept) so each player is counted once.
unique_players_df = data_df.drop_duplicates('SN', keep='first')

# 'Age Group' is categorical; observed=False is passed explicitly so brackets
# with no players still appear (count 0) and the result does not depend on
# pandas' changing default for this argument.
player_count_by_age = unique_players_df.groupby('Age Group', observed=False)['SN'].count()
player_percentage_by_age = (player_count_by_age / player_count).map("{:.2%}".format)

age_demographics_df = pd.DataFrame({'Percentage of Players': player_percentage_by_age,
                                    'Total Count': player_count_by_age})
age_demographics_df

In [None]:
# Purchasing Analysis by Age: purchase count, average price and total value
# for each age bracket
# Youngest / oldest recorded ages; not used by the table built in this cell.
min_age = data_df['Age'].min()
max_age = data_df['Age'].max()
# Re-derive the bracket column from the `bins` / `age_groups` defined in the
# Age Demographics cell.
data_df['Age Group'] = pd.cut(data_df['Age'], bins, right=True, labels=age_groups)

# Group once and reuse. observed=False is explicit so every bracket is kept
# even when it has no purchases, independent of pandas' changing default for
# categorical groupers.
purchases_by_age_group = data_df.groupby('Age Group', observed=False)
age_purchase_count = purchases_by_age_group['Purchase ID'].count()
age_purchase_total = purchases_by_age_group['Price'].sum()
age_avg_purchase_value = age_purchase_total / age_purchase_count

age_purchasing_df = pd.DataFrame({'Number of Purchases': age_purchase_count,
                                  'Average Purchase Price': age_avg_purchase_value.map("${:,.2f}".format),
                                  'Total Purchase Value': age_purchase_total.map("${:,.2f}".format)
                                  })

age_purchasing_df

In [None]:
# Top Spenders: the five players with the highest total spend, with their
# purchase count, average purchase price and total purchase value
top_5_spenders = data_df.groupby('SN')['Price'].sum().sort_values(ascending=False).index[0:5]
top_5_spenders_df = data_df.loc[data_df['SN'].isin(top_5_spenders)]

top_spender_groups = top_5_spenders_df.groupby('SN')
purchase_count = top_spender_groups['Purchase ID'].count()
total_purchase_value = top_spender_groups['Price'].sum()
avg_purchase_value = total_purchase_value / purchase_count

# Sort while the totals are still numeric. Sorting after formatting compares
# '$...' strings character by character, which would rank "$9.50" above
# "$18.96".
top_spenders_summary_df = pd.DataFrame({'Number of Purchases': purchase_count,
                                        'Average Purchase Price': avg_purchase_value,
                                        'Total Purchase Value': total_purchase_value
                                        }).sort_values(by='Total Purchase Value', ascending=False)

# Apply currency formatting only once the row order is final.
for money_column in ('Average Purchase Price', 'Total Purchase Value'):
    top_spenders_summary_df[money_column] = top_spenders_summary_df[money_column].map("${:,.2f}".format)

top_spenders_summary_df

In [None]:
# Most Popular Items (mp): the five items purchased most often
item_key = ['Item ID', 'Item Name']

# Work from just the item columns of the purchase log.
mp_item_df = data_df[['Item Name', 'Item ID', 'Price']]

# Per-item aggregates, keyed by (Item ID, Item Name).
mp_item_groupby = mp_item_df.groupby(item_key)
mp_item_count = mp_item_groupby['Price'].count()
mp_total_value = mp_item_groupby['Price'].sum()

# Collapse the log to one row per distinct (item, price) combination and key
# it like the aggregates so the columns line up when the table is assembled.
mp_item_df = mp_item_df.drop_duplicates().set_index(item_key)

mp_summary_df = pd.DataFrame({
    'Purchase Count': mp_item_count,
    'Price': mp_item_df['Price'].map("${:,.2f}".format),
    'Total Purchase Value': mp_total_value.map("${:,.2f}".format),
})

mp_summary_df.sort_values(by='Purchase Count', ascending=False).head()

In [None]:
# Most Profitable items: the five items with the highest total purchase value
# Built from the per-item aggregates of the Most Popular Items cell. The total
# stays numeric until after the sort so rows are ranked by amount, not by text.
mp_summary_df = pd.DataFrame({
    'Purchase Count': mp_item_count,
    'Price': mp_item_df['Price'].map("${:,.2f}".format),
    'Total Purchase Value': mp_total_value,
}).sort_values(by='Total Purchase Value', ascending=False)

# Currency-format the total now that the ordering is fixed.
formatted_totals = mp_summary_df['Total Purchase Value'].map("${:,.2f}".format)
mp_summary_df['Total Purchase Value'] = formatted_totals
mp_summary_df.head()