# Heroes Of Pymoli Data Analysis
* Of the 573 active players, the vast majority are male (81%). There also exists, a smaller, but notable proportion of female players (17.45%).

* Our peak age demographic falls between 20-24 (45%) with secondary groups falling between 15-19 (17.45%) and 25-29 (15.18%).

* Players in all age groups are equally active with purchases and the average purchase for a player is `$`2.93. Even in different age groups and demographics the average purchase price is roughly around the same(`$`2.93)
-----

In [12]:
# Importing dependencies
import pandas as pd

# Reading data from a json file
filename = 'purchase_data.json'
purchase_df = pd.read_json(filename)

In [2]:
purchase_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Player Count

In [3]:
# Finding total number of unique players

player_count = len(purchase_df["SN"].unique())
total_players = pd.DataFrame({"Total players": [player_count]})
total_players


Unnamed: 0,Total players
0,573


## Purchasing Analysis (Total)

In [4]:
# Purchasing Analysis (Total)
# Finding number of unique items in the purchase dataset
group_unique_items = purchase_df.groupby(['Item ID', 'Item Name'])
unique_items = len(group_unique_items)

# Finding the average and total amount of purchases 
avg_price = purchase_df[['Price']].mean()
total_revenue = purchase_df[['Price']].sum()

# Finding the total number of Items purchased
total_purchases = purchase_df['Item ID'].count()

# Creating a summary table for Total Purchasing Analysis
total_purchase_table = pd.DataFrame({'Number of Unique Items': [unique_items],\
                                      'Average Price': avg_price.map('${:.2f}'.format),\
                                      'Number of Purchases': total_purchases,\
                                      'Total Revenue': total_revenue.map('${:,}'.format)})

# Ordering the columns and dropping the grouped 'index' column
columns = ['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']
total_purchase_summary_df = total_purchase_table[columns]
total_purchase_summary_df.reset_index(drop=True)

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$2.93,780,"$2,286.33"


## Gender Demographics

In [5]:
# Finding total number of unique players
total_gender_count = purchase_df['SN'].nunique()

# Finding total number of unique players by gender
grouped_gender = purchase_df.groupby(['Gender'])['SN'].nunique()

# Finding percentage of players by gender
player_percentage = (grouped_gender/total_gender_count)*100

# Creating a summary table for demographics by gender
demographics = pd.DataFrame({'Percentage of Players': player_percentage.map("{:.2f}%".format),\
                              'Total Count': grouped_gender})
demographics.index.name = None
demographics.sort_values('Percentage of Players', ascending=False)



Unnamed: 0,Percentage of Players,Total Count
Male,81.15%,465
Female,17.45%,100
Other / Non-Disclosed,1.40%,8



## Purchasing Analysis (Gender)

In [6]:
# Purchase Analysis by Gender
# Computing Purchase Count 
gender_group = purchase_df.groupby(['Gender'])
purchase_count = gender_group['Price'].count()

# Calculating the Average price per Gender group
average_price = gender_group['Price'].mean()

# Calculating the total purchases in the dataset
total_purchase = gender_group['Price'].sum()

# Calculating the unique player count per gender group
player_count = gender_group['SN'].nunique()

# Calculating the Normalized Totals for each gender
normalized_totals = (total_purchase/player_count)

# Creating the summary dataframe for Purchase Anlaysis(Gender) and formatting 
gender_purchase_analysis_df = pd.DataFrame({'Purchase Count': purchase_count, 
                                            'Average Purchase Price': average_price.map('${:.2f}'.format),
                                            'Total Purchase Value': total_purchase.map('${:,.2f}'.format),
                                            'Normalized Totals': normalized_totals.map('${:.2f}'.format)})

# Ordering the columns in the output table
columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']
gender_purchase_analysis_df = gender_purchase_analysis_df[columns]
gender_purchase_analysis_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$2.82,$382.91,$3.83
Male,633,$2.95,"$1,867.68",$4.02
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


## Age Demographics

In [7]:
# Age Demographics

# Copying the original dataset to a new dataframe 
purchase_demo_df = pd.DataFrame(purchase_df)

# Create the bins in which Data will be held
# Bins are (0 < x <= 9), (9 < x <= 14), (14 < x <= 19),(19 < x <= 24), 
#         (24 < x <= 29), (29 < x <= 34), (34 < x <= 39),(39 < x <= 50)
bins = [0, 9, 14, 19, 24, 29, 34, 39, 50]

# Create Labels for the bins
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Splitting age groups into bins and adding the resultant series to the Purchase dataframe
purchase_demo_df['Age Category']  = pd.cut(purchase_demo_df['Age'], bins, labels=group_names)

# Grouping by Age Category 
group_age_series = purchase_demo_df.groupby(['Age Category'])

# Calculating unique player count per each Age Category.
unique_players = group_age_series['SN'].nunique()
# Converting the series to a Dataframe and renaming the count column
unique_players_df = pd.DataFrame(unique_players).rename(columns={'SN': 'Total Count'})

# Calculating the total number of players in the dataset
total_player_count = unique_players.sum()

# Calculating the percentage of players per each age group, and formatting with two decimal places
percent_player_count = round((unique_players/total_player_count)*100,2)

# Converting the series to a Dataframe and renaming the percentage count column
percent_player_count_df = pd.DataFrame(percent_player_count).rename(columns={'SN': 'Percentage of Players'})

# Merging the percent_player dataframe and unique_player dataframe with 'Age Category' as index 
age_demography_df = pd.merge(percent_player_count_df, unique_players_df, left_index=True, right_index=True, how='inner')
age_demography_df.index.name=None

# Formatting the Percentage of Players column
age_demography_df['Percentage of Players'] = age_demography_df['Percentage of Players'].map('{:.2f}%'.format)
age_demography_df


Unnamed: 0,Percentage of Players,Total Count
<10,3.32%,19
10-14,4.01%,23
15-19,17.45%,100
20-24,45.20%,259
25-29,15.18%,87
30-34,8.20%,47
35-39,4.71%,27
40+,1.92%,11


## Purchasing Analysis (Age)

In [8]:
# Purchase Analysis (Age)

# Copying the original dataset to a new dataframe 
purchase_age_df = pd.DataFrame(purchase_df)

# Create the bins in which Data will be held
# Bins are (0 < x <= 9), (9 < x <= 14), (14 < x <= 19),(19 < x <= 24), 
#         (24 < x <= 29), (29 < x <= 34), (34 < x <= 39),(39 < x <= 50)
bins = [0, 9, 14, 19, 24, 29, 34, 39, 50]

# Create Labels for the bins
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# Splitting age groups into bins and adding the resultant series to the Purchase dataframe
purchase_age_df['Age Category']  = pd.cut(purchase_age_df['Age'], bins, labels=group_names)

# Grouping by Age Category 
group_age_series = purchase_age_df.groupby(['Age Category'])

# Calculating Total player count per each Age Category.
player_count_age_group = group_age_series['SN'].nunique()

# Calculating Purchase Count by Age Category
purchase_count = group_age_series['Price'].count()
purchase_count

# Calculating the Average Purchase Value by Age Category
average_purchase_value = group_age_series['Price'].mean()

# Calculating the Total Purchase Value by Age Category
total_purchase_value = group_age_series['Price'].sum()

# Calculating the Normalized Totals for each gender
normalized_totals = (total_purchase_value/player_count_age_group)

# Creating the summary dataframe for Purchase Anlaysis(Age) and formatting 
purchase_analysis_byage_df = pd.DataFrame({'Purchase Count': purchase_count,
                                           'Average Purchase Value': average_purchase_value.map('${:.2f}'.format),
                                           'Total Purchase Value': total_purchase_value.map('${:.2f}'.format),
                                           'Normalized Totals': normalized_totals.map('${:.2f}'.format)})

# Ordering the columns in the output table 
columns = ['Purchase Count', 'Average Purchase Value', 'Total Purchase Value', 'Normalized Totals']
purchase_analysis_byage_df = purchase_analysis_byage_df[columns]

# Dropping the index name
purchase_analysis_byage_df.index.name = None
purchase_analysis_byage_df

Unnamed: 0,Purchase Count,Average Purchase Value,Total Purchase Value,Normalized Totals
<10,28,$2.98,$83.46,$4.39
10-14,35,$2.77,$96.95,$4.22
15-19,133,$2.91,$386.42,$3.86
20-24,336,$2.91,$978.77,$3.78
25-29,125,$2.96,$370.33,$4.26
30-34,64,$3.08,$197.25,$4.20
35-39,42,$2.84,$119.40,$4.42
40+,17,$3.16,$53.75,$4.89


## Top Spenders

In [9]:
#Top 5 spenders list
sn_df = purchase_df.groupby(['SN'])

# Finding the total count, average and the total purchases in price
purchase_count = sn_df['Price'].count()
avg_purchase_price = sn_df['Price'].mean()
total_purchase_price = sn_df['Price'].sum()

# Creating a summary table for Spender's Analysis List
total_spend_table = pd.DataFrame({'Purchase Count': purchase_count,\
                                  'Average Purchase Price': avg_purchase_price.map('${:.2f}'.format),\
                                  'Total Purchase Value': total_purchase_price})

# Ordering the columns and sorting the table by descending order
columns = ['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']
total_spending_df = total_spend_table[columns].sort_values('Total Purchase Value',ascending=False)

# Formatting the Total Purchase Value
total_spending_df['Total Purchase Value'] = total_spending_df['Total Purchase Value'].map('${:.2f}'.format)

# Selecting the top 5 max spenders in the dataset
total_spending_df.iloc[0:5,]

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


## Most Popular Items

In [10]:
# Most Popular Items

# Get the unique items by grouping Item ID and Item Name columns
items_group = purchase_df.groupby(['Item ID','Item Name'])

# Getting the purchase count for each Item
purchase_count = items_group['Price'].count()

# Calculating the average price of items
avg_price = items_group['Price'].mean()

# Calculating the total purchases
total_purchase  = items_group['Price'].sum()

# Creating the summary dataframe for displaying Most Profitable Items
profitable_table = pd.DataFrame({'Purchase Count': purchase_count,
                                    'Item Price': avg_price,
                                    'Total Purchase Value': total_purchase})

# Ordering the columns in the output table
columns = ['Purchase Count','Item Price','Total Purchase Value']
profitable_table = profitable_table[columns]

# Sort the table by 'Purchase Count' in descending order
profitable_table = profitable_table.sort_values('Purchase Count', ascending=False)

# formatting the purcahse value and Item Price
profitable_table['Total Purchase Value'] = profitable_table['Total Purchase Value'].map('${:.2f}'.format)
profitable_table['Item Price'] = profitable_table['Item Price'].map('${:.2f}'.format)
profitable_table.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41


## Most Profitable Items

In [11]:
# Most Profitable Items

# Get the unique items by grouping Item ID and Item Name columns
items_group = purchase_df.groupby(['Item ID','Item Name'])

# Getting the purchase count for each Item
purchase_count = items_group['Price'].count()

# Calculating the average price of items
avg_price = items_group['Price'].mean()

# Calculating the total purchases
total_purchase  = items_group['Price'].sum()

# Creating the summary dataframe for displaying Most Profitable Items
profitable_table = pd.DataFrame({'Purchase Count': purchase_count,
                                    'Item Price': avg_price,
                                    'Total Purchase Value': total_purchase})

# Ordering the columns in the output table
columns = ['Purchase Count','Item Price','Total Purchase Value']
profitable_table = profitable_table[columns]

# Sort the table by total purchase value in descending order
profitable_table = profitable_table.sort_values('Total Purchase Value', ascending=False)

# formatting the purcahse value and Item Price
profitable_table['Total Purchase Value'] = profitable_table['Total Purchase Value'].map('${:.2f}'.format)
profitable_table['Item Price'] = profitable_table['Item Price'].map('${:.2f}'.format)
profitable_table.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,$37.26
115,Spectral Diamond Doomblade,7,$4.25,$29.75
32,Orenmir,6,$4.95,$29.70
103,Singed Scalpel,6,$4.87,$29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,$28.88
