# Heroes Of Pymoli Report

### Importing Modules

In [1]:
import pandas as pd
import os

### Reading in Data Files 

In [2]:
#path to the data file
file_url = os.path.join('Data','purchase_data_1.json')

#reading the json file into as a DataFrame
user_data_df = pd.read_json(file_url)

### Display DataFrame

In [3]:
user_data_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Data Cleaning 

### Check if all columns are equal: Pass

In [4]:
user_data_df.count()

Age          780
Gender       780
Item ID      780
Item Name    780
Price        780
SN           780
dtype: int64

### Drop empty rows and check the column count again

In [5]:
user_data_df = user_data_df.dropna(how='any')

user_data_df.count()

Age          780
Gender       780
Item ID      780
Item Name    780
Price        780
SN           780
dtype: int64

### Change the column name

In [6]:
user_data_df = user_data_df.rename(columns={'SN':"Username"})

### Display the DataFrame after Data Cleaning

In [7]:
user_data_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,Username
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


## Analysis

* Player Count
* Purchasing Analysis (Total)
* Gender Demographics
* Purchasing Analysis (Gender)
* Age Demographics
* Top Spenders
* Most Popular Items
* Most Profitable Items



### Player Count

In [8]:
player_count = len(user_data_df['Username'].unique())
print("Number of Players: {}".format(player_count))

Number of Players: 573


### Purchasing Analysis
* Number of Unique Items
* Average Purchase Price
* Total Number of Purchases
* Total Revenue

In [9]:
unique_items = len(user_data_df['Item ID'].unique())
avg_price = user_data_df['Price'].mean()
total_no_of_purchases = user_data_df['Price'].count()
total_revenue = user_data_df['Price'].sum()

purchasing_analysis_df = pd.DataFrame({
    "Number of Unique Items": unique_items,
    "Average Purchase Price": avg_price,
    "Total Number of Purchases": total_no_of_purchases,
    "Total Revenue": total_revenue,
},index = [0])

# data mugging 
purchasing_analysis_df['Average Purchase Price'] = purchasing_analysis_df['Average Purchase Price'].map("$ {:,.2f}".format)
purchasing_analysis_df['Total Revenue'] = purchasing_analysis_df['Total Revenue'].map("$ {:,.2f}".format)

purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,$ 2.93,780,"$ 2,286.33"


### Gender Demographics
* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [56]:
player_demographics = user_data_df[['Gender','Age','Username']]
player_demographics = player_demographics.drop_duplicates()

total_count = player_demographics['Gender'].value_counts()

gender_demographics_df = pd.DataFrame({
    "Total Count": total_count
})

gender_demographics_df

Unnamed: 0,Total Count
Male,465
Female,100
Other / Non-Disclosed,8


### Purchasing Analysis (Gender)

`The below each broken by gender`
* Purchase Count
* Average Purchase Price
* Total Purchase Value
* Normalized Totals

In [30]:
purchase_count = user_data_df.groupby('Gender').count()['Price']
avg_purchase_price = user_data_df.groupby('Gender').mean()['Price']
total_purchase_value = user_data_df.groupby('Gender').sum()['Price']
normalized_total = user_data_df.groupby('Gender').sum()['Price'] / gender_demographics_df['Total Count']

pa = pd.DataFrame({
    "Purchase Count": purchase_count,
    "Average Purchase Price": avg_purchase_price,
    "Total Purchase Value": total_purchase_value,
    "Normalized Totals": normalized_total
})

pa['Average Purchase Price'] = pa['Average Purchase Price'].map("$ {:,.2f}".format)
pa["Total Purchase Value"] = pa["Total Purchase Value"].map("$ {:,.2f}".format)
pa["Normalized Totals"] = pa["Normalized Totals"].map("$ {:,.2f}".format)

pa

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,$ 2.82,$ 382.91,$ 3.83
Male,633,$ 2.95,"$ 1,867.68",$ 4.02
Other / Non-Disclosed,11,$ 3.25,$ 35.74,$ 4.47


### Age Demographics
* Percentage and Count by each age range

In [66]:
labels = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
bins = [0,10,15,20,25,30,35,40,100]

binned = pd.cut(player_demographics['Age'],bins=bins,labels=labels,right=False)
player_demographics['Age Range'] = binned

total_count = player_demographics['Age Range'].value_counts()
percentage = round(total_count/player_count * 100,2)

ad = pd.DataFrame({
    "Total Count": total_count,
    "Total Percentage": percentage
})

ad['Total Percentage'] = ad['Total Percentage'].map("{:,.2f}%".format)

ad

Unnamed: 0,Total Count,Total Percentage
20-24,259,45.20%
15-19,100,17.45%
25-29,87,15.18%
30-34,47,8.20%
35-39,27,4.71%
10-14,23,4.01%
<10,19,3.32%
40+,11,1.92%


### Age Demographics

`The below each broken into bins of 4 years (i.e. <10, 10-14, 15-19, etc.)`
* Purchase Count
* Average Purchase Price
* Total Purchase Value
* Normalized Totals

In [67]:
labels = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
bins = [0,10,15,20,25,30,35,40,100]

binned = pd.cut(user_data_df['Age'],bins=bins,labels=labels,right=False)
user_data_df['Age Range'] = binned

purchase_count = user_data_df.groupby('Age Range').count()['Price']
avg_purchase_price = user_data_df.groupby('Age Range').mean()['Price']
total_purchase_value = user_data_df.groupby('Age Range').sum()['Price']
normalized_totals = total_purchase_value /ad['Total Count']

age_demographics_df = pd.DataFrame({
    "Purchase Count": purchase_count,
    "Average Purchase Price": avg_purchase_price,
    "Total Purchase Value": total_purchase_value,
    "Normalized Total":normalized_totals
})

age_demographics_df["Average Purchase Price"] = age_demographics_df["Average Purchase Price"].map("$ {:,.2f}".format)
age_demographics_df["Total Purchase Value"] = age_demographics_df["Total Purchase Value"].map("$ {:,.2f}".format)
age_demographics_df['Normalized Total'] = age_demographics_df['Normalized Total'].map("$ {:,.2f}".format)
age_demographics_df

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Total
10-14,35,$ 2.77,$ 96.95,$ 4.22
15-19,133,$ 2.91,$ 386.42,$ 3.86
20-24,336,$ 2.91,$ 978.77,$ 3.78
25-29,125,$ 2.96,$ 370.33,$ 4.26
30-34,64,$ 3.08,$ 197.25,$ 4.20
35-39,42,$ 2.84,$ 119.40,$ 4.42
40+,17,$ 3.16,$ 53.75,$ 4.89
<10,28,$ 2.98,$ 83.46,$ 4.39


### Top Spenders
`Identify the the top 5 spenders in the game by total purchase value, then list (in a table):`
* Username
* Purchase Count
* Average Purchase Price
* Total Purchase Value

In [85]:
purchase_count_tp = user_data_df.groupby('Username').count()['Price']
avg_purchase_price_tp = user_data_df.groupby('Username').mean()['Price']
total_purchase_value_tp = user_data_df.groupby('Username').sum()['Price']

top_spender_df = pd.DataFrame({
    "Purchase Count": purchase_count_tp,
    "Average Purchase Price": avg_purchase_price_tp,
    "Total Purchase Value":total_purchase_value_tp
})

top_spender_df['Average Purchase Price'] = top_spender_df['Average Purchase Price'].map("$ {:,.2f}".format)
top_spender_df['Total Purchase Value'] = top_spender_df['Total Purchase Value'].map("$ {:,.2f}".format)

top_spender_df = top_spender_df.sort_values(['Total Purchase Value'],ascending=False)
top_spender_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Username,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Qarwen67,4,$ 2.49,$ 9.97
Sondim43,3,$ 3.13,$ 9.38
Tillyrin30,3,$ 3.06,$ 9.19
Lisistaya47,3,$ 3.06,$ 9.19
Tyisriphos58,2,$ 4.59,$ 9.18


### Most Popular Items

`Identify the 5 most popular items by purchase count, then list (in a table):`
* Item ID
* Item Name
* Purchase Count
* Item Price
* Total Purchase Value

In [92]:
item_data = user_data_df

purchase_count= item_data.groupby(['Item ID','Item Name']).count()['Price']
avg_purchase_price = item_data.groupby(['Item ID','Item Name']).mean()['Price']
total_purchase_value = item_data.groupby(['Item ID','Item Name']).sum()['Price']

top_item_df = pd.DataFrame({
    "Purchase Count": purchase_count,
    "Average Purchase Price": avg_purchase_price,
    "Total Purchase Value":total_purchase_value
})

top_item_df['Average Purchase Price'] = top_item_df['Average Purchase Price'].map("$ {:,.2f}".format)
top_item_df['Total Purchase Value'] = top_item_df['Total Purchase Value'].map("$ {:,.2f}".format)
top_item_df = top_item_df.loc[:,["Item ID", "Item Name", "Price"]]

top_item_df = top_item_df.sort_values(['Purchase Count'],ascending=False)
top_item_df.head()

KeyError: "None of [['Item ID', 'Item Name', 'Price']] are in the [columns]"

### Most Profitable Items

`Identify the 5 most profitable items by total purchase value, then list (in a table):`
- Item ID
- Item Name
- Purchase Count
- Item Price
- Total Purchase Value

## Handler Functions