In [217]:
# Dependencies
import pandas as pd

In [218]:
# Set json path
json_path = 'raw_data/purchase_data.json'

In [219]:
# Read json file
heroes_df = pd.read_json(json_path)
heroes_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [220]:
# Look at column names
heroes_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [221]:
# Reorginizing columns
heroes_df = heroes_df[['SN', 'Gender', 'Age', 'Item ID', 'Item Name', 'Price']]
heroes_df.head()

Unnamed: 0,SN,Gender,Age,Item ID,Item Name,Price
0,Aelalis34,Male,38,165,Bone Crushing Silver Skewer,3.37
1,Eolo46,Male,21,119,"Stormbringer, Dark Blade of Ending Misery",2.32
2,Assastnya25,Male,34,174,Primitive Blade,2.46
3,Pheusrical25,Male,21,92,Final Critic,1.36
4,Aela59,Male,23,63,Stormfury Mace,1.27


In [222]:
# Checking out the data
heroes_df.describe()

Unnamed: 0,Age,Item ID,Price
count,780.0,780.0,780.0
mean,22.729487,91.29359,2.931192
std,6.930604,52.707537,1.11578
min,7.0,0.0,1.03
25%,19.0,44.0,1.96
50%,22.0,91.0,2.88
75%,25.0,135.0,3.91
max,45.0,183.0,4.95


In [223]:
# Find number of unique players
total_players = len(heroes_df['SN'].unique())
total_players

573

In [224]:
# Number of unique items
number_unique_items = len(heroes_df['Item Name'].unique())
print(number_unique_items)

# Average purchase price of unique items
avg_price = heroes_df['Price'].mean()
print(avg_price)

# Number of purchases
purchases = len(heroes_df['Price'])
print(purchases)

# Total revenue
revenue = heroes_df['Price'].sum()
print(revenue)

179
2.931192307692303
780
2286.3299999999963


In [225]:
# Create purchasing analysis dataframe
purchase_analysis = {'Number of Unique Items': [number_unique_items], 
                        'Average Price': [f'{avg_price:.2f}'], 
                        'Number of Purchases': [purchases], 
                        'Total Revenue': [f'{revenue:.2f}']}

purchase_analysis_df = pd.DataFrame(purchase_analysis)

purchase_analysis_df = purchase_analysis_df[['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']]

purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,2.93,780,2286.33


In [226]:
# Only grab unique players by dropping duplicates
unique_heroes_df = heroes_df.drop_duplicates(subset='SN', keep='first')
unique_heroes_df.describe()

Unnamed: 0,Age,Item ID,Price
count,573.0,573.0,573.0
mean,22.603839,90.687609,2.977469
std,6.681447,51.618271,1.096957
min,7.0,0.0,1.03
25%,20.0,45.0,2.11
50%,22.0,91.0,2.98
75%,25.0,132.0,3.9
max,45.0,183.0,4.95


In [227]:
# Group by gender to see count
unique_grouped_gender = unique_heroes_df.groupby('Gender').count()
unique_grouped_gender

Unnamed: 0_level_0,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,100,100,100,100,100
Male,465,465,465,465,465
Other / Non-Disclosed,8,8,8,8,8


In [228]:
# Convert the counts of each gender into a dataframe 
unique_grouped_gender_df = pd.DataFrame(unique_grouped_gender)

# Add percentage of players
unique_grouped_gender_df['Percentage of Players'] = round(unique_grouped_gender['SN']/total_players*100, 2)
unique_grouped_gender_df

Unnamed: 0_level_0,SN,Age,Item ID,Item Name,Price,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,100,100,100,100,100,17.45
Male,465,465,465,465,465,81.15
Other / Non-Disclosed,8,8,8,8,8,1.4


In [229]:
# Rename SN column
unique_grouped_gender_df = unique_grouped_gender_df.rename(columns={'SN': 'Total Count'})

# Reorganize columns
gender_demo = unique_grouped_gender_df[['Percentage of Players', 'Total Count']]
gender_demo

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,17.45,100
Male,81.15,465
Other / Non-Disclosed,1.4,8


In [230]:
# Group by gender for Purchasing Analysis (Gender)
grouped_gender = heroes_df.groupby(['Gender'])
grouped_gender

<pandas.core.groupby.DataFrameGroupBy object at 0x0000015CDED9FBE0>

In [231]:
# Grab purchase count for each gender
purchase_count = grouped_gender['Price'].count()
purchase_count

# Grab average price for each gender
avg_purchase_price = grouped_gender['Price'].mean()
avg_purchase_price

# Grab sum for each gender
total_purchase_value = grouped_gender['Price'].sum()
total_purchase_value

Gender
Female                    382.91
Male                     1867.68
Other / Non-Disclosed      35.74
Name: Price, dtype: float64

In [232]:
# Create Purchasing Analysis (Gender)
gender_purchasing_analysis = pd.DataFrame({'Purchase Count': purchase_count, 
                                           'Average Purchase Price': avg_purchase_price, 
                                           'Total Purchase Value': total_purchase_value, 
                                           'Normalized Totals': [0, 0, 0]})
# Reorder columns
gender_purchasing_analysis = gender_purchasing_analysis[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']]

gender_purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,136,2.815515,382.91,0
Male,633,2.950521,1867.68,0
Other / Non-Disclosed,11,3.249091,35.74,0


In [233]:
# Create bins for Age Demographics Analysis
bins = [2, 9, 14, 19, 24, 29, 34, 39, 99]
labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

unique_heroes_df['Age Range'] = pd.cut(unique_heroes_df['Age'], bins=bins, labels=labels)
unique_heroes_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,SN,Gender,Age,Item ID,Item Name,Price,Age Range
0,Aelalis34,Male,38,165,Bone Crushing Silver Skewer,3.37,35-39
1,Eolo46,Male,21,119,"Stormbringer, Dark Blade of Ending Misery",2.32,20-24
2,Assastnya25,Male,34,174,Primitive Blade,2.46,30-34
3,Pheusrical25,Male,21,92,Final Critic,1.36,20-24
4,Aela59,Male,23,63,Stormfury Mace,1.27,20-24


In [234]:
# Group by bins
grouped_age_range = unique_heroes_df.groupby(['Age Range']).count()
grouped_age_range

Unnamed: 0_level_0,SN,Gender,Age,Item ID,Item Name,Price
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
<10,19,19,19,19,19,19
10-14,23,23,23,23,23,23
15-19,100,100,100,100,100,100
20-24,259,259,259,259,259,259
25-29,87,87,87,87,87,87
30-34,47,47,47,47,47,47
35-39,27,27,27,27,27,27
40+,11,11,11,11,11,11


In [239]:
# Convert the counts of each age range into a dataframe
age_demo_df = pd.DataFrame(grouped_age_range)


# Add percentage of players
age_demo_df['Percentage of Players'] = round(age_demo_df['Age']/total_players*100, 2)

# Rename Age column
# age_demo_df = age_demo_df.rename({'Age': 'Total Count'})
age_demo_df

Unnamed: 0_level_0,SN,Gender,Age,Item ID,Item Name,Price,Percentage of Players
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
<10,19,19,19,19,19,19,3.32
10-14,23,23,23,23,23,23,4.01
15-19,100,100,100,100,100,100,17.45
20-24,259,259,259,259,259,259,45.2
25-29,87,87,87,87,87,87,15.18
30-34,47,47,47,47,47,47,8.2
35-39,27,27,27,27,27,27,4.71
40+,11,11,11,11,11,11,1.92
