# HEROS OF PYMOLI DATA ANALYSIS

    1. Of the 576 players playing the game, 84.03% of them identify as Male. They are the majority of players
    2. The largest age group is between 20-24, who make up 258 out of the 576 total players and also a majority of in game     purchases
    3. Despite not being the largest age group, 35-39 was on average spending more per item than 20-24, a difference of about $.55 per item.
    

In [40]:
#Import Dependencies
import pandas as pd
import numpy


In [41]:
#Bring in CSV
pymoli_csv = 'Resources/purchase_data.csv'

In [42]:
#Convert to Data Frame
pymoli_df = pd.read_csv(pymoli_csv)
pymoli_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# Player Count
1. Display total amount of players

In [43]:
#Display Total Number Of Players
playercount = {'Total Players': pymoli_df['SN'].nunique()}

playercount_df = pd.DataFrame(playercount, index = [0])

#Print Total Players
playercount_df

Unnamed: 0,Total Players
0,576


# Purchasing Analysis (Total)

In [44]:
#Calculate Number of Unique Items
uniqueitems_count = pymoli_df['Item ID'].nunique()

#Calculate Average Purchase Price
avg_purchase = pymoli_df['Price'].mean()

#Calculate Total Number of Purchases
totalpurchase_count = pymoli_df['Purchase ID'].count()

#Calculate Total Revenue
totalrevenue = pymoli_df['Price'].sum()

In [45]:
#Display Purchasing Analysis(Total)
#Organize Variables Into A Dict
SummaryDict = [{'# of Unique Items': uniqueitems_count,
                'Average Price': avg_purchase,
                'Total Purchase Count': totalpurchase_count,
                'Total Revenue': totalrevenue}]

#Turn Dict Into DF
summary_df = pd.DataFrame(SummaryDict)

In [46]:
#Format New DF And Print
summary_df['Average Price'] = summary_df['Average Price'].map('${:.2f}'.format)
summary_df['Total Revenue'] = summary_df['Total Revenue'].map('${:,.2f}'.format)

#Print Summary Table
summary_df

Unnamed: 0,# of Unique Items,Average Price,Total Purchase Count,Total Revenue
0,183,$3.05,780,"$2,379.77"


# Percentage of Players By Gender

In [47]:
#Group By Gender
pymoli_gender_gp = pymoli_df.groupby(['Gender'])

#Create a DF out of grouping and counting
pymoli_gender_gp_count = pymoli_gender_gp.nunique()


# #Add Percentage Column
pymoli_gender_gp_count['Percentage Of Players'] = (pymoli_gender_gp_count['SN'] / pymoli_gender_gp_count['SN'].sum()) * 100


#Clean Up Columns And Rename And Print
organized_pct = pymoli_gender_gp_count[['SN', 'Percentage Of Players']]

organized_pct = organized_pct.rename(columns = {'SN': 'Number Of Players'})

organized_pct

Unnamed: 0_level_0,Number Of Players,Percentage Of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,81,14.0625
Male,484,84.027778
Other / Non-Disclosed,11,1.909722


# Purchasing Analysis By Gender

In [48]:
#Gender Purchasing Analysis

#Calculate Purchase Count
purchase_count = pymoli_df['Gender'].value_counts()


#Group By Gender
pymoli_gender_gp = pymoli_df.groupby(['Gender'])

#Calculate Player Count
player_count = pymoli_gender_gp['SN'].nunique()

#Create Average Purchase Price
avg_purchase = pymoli_gender_gp['Price'].mean()

#Create Total Purchase Value
tot_purchase = pymoli_gender_gp['Price'].sum()

#Create New DataFrame
gender_analysis_table = pd.DataFrame({'Purchase Count': purchase_count, 
                                    'Average Purchase Price': avg_purchase,
                                    'Total Purchase Value': tot_purchase})

#Calculate Average Total Purchase Per Person
gender_analysis_table['Average Total Purchase Per Person'] = gender_analysis_table['Total Purchase Value'] / player_count

#Format Avg Price, Total, and Avg Total
gender_analysis_table['Average Purchase Price'] = gender_analysis_table['Average Purchase Price'].map('${:.2f}'.format)
gender_analysis_table['Total Purchase Value'] = gender_analysis_table['Total Purchase Value'].map('${:,.2f}'.format)
gender_analysis_table['Average Total Purchase Per Person'] = gender_analysis_table['Average Total Purchase Per Person'].map('${:.2f}'.format)


#Print
gender_analysis_table

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Total Purchase Per Person
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


# Age Demographic Purchasing Analysis

In [49]:
#Create Bins
bins = [0, 10, 14, 19, 24, 29, 34, 39, 50]
binname = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34','35-39', '40+']

#Create Age Group By Cutting
pymoli_df['Age Group'] = pd.cut(pymoli_df['Age'], bins, labels=binname)

#Calculate Purchase Count
purchase_count = pymoli_df['Age Group'].value_counts()


#Create New DF by Grouping by Age Group
pymoli_age = pymoli_df.groupby(['Age Group'])


#Calculate Player Count
player_count = pymoli_age['SN'].nunique()

In [50]:
#Create Average Purchase Price
avg_purchase = pymoli_age['Price'].mean()

#Create Total Purchase Value
tot_purchase = pymoli_age['Price'].sum()

#Create New Data Frame Using All Variables Just Calculated
age_analysis_table = pd.DataFrame({'Player Count': player_count, 'Purchase Count': purchase_count, 
                                   'Average Purchase Price': avg_purchase,'Total Purchase Value': tot_purchase})

#Add In Average Total Purchase Per Person Per Group
age_analysis_table['Avg Total Purchase Per Person'] = age_analysis_table['Total Purchase Value'] / age_analysis_table['Player Count']

#Format Avg Price, Total, and Avg Total
age_analysis_table['Average Purchase Price'] = age_analysis_table['Average Purchase Price'].map('${:.2f}'.format)
age_analysis_table['Total Purchase Value'] = age_analysis_table['Total Purchase Value'].map('${:,.2f}'.format)
age_analysis_table['Avg Total Purchase Per Person'] = age_analysis_table['Avg Total Purchase Per Person'].map('${:.2f}'.format)


age_analysis_table

Unnamed: 0,Player Count,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase Per Person
10-14,15,19,$2.68,$50.95,$3.40
15-19,107,136,$3.04,$412.89,$3.86
20-24,258,365,$3.05,"$1,114.06",$4.32
25-29,77,101,$2.90,$293.00,$3.81
30-34,52,73,$2.93,$214.00,$4.12
35-39,31,41,$3.60,$147.67,$4.76
40+,12,13,$2.94,$38.24,$3.19
<10,24,32,$3.40,$108.96,$4.54


# Top 5 Spenders

In [51]:
#Calculate Purchase Count
purchase_count_sn = pymoli_df['SN'].value_counts()

#Create New DF by Grouping by SN
pymoli_spenders = pymoli_df.groupby(['SN'])

#Create Average Purchase Price
avg_purchase = pymoli_spenders['Price'].mean()

#Create Total Purchase Value
tot_purchase = pymoli_spenders['Price'].sum()

#Create New Data Frame Using Variables Just Calculated
spender_analysis_table = pd.DataFrame({'Purchase Count': purchase_count_sn, 'Average Purchase Price': avg_purchase,
                                       'Total Purchase Value': tot_purchase})

In [52]:
#Sort by total purchase value
highest_spender = spender_analysis_table.sort_values('Total Purchase Value', ascending = False)

#Format Avg Price, Total
highest_spender['Average Purchase Price'] = highest_spender['Average Purchase Price'].map('${:.2f}'.format)
highest_spender['Total Purchase Value'] = highest_spender['Total Purchase Value'].map('${:.2f}'.format)


highest_spender.head()


Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


# Top 5 Most Popular Items (By Purchase Count)

In [53]:
#Create New DF by Grouping by Item ID and Item Name
pymoli_pop_it = pymoli_df.groupby(['Item ID', 'Item Name', 'Price'])

#Create Total Purchase Value
tot_purchase = pymoli_pop_it['Price'].sum()

#Create New Data Frame Using Variables Just Calculated and add in purchase count
pop_item_analysis = pd.DataFrame({'Total Purchase Count': pymoli_pop_it['Purchase ID'].count() ,'Total Purchase Volume': tot_purchase})

In [54]:
#Sort By Purchase Count
popular_items = pop_item_analysis.sort_values('Total Purchase Count', ascending = False)

#Reset Index For Formatting
popular_items.reset_index(inplace=True)

#Format Price, Total Purchase Volume
popular_items['Price'] = popular_items['Price'].map('${:.2f}'.format)
popular_items['Total Purchase Volume'] = popular_items['Total Purchase Volume'].map('${:.2f}'.format)

#print Top 5
popular_items.head()

Unnamed: 0,Item ID,Item Name,Price,Total Purchase Count,Total Purchase Volume
0,178,"Oathbreaker, Last Hope of the Breaking Storm",$4.23,12,$50.76
1,145,Fiery Glass Crusader,$4.58,9,$41.22
2,108,"Extraction, Quickblade Of Trembling Hands",$3.53,9,$31.77
3,82,Nirvana,$4.90,9,$44.10
4,19,"Pursuit, Cudgel of Necromancy",$1.02,8,$8.16


# Top 5 Most Profitable Items (By Total Volume)

In [55]:
#Create New DF by Grouping by Item ID and Item Name and Price
pymoli_profit_it = pymoli_df.groupby(['Item ID', 'Item Name', 'Price'])

#Create Total Purchase Value
tot_purchase = pymoli_profit_it['Price'].sum()

#Create New Data Frame Using Variables Just Calculated and add in purchase count
profit_items_analysis = pd.DataFrame({'Total Purchase Count': pymoli_profit_it['Purchase ID'].count(),
                                    'Total Purchase Volume': tot_purchase})

In [56]:
#Sort By Purchase Volume
profitable_items = profit_items_analysis.sort_values('Total Purchase Volume', ascending = False)

#Reset Index For Formatting
profitable_items.reset_index(inplace = True)

#Format Price, Total Purchase Volume
profitable_items['Price'] = profitable_items['Price'].map('${:.2f}'.format)
profitable_items['Total Purchase Volume'] = profitable_items['Total Purchase Volume'].map('${:.2f}'.format)

#Print Top 5
profitable_items.head()

Unnamed: 0,Item ID,Item Name,Price,Total Purchase Count,Total Purchase Volume
0,178,"Oathbreaker, Last Hope of the Breaking Storm",$4.23,12,$50.76
1,82,Nirvana,$4.90,9,$44.10
2,145,Fiery Glass Crusader,$4.58,9,$41.22
3,92,Final Critic,$4.88,8,$39.04
4,103,Singed Scalpel,$4.35,8,$34.80
