In [1]:
# Dependencies and Setup
import pandas as pd

In [2]:
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

In [4]:
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)


In [5]:
#Player information 

#Identify individual players

player_df = purchase_data.drop_duplicates(subset='SN', keep='first')
player_count = len(player_df)

player_count_series =pd.DataFrame(
    {"Total Players" : player_count},
index=[0])
player_count_series

Unnamed: 0,Total Players
0,576


In [6]:
#Item Analysis

item_df = purchase_data.drop_duplicates(subset='Item ID', keep='first')
item_count = len(item_df)

average_item_price = round(item_df['Price'].mean(),2)
average_item_price

purchase_count = len(purchase_data)
total_revenue = purchase_data['Price'].sum()

purchase_analysis_df = pd.DataFrame(
    {
        "Number of Unique items" : item_count,
        "Average Price" : '$'+str(average_item_price),
        "Number of Purchases" : purchase_count,
        "Total Revenue" : '$'+str(total_revenue)
    },
index=[0])
purchase_analysis_df

Unnamed: 0,Number of Unique items,Average Price,Number of Purchases,Total Revenue
0,179,$3.04,780,$2379.77


In [7]:
#Gender Denographics
gender_df = player_df.Gender.value_counts().to_frame()
gender_df['Percentage of Players'] = ((gender_df['Gender']/player_count*100).round(2)).astype(str)+'%'
gender_df = gender_df.rename(columns= {'Gender' : 'Total Count'})
gender_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [8]:
# Purchase analysis (Gender)

#Female purchases
fem_purchases_df = purchase_data.loc[purchase_data['Gender'] == 'Female', :]
fem_av_purprice = round(fem_purchases_df['Price'].mean(),2)
fem_total_purprice = round(fem_purchases_df['Price'].sum(),2)

#Purchases per female player 
#Price value now becomes the total spent by female player
fem_df2 = fem_purchases_df.groupby('SN')['Price'].sum().reset_index()
av_perfemale = round(fem_df2['Price'].mean(),2)

#Male purchases
male_purchases_df = purchase_data.loc[purchase_data['Gender'] == 'Male', :]
male_av_purprice = round(male_purchases_df['Price'].mean(),2)
male_total_purprice = round(male_purchases_df['Price'].sum(),2)

#Purchases per male player 
#Price value now becomes the total spent by male player
male_df2 = male_purchases_df.groupby('SN')['Price'].sum().reset_index()
av_permale = round(male_df2['Price'].mean(),2)

#Other purchases
o_purchases_df = purchase_data.loc[purchase_data['Gender'] == 'Other / Non-Disclosed', :]
o_av_purprice = round(o_purchases_df['Price'].mean(),2)
o_total_purprice = round(o_purchases_df['Price'].sum(),2)

#Purchases per female player 
#Price value now becomes the total spent by female player
o_df2 = o_purchases_df.groupby('SN')['Price'].sum().reset_index()
av_pero = round(o_df2['Price'].mean(),2)

gender=['Female','Male','Other / Non-Disclosed']

gpurchase_analysis_df = pd.DataFrame(
    {
        "Purchase Count" : [len(fem_purchases_df),len(male_purchases_df),len(o_purchases_df)],
        "Average Purchase Price" : [('$'+str(fem_av_purprice)),('$'+str(male_av_purprice)),('$'+str(o_av_purprice))],
        "Total Purchase Value" : [('$'+str(fem_total_purprice)),('$'+str(male_total_purprice)),('$'+str(o_total_purprice))],
        "Avg Total Purchase per Person" : [('$'+str(av_perfemale)),('$'+str(av_permale)),('$'+str(av_pero))]
    },
index=gender)
gpurchase_analysis_df

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Female,113,$3.2,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [9]:
#Age Demographics

age_bins = [0,9,14,19,24,29,34,39,101]
age_categories = ['<10','10-14','15-19','20-24','25-29','30-34','35-40','40+']

age_cat = (pd.cut(player_df['Age'], age_bins, labels=age_categories)).value_counts()
age_demo_df = age_cat.to_frame()
age_demo_df = age_demo_df.reindex(age_categories)
age_demo_df = age_demo_df.rename(columns={'Age':'Total Count'})
age_demo_df['Percentage of Players'] = round(age_demo_df['Total Count']/player_count*100,2)
age_demo_df['Percentage of Players'] = age_demo_df['Percentage of Players'].astype(str)+'%'
age_demo_df

Unnamed: 0,Total Count,Percentage of Players
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-40,31,5.38%
40+,12,2.08%


In [10]:
#Purchage Analysis (Age)

purchase_data['Age Ranges'] = pd.cut(purchase_data['Age'], age_bins, labels=age_categories)

age_purchase_count = purchase_data['Age Ranges'].value_counts()
age_purch_df = age_purchase_count.to_frame()
age_purch_df = age_purch_df.reindex(age_categories)
age_purch_df = age_purch_df.rename(columns={'Age Ranges':'Purchase Count'})

age_totalpurch_df = purchase_data[['Age Ranges','Price']]
age_totalpurch_df = age_totalpurch_df.groupby('Age Ranges')['Price'].sum().reset_index()
age_totalpurch_df = age_totalpurch_df.set_index('Age Ranges')

age_purchase_df = age_totalpurch_df.merge(age_purch_df, right_index=True, left_index=True, how='outer')
age_purchase_df = age_purchase_df.merge(age_demo_df, right_index=True, left_index=True, how='outer' )
age_purchase_df['Average Purchase Price'] = round(age_purchase_df['Price']/age_purchase_df['Purchase Count'],2)
age_purchase_df['Avg Total Purchase per Person'] = round(age_purchase_df['Price']/age_purchase_df['Total Count'],2)
age_purchase_df['Average Purchase Price'] = '$'+age_purchase_df['Average Purchase Price'].astype(str)
age_purchase_df['Avg Total Purchase per Person'] = '$'+age_purchase_df['Avg Total Purchase per Person'].astype(str)
age_purchase_df['Price'] = round(age_purchase_df['Price'],2)
age_purchase_df = age_purchase_df.rename(columns={'Price': 'Total Purchase Volume'})
age_purchase_df['Total Purchase Value'] = '$'+age_purchase_df['Total Purchase Volume'].astype(str)
age_purchase_df = age_purchase_df[['Purchase Count','Average Purchase Price','Total Purchase Value','Avg Total Purchase per Person']]
age_purchase_df



Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,$1114.06,$4.32
25-29,101,$2.9,$293.0,$3.81
30-34,73,$2.93,$214.0,$4.12
35-40,41,$3.6,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [11]:
# Top Spenders
sn_df = purchase_data.groupby('SN')['Price'].sum().reset_index()
top_spenders_df = ((sn_df.sort_values(by='Price', ascending = False)).head(5)).reset_index()

top_spenders_df['Purchase Count'] = [
    (purchase_data['SN'].value_counts()[top_spenders_df['SN'][0]]),
    (purchase_data['SN'].value_counts()[top_spenders_df['SN'][1]]),
    (purchase_data['SN'].value_counts()[top_spenders_df['SN'][2]]),
    (purchase_data['SN'].value_counts()[top_spenders_df['SN'][3]]),
    (purchase_data['SN'].value_counts()[top_spenders_df['SN'][4]])
]
top_spenders_df = top_spenders_df.rename(columns={'Price' :'Total Purchase Value'})
top_spenders_df['Total Purchase Value'] = round(top_spenders_df['Total Purchase Value'],2)
top_spenders_df['Average Purchase Price'] = round(top_spenders_df['Total Purchase Value']/top_spenders_df['Purchase Count'],2)
top_spenders_df['Average Purchase Price'] = '$'+(top_spenders_df['Average Purchase Price']).astype(str)
top_spenders_df['Total Purchase Value'] = '$'+(top_spenders_df['Total Purchase Value']).astype(str)
top_spenders_df = top_spenders_df.set_index('SN')
top_spenders_df = top_spenders_df[['Purchase Count','Average Purchase Price', 'Total Purchase Value']]
top_spenders_df 



Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.4,$13.62
Iskadarya95,3,$4.37,$13.1


In [45]:
# Popular Items
it_df = purchase_data.groupby(['Item ID','Item Name'])['Price'].sum().reset_index()
popular_items_df = ((it_df.sort_values(by='Price', ascending = False)).head(5)).reset_index()
popular_items_df = popular_items_df.rename(columns={'Price':'Total Purchase Value'})
popular_items_df['Total Purchase Value'] = round(popular_items_df['Total Purchase Value'],2)
popular_items_df['Purchase Count'] = [
    (purchase_data['Item ID'].value_counts()[popular_items_df['Item ID'][0]]),
    (purchase_data['Item ID'].value_counts()[popular_items_df['Item ID'][1]]),
    (purchase_data['Item ID'].value_counts()[popular_items_df['Item ID'][2]]),
    (purchase_data['Item ID'].value_counts()[popular_items_df['Item ID'][3]]),
    (purchase_data['Item ID'].value_counts()[popular_items_df['Item ID'][4]]),
    
]
popular_items_df['Total Purchase Value'] = '$'+(popular_items_df['Total Purchase Value']).astype(str)

popular_items_df = popular_items_df.set_index('Item ID')
popular_items_df = popular_items_df[['Item Name', 'Purchase Count','Total Purchase Value']]
popular_items_df

Unnamed: 0_level_0,Item Name,Purchase Count,Total Purchase Value
Item ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
92,Final Critic,13,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$50.76
82,Nirvana,9,$44.1
145,Fiery Glass Crusader,9,$41.22
103,Singed Scalpel,8,$34.8


In [41]:



'''
popular_items_df['Item Price'] = [
    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][0]]),['Price']]),
    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][1]]),['Price']]),
    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][2]]),['Price']]),
    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][3]]),['Price']]),
    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][4]]),['Price']])
]

#it =(purchase_data.loc[purchase_data['Item ID'] == [popular_items_df['Item ID'][0]],['Price']])
#popular_items_df

'''

"\npopular_items_df['Item Price'] = [\n    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][0]]),['Price']]),\n    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][1]]),['Price']]),\n    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][2]]),['Price']]),\n    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][3]]),['Price']]),\n    (purchase_data.loc[purchase_data['Item ID'] == ([popular_items_df['Item ID'][4]]),['Price']])\n]\n\n#it =(purchase_data.loc[purchase_data['Item ID'] == [popular_items_df['Item ID'][0]],['Price']])\n#popular_items_df\n\n"