In [1]:
import pandas as pd

In [2]:
df = pd.read_csv ("resources/purchase_data.csv")

# First look at the data

In [3]:
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
#Player Count

players = {}
player_count = len(df['SN'].value_counts())
players['Total Players'] = [player_count]
players_df = pd.DataFrame.from_dict(players)
players_df

# Purchasing Analysis

In [5]:

#look at names
df['SN'].value_counts()

#group by names
gby_SN = df.groupby(['SN'])

#total names (actually total purchases, because name is listed for ea purchase)
gby_SN.count()

#calculate unique items
unique_items = len(df['Item ID'].value_counts())

#group by item name
gby_item_name = df.groupby(['Item Name'])

#calculate total revenue
total_revenue = df['Price'].sum()

#calculate number of purchase
number_of_purchases = df['Purchase ID'].count()

#calculate mean price
mean_price = round(gby_item_name['Price'].mean().mean(),2)

#add all values to dict
purchasing = {'number of unique items': [unique_items],
           'Average Price': [mean_price],
           'Total revenue': [total_revenue],
           'Number of Purchases' : [number_of_purchases]}

purchasing_df = pd.DataFrame.from_dict(purchasing)
purchasing_df
                                                                                

Unnamed: 0,number of unique items,Average Price,Total revenue,Number of Purchases
0,179,3.04,2379.77,780


# Gender Demographics

In [6]:
#To count genders, needed to get rid of duplicate entries. Because each purchase by a different person
#is entered individually, with repeats if someone made more than one purchase.

df_uniq = df.sort_values('SN')
df_uniq = df_uniq.drop_duplicates(subset='SN', keep='first')
df.count()
df_uniq.count()

gby_gender_uniq = df_uniq.groupby(['Gender'])
gender_count = gby_gender_uniq['SN'].count()
gender_count

#put all of the values calculated above into the gender demo dict

percent_female = round(gender_count[0]/gender_count.sum() * 100)
percent_male = round(gender_count[1]/gender_count.sum() * 100)
percent_other = round(gender_count[2]/gender_count.sum() * 100)

Gender_demographics = [{'Gender': 'Female', 'Total Count' : gender_count[0], 'Percent': percent_female},
                   {'Gender': 'Male', 'Total Count' : gender_count[1], 'Percent': percent_male},
                    {'Gender': 'Other/non-disclosing', 'Total Count' : gender_count[2], 'Percent': percent_other}]
                       
Gender_demographics_df = pd.DataFrame.from_dict(Gender_demographics)
               
Gender_demographics_df


Unnamed: 0,Gender,Total Count,Percent
0,Female,81,14.0
1,Male,484,84.0
2,Other/non-disclosing,11,2.0


# Purchasing Analysis by Gender

In [7]:
#Analysis by gender


gby_gender = df.groupby(['Gender'])
avgprice_by_gender = round(gby_gender['Price'].mean(),2)
count_by_gender = gby_gender['Price'].count()
total_by_gender = gby_gender['Price'].sum()

female_avg_purch = round(total_by_gender[0]/gender_count[0],2) # calculated in above section
male_avg_purch = round(total_by_gender[1]/gender_count[1],2)
OND_avg_purch = round(total_by_gender[2]/gender_count[2],2)

# add avg prices by gender to dict
gender_analysis = [{'Gender':'Female', 'Average Purchase Price': avgprice_by_gender[0],
                    'Total Purchases':round(total_by_gender[0]), 'Avg Total per person': female_avg_purch},
                   {'Gender':'Male', 'Average Purchase Price': avgprice_by_gender[1],
                    'Total Purchases':round(total_by_gender[1]), 'Avg Total per person': male_avg_purch},
                   {'Gender':'Other/ND', 'Average Purchase Price': avgprice_by_gender[2],
                    'Total Purchases':round(total_by_gender[2]), 'Avg Total per person': OND_avg_purch}]
                   
gender_analysis_df = pd.DataFrame.from_dict(gender_analysis)
               
gender_analysis_df



Unnamed: 0,Gender,Average Purchase Price,Total Purchases,Avg Total per person
0,Female,3.2,362.0,4.47
1,Male,3.02,1968.0,4.07
2,Other/ND,3.35,50.0,4.56


# Age demographics