In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:
total_players = purchase_data_df["SN"].nunique()
print(f"Number of Total Players = {total_players}") 

Number of Total Players = 576


In [3]:
unique_items = purchase_data_df["Item ID"].nunique()
print(f"Number of Unique Items = {unique_items}") 

Number of Unique Items = 179


In [4]:
average_price = purchase_data_df["Price"].mean()
print(f"Average Price = {average_price}") 

Average Price = 3.050987179487176


In [5]:
number_of_purchases = purchase_data_df["Purchase ID"].nunique()

In [6]:
total_revenue = purchase_data_df["Price"].sum()

In [7]:
# total_players = purchase_data_df["SN"].nunique()
# unique_items = purchase_data_df["Item ID"].nunique()
# average_price = purchase_data_df["Price"].mean()
# number_of_purchases = purchase_data_df["Purchase ID"].nunique()
# total_revenue = purchase_data_df["Price"].sum()
summary_table = {"Total Players": [total_players], 
                 "Unique Items": [unique_items],
                 "Average Price": [f"${round(average_price, 2)}"],
                 "Number of Purchases": [number_of_purchases],
                 "Total Revenue": [f"${float(total_revenue)}"]}
summary_table_df = pd.DataFrame(summary_table)
summary_table_df

Unnamed: 0,Total Players,Unique Items,Average Price,Number of Purchases,Total Revenue
0,576,179,$3.05,780,$2379.77


In [8]:
# Determining total number of male players
male_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male", :]
male_players = male_df["SN"].nunique()

# Determining percentage of male players
total_players = purchase_data_df["SN"].nunique()
percent_male = (male_players / total_players) * 100
round(percent_male, 2)

84.03

In [9]:
# Determining total number of female players
female_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female", :]
female_players = female_df["SN"].nunique()

# Determining percentage of female players
total_players = purchase_data_df["SN"].nunique()
percent_female = (female_players / total_players) * 100
round(percent_female, 2)

14.06

In [10]:
# Determine Count of Other / Non-Disclosed
other_players = total_players - (female_players + male_players)
other_players

# Determine percentage of Other / Non-Disclosed
percent_other = (other_players / total_players * 100)
round(percent_other, 2)

1.91

In [11]:
# # Add summary data frame
gender_summary_table = purchase_data_df.groupby(["Gender"])
gender_summary_table_actual = gender_summary_table.nunique()
gender_summary_table_actual.head()
#                        "Percentage of Players", [percent_male]} 
#                  "Unique Items": [unique_items],
#                  "Average Price": [f"${round(average_price, 2)}"],
#                  "Number of Purchases": [number_of_purchases],
#                  "Total Revenue": [f"${float(total_revenue)}"]}
# gender_summary_table_df = pd.DataFrame(gender_summary_table)
# gender_summary_table_df.index = ["Male", "Female", "Other / Non-Disclosed"]


Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,113,81,22,90,90,79
Male,652,484,39,178,178,144
Other / Non-Disclosed,15,11,8,13,13,12


In [12]:
# Determine total purchases for Males
male_sales = purchase_data_df.loc[purchase_data_df["Gender"] == "Male", :]
male_sales = len(male_sales)
male_sales

652

In [13]:
# Determine total purchases for Females
female_sales = purchase_data_df.loc[purchase_data_df["Gender"] == "Female", :]
female_sales = len(female_sales)
female_sales

113

In [14]:
# Determine total purchases for Other / Non-Disclosed
other_sales = purchase_data_df.loc[purchase_data_df["Gender"] == "Other / Non-Disclosed", :]
other_sales = len(other_sales)
other_sales

15

In [15]:
# Determine Male Average Purchase Price, Total Purchase Value, and Avg Total Purchase per Person
male_avg_price_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Male", ["Price"]]
print(round(male_avg_price_df.mean(),2))
male_avg_purchase = male_avg_price_df.mean()
print(round(male_avg_price_df.sum(),2))
male_total_purchase = male_avg_price_df.sum()
male_avg_total_purchase = male_total_purchase / male_players
print(round(male_avg_total_purchase, 2))

Price    3.02
dtype: float64
Price    1967.64
dtype: float64
Price    4.07
dtype: float64


In [16]:
# Determine Female Average Purchase Price, Total Purchase Value, and Avg Total Purchase per Person
female_avg_price_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Female", ["Price"]]
print(round(female_avg_price_df.mean(),2))
female_avg_purchase = female_avg_price_df.mean()
print(round(female_avg_price_df.sum(),2))
female_total_purchase = female_avg_price_df.sum()
female_avg_total_purchase = female_total_purchase / female_players
print(round(female_avg_total_purchase, 2))

Price    3.2
dtype: float64
Price    361.94
dtype: float64
Price    4.47
dtype: float64


In [17]:
# Determine Other / Non-Disclosed Average Purchase Price, Total Purchase Value, and Avg Total Purchase per Person
other_avg_price_df = purchase_data_df.loc[purchase_data_df["Gender"] == "Other / Non-Disclosed", ["Price"]]
print(round(other_avg_price_df.mean(),2))
other_avg_purchase = other_avg_price_df.mean()
print(round(other_avg_price_df.sum(),2))
other_total_purchase = other_avg_price_df.sum()
other_avg_total_purchase = other_total_purchase / other_players
print(round(other_avg_total_purchase, 2))

Price    3.35
dtype: float64
Price    50.19
dtype: float64
Price    4.56
dtype: float64


In [18]:
print(purchase_data_df["Age"].max())
print(purchase_data_df["Age"].min())

45
7


In [19]:
# Create bins for values
bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
# Create labels for the bins
group_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Slice data and place into bins
pd.cut(purchase_data_df["Age"], bins, labels= group_labels).head()


0    20-24
1      40+
2    20-24
3    20-24
4    20-24
Name: Age, dtype: category
Categories (8, object): ['<10' < '10-14' < '15-19' < '20-24' < '25-29' < '30-34' < '35-39' < '40+']

In [23]:
# Place data series into a new column
purchase_data_df["Age Group"] = pd.cut(purchase_data_df["Age"], bins, labels= group_labels)
purchase_data_df.head()


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [29]:
# Create GroupBy dataframe based on Age Group
age_group_df = purchase_data_df.groupby("Age Group")
# Find how numbers for Age Group
total_count = age_group_df['SN'].nunique()
# Determine age percentages
age_percentage = (total_count/total_players) * 100
# Create new Age Demographics dataframe
age_demographics_df = pd.DataFrame({"Total Count": total_count, "Percentage of Players": round(age_percentage,2)})
age_demographics_df


Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


In [43]:
age_group_df.head()
purchase_count = age_group_df['Price'].count()
avg_purchase_price = age_group_df['Price'].sum()/age_group_df['Price'].count()
total_purchase_value = age_group_df['Price'].sum()
avg_purchase_per_person = age_group_df['Price'].sum()/age_group_df['SN'].nunique()
purchase_analysis_age_df = pd.DataFrame({"Purchase Count": purchase_count, 'Average Purchase Price': round(avg_purchase_price,2), 'Total Purchase Value': total_purchase_value, "Avg Total Purchase per Person": round(avg_purchase_per_person,2)})
purchase_analysis_age_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,3.35,77.13,4.54
10-14,28,2.96,82.78,3.76
15-19,136,3.04,412.89,3.86
20-24,365,3.05,1114.06,4.32
25-29,101,2.9,293.0,3.81


In [54]:
sn_group_df = purchase_data_df.groupby("SN")
purchase_count = sn_group_df['Price'].count()
avg_purchase_price = sn_group_df['Price'].sum()/sn_group_df['Price'].count()
total_purchase_value = sn_group_df['Price'].sum()
top_spenders_df = pd.DataFrame({"Purchase Count": purchase_count, 'Average Purchase Price': round(avg_purchase_price,2), 'Total Purchase Value': total_purchase_value})
top_spenders_df = top_spenders_df.sort_values(by='Total Purchase Value', ascending=False)
top_spenders_df.head()


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.79,18.96
Idastidru52,4,3.86,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.4,13.62
Iskadarya95,3,4.37,13.1


In [97]:
id_idname_group_df = purchase_data_df.groupby(['Item ID', 'Item Name'])
purchase_count = id_idname_group_df['Price'].count()
item_price = id_idname_group_df['Price'].sum()/id_idname_group_df['Price'].count()
total_purchase_value = id_idname_group_df['Price'].sum()
most_popular_items_df = pd.DataFrame({"Purchase Count": purchase_count, 'Item Price': item_price, 'Total Purchase Value': total_purchase_value})
most_popular_items_df = most_popular_items_df.sort_values(by="Purchase Count", ascending=False)
# most_popular_items_df = most_popular_items_df.style.format({"Item Price":"${:,.2f}", "Total Purchase Value":"${:,.2f}"})
most_popular_items_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,4.614615,59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,4.23,50.76
145,Fiery Glass Crusader,9,4.58,41.22
132,Persuasion,9,3.221111,28.99
108,"Extraction, Quickblade Of Trembling Hands",9,3.53,31.77
