In [18]:
# Dependencies and Setup
import pandas as pd

# File to Load 
file = "../Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
df = pd.read_csv(file)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [19]:
# Find total player count
len(set(df["SN"]))

576

In [20]:
# Purchasing analysis (total)
# Run basic calculations to obtain number of unique items, average price, etc.
Unique_Items = len(set(df["Item Name"]))
Average_Cost = df["Price"].mean()
Total_Purchases = len(df["Purchase ID"])
Total_Revenue = df["Price"].sum()

# Create a summary data frame to hold the results
data_items = {
    "Unique Items": [Unique_Items],
    "Average Cost": [Average_Cost],
    "Total Purchases": [Total_Purchases],
    "Total Revenue": [Total_Revenue]
}
summary_df = pd.DataFrame(data_items, columns = ["Unique Items", "Average Cost", "Total Purchases", "Total Revenue"])

# Optional: give the displayed data cleaner formatting
summary_df["Average Cost"] = summary_df["Average Cost"].astype(float).map("${:,.2f}".format)
summary_df["Total Revenue"] = summary_df["Total Revenue"].astype(float).map("${:,.2f}".format)

# Display the summary data frame
summary_df

Unnamed: 0,Unique Items,Average Cost,Total Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [21]:
# Gender Demographics
# Drop duplicate values
unique_player_df = df.drop_duplicates(subset = ["SN"])

#Count of players
new_df = pd.DataFrame(unique_player_df["Gender"].value_counts())
renamed_df = new_df.rename(columns = {"Gender": "Total Count"})

# Percentage of players
new2_df = pd.DataFrame(unique_player_df["Gender"].value_counts(normalize = True)*100)
renamed2_df = new2_df.rename(columns = {"Gender": "Percentage"})

# Combine values into dataframe
gender_summary_df = renamed_df.merge(renamed2_df, how = "outer", left_index = True, right_index = True)

#Format and display dataframe
gender_summary_df["Percentage"] = gender_summary_df["Percentage"].astype(float).map("{:,.2f}%".format)
gender_summary_df

Unnamed: 0,Total Count,Percentage
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [22]:
# Purchasing Analysis (Gender)
# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
grouped_gender_df = df.groupby(["Gender"])

purchase_count = grouped_gender_df["Purchase ID"].count()
average_price = grouped_gender_df["Price"].mean()
total_value = grouped_gender_df["Price"].sum()

#Filter by gender then groupby person to find average purchase total per person per gender
df_female = df.loc[df["Gender"] == "Female", :].copy()
average_female = df_female.groupby("SN")["Price"].sum().mean()

df_male = df.loc[df["Gender"] == "Male", :].copy()
average_male = df_male.groupby("SN")["Price"].sum().mean()

df_other = df.loc[df["Gender"] == "Other / Non-Disclosed", :].copy()
average_other = df_other.groupby("SN")["Price"].sum().mean()

# Create a summary data frame to hold the results
data2_items = [average_female, average_male, average_other]
gender_summary_df = pd.DataFrame({"Purchase Count": purchase_count, 
                                  "Average Price": average_price,
                                  "Total Value": total_value,
                                  "Average Price per Person": data2_items})
# Optional: give the displayed data cleaner formatting
gender_summary_df["Average Price"] = gender_summary_df["Average Price"].astype(float).map("${:,.2f}".format)
gender_summary_df["Average Price per Person"] = gender_summary_df["Average Price per Person"].astype(float).map("${:,.2f}".format)
gender_summary_df["Total Value"] = gender_summary_df["Total Value"].astype(float).map("${:,.2f}".format)

# Display the summary data frame
gender_summary_df

Unnamed: 0_level_0,Purchase Count,Average Price,Total Value,Average Price per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [23]:
# Age Demographics
# Establish bins for ages
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 200]
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Categorize the existing players using the age bins. Hint: use pd.cut()
df["Age Bracket"] = pd.cut(df["Age"], bins, labels = age_groups, include_lowest = True)

# Calculate the numbers and percentages by age group
unique_player_df = df.drop_duplicates(subset = ["SN"])

new_df = pd.DataFrame(unique_player_df["Age Bracket"].value_counts())
renamed_df = new_df.rename(columns = {"Age Bracket": "Player Count"})

new2_df = pd.DataFrame(unique_player_df["Age Bracket"].value_counts(normalize = True)*100)
renamed2_df = new2_df.rename(columns = {"Age Bracket": "Percentage"})

# Create a summary data frame to hold the results
age_summary_df = renamed_df.merge(renamed2_df, how = "outer", left_index = True, right_index = True)

# Format
age_summary_df = age_summary_df.reindex(index = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"])
age_summary_df["Percentage"] = age_summary_df["Percentage"].astype(float).map("%{:,.2f}".format)
# Display Age Demographics Table
age_summary_df

Unnamed: 0,Player Count,Percentage
<10,17,%2.95
10-14,22,%3.82
15-19,107,%18.58
20-24,258,%44.79
25-29,77,%13.37
30-34,52,%9.03
35-39,31,%5.38
40+,12,%2.08


In [24]:
# Purchasing Analysis (Age)
# Bin the purchase_data data frame by age
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 200]
age_groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
df["Age Bracket"] = pd.cut(df["Age"], bins, labels = age_groups, include_lowest = True)

# Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below
grouped_age_df = df.groupby(["Age Bracket"])

purchase_count = grouped_age_df["Purchase ID"].count()
average_price = grouped_age_df["Price"].mean()
total_value = grouped_age_df["Price"].sum()

price_sum = grouped_age_df["Price"].sum()
player_count = age_summary_df["Player Count"]
avg_per_person = price_sum/player_count

# Create a summary data frame to hold the results
age_analysis_df = pd.DataFrame({"Purchase Count": purchase_count, 
                                  "Average Price": average_price,
                                  "Total Value": total_value,
                                  "Average Price per Person": avg_per_person})

# Optional: give the displayed data cleaner formatting
age_analysis_df["Average Price"] = age_analysis_df["Average Price"].astype(float).map("${:,.2f}".format)
age_analysis_df["Average Price per Person"] = age_analysis_df["Average Price per Person"].astype(float).map("${:,.2f}".format)
age_analysis_df["Total Value"] = age_analysis_df["Total Value"].astype(float).map("${:,.2f}".format)

# Display the summary data frame
age_analysis_df

Unnamed: 0_level_0,Purchase Count,Average Price,Total Value,Average Price per Person
Age Bracket,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [28]:
# Top Spenders
# Run basic calculations to obtain the results in the table below
grouped_sn_df = df.groupby(["SN"])
total_price = grouped_sn_df["Price"].sum()
purchase_count = grouped_sn_df["Price"].size()

# Create a a copy dataframe with only unique values to work with
unique_player_df = df.drop_duplicates(subset = ["SN"]).copy()

# Set SN as index
unique_player_df = unique_player_df.set_index("SN")

# Create dummary data frame
unique_player_df["Total Price"] = total_price
unique_player_df["Purchase Count"] = purchase_count
unique_player_df["Average Purchase Price"] = total_price/purchase_count

#Sort by descending total price
top_spenders_df = unique_player_df.sort_values(by = "Total Price", ascending = False)

# Optional: give the displayed data cleaner formatting
top_spenders_df["Total Price"] = top_spenders_df["Total Price"].astype(float).map("${:,.2f}".format)
top_spenders_df["Average Purchase Price"] = top_spenders_df["Average Purchase Price"].astype(float).map("${:,.2f}".format)
top_spenders_df = top_spenders_df.drop(columns = ["Purchase ID","Price", "Age", "Item ID", "Item Name"])
# Display a preview of the summary data frame
top_spenders_df.head()

Unnamed: 0_level_0,Gender,Age Bracket,Total Price,Purchase Count,Average Purchase Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Lisosia93,Male,25-29,$18.96,5,$3.79
Idastidru52,Male,20-24,$15.45,4,$3.86
Chamjask73,Female,20-24,$13.83,3,$4.61
Iral74,Male,20-24,$13.62,4,$3.40
Iskadarya95,Male,20-24,$13.10,3,$4.37


In [38]:
# Most Popular Items
# Retrieve the Item ID, Item Name, and Item Price columns
item_df = df[["Item ID", "Item Name", "Price"]]

# Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value
item_gb = item_df.groupby(["Item ID", "Item Name"])

purchase_count = item_gb.count()
total_purchase_value = item_gb.sum()

# Create a summary data frame to hold the results
#Couldn't create new dataframe because items did not have index so used merge instead
item_working_df = pd.merge(item_df, total_purchase_value, on = ["Item ID", "Item Name"])
item_summary_df = pd.merge(item_working_df, purchase_count, on = ["Item ID", "Item Name"])
item_summary_df.columns = ["Item ID", "Item Name", "Price", "Total Purchase Value", "Purchase Count"]

#Manually getting format given by groupby in dataframe
item_summary_df = item_summary_df.drop_duplicates(subset = ["Item ID"])
item_summary_df = item_summary_df.set_index(["Item ID", "Item Name"])

# Sort the purchase count column in descending order
new_item_summary_df = item_summary_df.sort_values(by = "Purchase Count", ascending = False)

# Optional: give the displayed data cleaner formatting
new_item_summary_df["Price"] = new_item_summary_df["Price"].astype(float).map("${:,.2f}".format)
new_item_summary_df["Total Purchase Value"] = new_item_summary_df["Total Purchase Value"].astype(float).map("${:,.2f}".format)

# Display a preview of the summary data frame
new_item_summary_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price,Total Purchase Value,Purchase Count
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,$4.88,$59.99,13
178,"Oathbreaker, Last Hope of the Breaking Storm",$4.23,$50.76,12
108,"Extraction, Quickblade Of Trembling Hands",$3.53,$31.77,9
82,Nirvana,$4.90,$44.10,9
145,Fiery Glass Crusader,$4.58,$41.22,9


In [40]:
# Most Profitable Items
# Sort the above table by total purchase value in descending order
item_summary_df = item_summary_df.sort_values(by = "Total Purchase Value", ascending = False)

# Optional: give the displayed data cleaner formatting
item_summary_df["Price"] = item_summary_df["Price"].astype(float).map("${:,.2f}".format)
item_summary_df["Total Purchase Value"] = item_summary_df["Total Purchase Value"].astype(float).map("${:,.2f}".format)

#Display a preview of the data frame
item_summary_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Price,Total Purchase Value,Purchase Count
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,$4.88,$59.99,13
178,"Oathbreaker, Last Hope of the Breaking Storm",$4.23,$50.76,12
82,Nirvana,$4.90,$44.10,9
145,Fiery Glass Crusader,$4.58,$41.22,9
103,Singed Scalpel,$4.35,$34.80,8
...,...,...,...,...
28,"Flux, Destroyer of Due Diligence",$1.06,$2.12,2
126,Exiled Mithril Longsword,$2.00,$2.00,1
125,Whistling Mithril Warblade,$1.00,$2.00,2
104,Gladiator's Glaive,$1.93,$1.93,1


In [29]:
print(f"Three Conclusions Drawn from the Data: \n \n1. The majority (84.03%) of purchases are made by males \n2. The most populated age demographic in this data set is the 20-25 range \n3. The item 'Final Critic' (ID #92) was the mosts successful seller in terms of both purchase count and total purchase value")

Three Conclusions Drawn from the Data: 
 
1. The majority (84.03%) of purchases are made by males 
2. The most populated age demographic in this data set is the 20-25 range 
3. The item 'Final Critic' (ID #92) was the mosts successful seller in terms of both purchase count and total purchase value
