In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Locate and read file
file = "Resources/purchase_data.csv"
df = pd.read_csv(file)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
## Player Count
print("## Player Count")

player_count = df["SN"].nunique()
print("The total number of players is",player_count)

## Player Count
The total number of players is 576


In [4]:
## Purchasing Analysis (Total)
print("## Purchasing Analysis")

# Number of unique items
items = df["Item ID"].nunique()

# Average purchase price
avg_purchase = round(df["Price"].mean(), 2)

# Total number of purchases
total_purchases = df["Purchase ID"].count()

# Total revenue
revenue = df["Price"].sum()

# Generate output summary
output_df = pd.DataFrame({
    "Available Items":[items],
    "Average Purchase Price":[avg_purchase],
    "Total Purchases":[total_purchases],
    "Total Revenue":[revenue]
})
output_df

## Purchasing Analysis


Unnamed: 0,Available Items,Average Purchase Price,Total Purchases,Total Revenue
0,179,3.05,780,2379.77


In [5]:
## Gender Demographics

gender_only_df = df.drop_duplicates(subset="SN", keep="first")
gender = gender_only_df["Gender"].value_counts()

print("Number of players per gender")
gender

Number of players per gender


Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [6]:
percent_gender = round(gender_only_df["Gender"].value_counts()/player_count*100, 2)
print("Percentage of players per gender")
percent_gender

Percentage of players per gender


Male                     84.03
Female                   14.06
Other / Non-Disclosed     1.91
Name: Gender, dtype: float64

In [7]:
## Purchasing Analysis (Gender)
print("Purchasing Analysis (Gender)")

# Group the purchases by gender for analysis
by_gender_df = df.groupby(["Gender"])

# Number of purchases per gender
by_gender_df["Purchase ID"].count()

Purchasing Analysis (Gender)


Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [8]:
# Average purchase price per gender
round(by_gender_df["Price"].mean(), 2)

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

In [9]:
# Total purchase value per gender
by_gender_df["Price"].sum()

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [19]:
# Average purchase total per person per gender - not sure I'm interpreting this correctly
#by_gender_df["Price"].sum()/by_gender_df["Purchase ID"].count()

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
dtype: float64

In [11]:
## Age Demographics
print("Age Demographics")

# Create bins and labels for the age ranges
bins = [0, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47]

age_groups = ["under 10", "11-14", "15-18", "19-22", "23-26", "27-30", "31-34", "35-38", "39-42", "43-46"]

# Determine an age group bin for each row
df["Age Group"] = pd.cut(df["Age"], bins, labels=age_groups, include_lowest=True)
df


Age Demographics


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,19-22
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,39-42
2,2,Ithergue48,24,Male,92,Final Critic,4.88,23-26
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,23-26
4,4,Iskosia90,23,Male,131,Fury,1.44,19-22
...,...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54,19-22
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63,19-22
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46,19-22
778,778,Sisur91,7,Male,92,Final Critic,4.19,under 10


In [14]:
# Group the data by Age Group
age_group_df = df.groupby("Age Group")

# Count the number of purchases per age group
age_group_df["Purchase ID"].count()

Age Group
under 10     39
11-14        47
15-18       101
19-22       298
23-26       150
27-30        60
31-34        45
35-38        27
39-42        10
43-46         3
Name: Purchase ID, dtype: int64

In [16]:
# Determine the average purchase price per age group
round(age_group_df["Price"].mean(), 2)

Age Group
under 10    3.28
11-14       2.93
15-18       3.04
19-22       3.03
23-26       3.06
27-30       2.97
31-34       2.93
35-38       3.54
39-42       3.12
43-46       2.35
Name: Price, dtype: float64

In [17]:
# Determine the total revenue per age group
age_group_df["Price"].sum()

Age Group
under 10    127.75
11-14       137.81
15-18       307.24
19-22       903.84
23-26       459.54
27-30       178.05
31-34       131.66
35-38        95.64
39-42        31.18
43-46         7.06
Name: Price, dtype: float64

In [18]:
# Average purchase total per person by age group - not sure I'm interpreting this correctly
age_group_df["Price"].sum()/age_group_df["Purchase ID"].count()

Age Group
under 10    3.275641
11-14       2.932128
15-18       3.041980
19-22       3.033020
23-26       3.063600
27-30       2.967500
31-34       2.925778
35-38       3.542222
39-42       3.118000
43-46       2.353333
dtype: float64