In [1]:
#Import Dependencies
import pandas as pd

In [52]:
#Locate and read file
file = "Resources/purchase_data.csv"
df = pd.read_csv(file)
df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# Player Count

In [59]:
#Determine number of unique players
player_count = df["SN"].nunique()
print("The total number of players is",player_count)

The total number of players is 576


## Purchasing Analysis (Total)

In [74]:
#Number of unique items
items = df["Item ID"].nunique()

#Average purchase price
avg_purchase = round(df["Price"].mean(), 2)

#Total number of purchases
total_purchases = df["Purchase ID"].count()

#Total revenue
revenue = df["Price"].sum()

#Generate output summary
purchase_analysis_df = pd.DataFrame({
    "Unique Items":[items],
    "Average Purchase Price":[avg_purchase],
    "Total Purchases":[total_purchases],
    "Total Revenue":[revenue]
})

#Format output

#Print dataframe
purchase_analysis_df

Unnamed: 0,Unique Items,Average Purchase Price,Total Purchases,Total Revenue
0,179,3.05,780,2379.77


## Gender Demographics

In [88]:
#Simplify database by deleting duplicate screennames
gender_only_df = df.drop_duplicates(subset="SN", keep="first")

#Count the number of players for each gender
gender = gender_only_df["Gender"].value_counts()

#Calculate the percent of players for each gender
percent_gender = round(gender_only_df["Gender"].value_counts()/player_count*100, 2)

#Display the data in a dataframe
gender_demographics_df = pd.DataFrame({
    "Number of players per gender":gender,
    "Percentage of players per gender":percent_gender
})

#Format the data
gender_demographics_df["Percentage of players per gender"] = gender_demographics_df["Percentage of players per gender"].map("{:.2f}".format)

gender_demographics_df

Unnamed: 0,Number of players per gender,Percentage of players per gender
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


## Purchasing Analysis (Gender)

In [86]:
# Group the purchases by gender for analysis
by_gender_df = df.groupby(["Gender"])

# Number of purchases per gender
purchase_count_gender = by_gender_df["Purchase ID"].count()

# Average purchase price per gender
average_purchase_price_gender = round(by_gender_df["Price"].mean(), 2)

# Total purchase value per gender
total_purchase_value_gender = by_gender_df["Price"].sum()

# Average purchase total per person per gender - not sure I'm interpreting this correctly
purchase_total_per_person_gender = round(by_gender_df["Price"].sum()/gender, 2)

#Display the data in a dataframe
purchasing_analysis_gender_df = pd.DataFrame({
    "Purchases":purchase_count_gender,
    "Average Price":average_purchase_price_gender,
    "Total Value":total_purchase_value_gender,
    "Average Total per Person":purchase_total_per_person_gender
    
})

#Format data
purchasing_analysis_gender_df["Average Price"] = purchasing_analysis_gender_df["Average Price"].map("${:.2f}".format)
purchasing_analysis_gender_df["Total Value"] = purchasing_analysis_gender_df["Total Value"].map("${:.2f}".format)
purchasing_analysis_gender_df["Average Total per Person"] = purchasing_analysis_gender_df["Average Total per Person"].map("${:.2f}".format)


#Print output
purchasing_analysis_gender_df

Unnamed: 0_level_0,Purchases,Average Price,Total Value,Average Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

In [11]:
# Create bins and labels for the age ranges
bins = [0, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47]

age_groups = ["under 10", "11-14", "15-18", "19-22", "23-26", "27-30", "31-34", "35-38", "39-42", "43-46"]

# Determine an age group bin for each row
df["Age Group"] = pd.cut(df["Age"], bins, labels=age_groups, include_lowest=True)
df["Age Group"].value_counts()


Age Demographics


19-22       298
23-26       150
15-18       101
27-30        60
11-14        47
31-34        45
under 10     39
35-38        27
39-42        10
43-46         3
Name: Age Group, dtype: int64

In [12]:
# Group the data by Age Group
age_group_df = df.groupby("Age Group")

# Count the number of purchases per age group
purchases_age = age_group_df["Purchase ID"].count()

# Determine the average purchase price per age group
average_purchase_age = round(age_group_df["Price"].mean(), 2)

# Determine the total revenue per age group
total_revenue_age = age_group_df["Price"].sum()

# Average purchase total per person by age group - not sure I'm interpreting this correctly
average_per_person_age = round(age_group_df["Price"].sum()/age_group_df["Age Group"].value_counts(), 2)

#Display the data in a dataframe


Age Group
under 10     39
11-14        47
15-18       101
19-22       298
23-26       150
27-30        60
31-34        45
35-38        27
39-42        10
43-46         3
Name: Purchase ID, dtype: int64

Age Group
under 10    3.28
11-14       2.93
15-18       3.04
19-22       3.03
23-26       3.06
27-30       2.97
31-34       2.93
35-38       3.54
39-42       3.12
43-46       2.35
Name: Price, dtype: float64

Age Group
under 10    127.75
11-14       137.81
15-18       307.24
19-22       903.84
23-26       459.54
27-30       178.05
31-34       131.66
35-38        95.64
39-42        31.18
43-46         7.06
Name: Price, dtype: float64

Age Group  Age Group
under 10   under 10     3.28
11-14      11-14        2.93
15-18      15-18        3.04
19-22      19-22        3.03
23-26      23-26        3.06
27-30      27-30        2.97
31-34      31-34        2.93
35-38      35-38        3.54
39-42      39-42        3.12
43-46      43-46        2.35
dtype: float64

## Top Spenders

In [57]:
#Group by screenname to determine the top spender
sn_df = df.groupby(["SN"])

#Number of purchases per player
purchase_count_sn = sn_df["Purchase ID"].count()

#Average purchase price per player
average_purchase_sn = sn_df["Price"].mean()

#Total purchase value per player
total_purchase_sn = sn_df["Price"].sum()

#Add new data to a dataframe
top_spenders_df = pd.DataFrame({
    "Number of Purchases":purchase_count_sn, 
    "Average Purchase Price":average_purchase_sn,
    "Total Purchase Amount":total_purchase_sn,
})

#Format values in the table


#Sort based on the total purchase value
top_spenders_df = top_spenders_df.sort_values(["Total Purchase Amount"], ascending=False)
top_spenders_df.head(5)

Unnamed: 0_level_0,Number of Purchases,Average Purchase Price,Total Purchase Amount
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,3.792,18.96
Idastidru52,4,3.8625,15.45
Chamjask73,3,4.61,13.83
Iral74,4,3.405,13.62
Iskadarya95,3,4.366667,13.1


## Most Popular Items

## Most Profitable Items