In [2]:
import pandas as pd

In [3]:
resource_file_path = "Resources/purchase_data.csv"
# Check the encoding
with open(resource_file_path) as f:
    print(f)

<_io.TextIOWrapper name='Resources/purchase_data.csv' mode='r' encoding='UTF-8'>


In [4]:
df = pd.read_csv(resource_file_path)
print(df.loc[df["SN"] == "Adairialis76"])
df.head()

     Purchase ID            SN  Age Gender  Item ID          Item Name  Price
467          467  Adairialis76   16   Male      123  Twilight's Carver   2.28


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [5]:
#Check if data needs to be cleaned
df.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [6]:
# Count total number of players

grouped_by_players = df.groupby("SN")
players_number = len(grouped_by_players)
print(f"Total number of players: {players_number}")

Total number of players: 576


In [7]:
### Purchasing Analysis (Total)
# * Number of Unique Items
unique_items = df["Item Name"].unique()
unique_items_number = len(unique_items)
print(f"Number of Unique Items: {unique_items_number}")

Number of Unique Items: 179


In [8]:
# * Average Purchase Price
average_purchase_price = round(df["Price"].mean(),2)
print(f"Average Purchase Price: ${average_purchase_price}")

# * Total Number of Purchases
total_number_purchases = len(df["Purchase ID"])
print(f"Total Number of Purchases: {total_number_purchases}")

# * Total Revenue
total_revenue = df["Price"].sum()
print(f"Total_revenue: ${total_revenue}")


Average Purchase Price: $3.05
Total Number of Purchases: 780
Total_revenue: $2379.77


In [9]:
### Gender Demographics

# Create function whith argument that should specify gender from Capital letter
def print_count_players_by_gender(gender):
    if gender in ["Female", "Male"]:
        gender_df = df.loc[df['Gender'] == gender]
    else: gender_df = df.loc[(df['Gender'] != "Female")&(df['Gender'] != "Male")
]
    gender_players_count = len(gender_df.groupby("SN"))
    gender_players_percentage = round(gender_players_count/players_number*100, 2)
    print(f"Percentage and Count of {gender} Players: {gender_players_percentage}% ({gender_players_count})")

def count_players_by_gender(gender):
    if gender in ["Female", "Male"]:
        gender_df = df.loc[df['Gender'] == gender]
    else: gender_df = df.loc[(df['Gender'] != "Female")&(df['Gender'] != "Male")
]
    gender_players_count = len(gender_df.groupby("SN"))
    gender_players_percentage = round(gender_players_count/players_number*100, 2)
    return gender_players_count

# Call the function with different parameters
# * Percentage and Count of Male Players
print_count_players_by_gender("Male")

# * Percentage and Count of Female Players
print_count_players_by_gender("Female")

# * Percentage and Count of Other / Non-Disclosed
print_count_players_by_gender("Non-Disclosed gender")


Percentage and Count of Male Players: 84.03% (484)
Percentage and Count of Female Players: 14.06% (81)
Percentage and Count of Non-Disclosed gender Players: 1.91% (11)


In [42]:
### Purchasing Analysis (Gender)
# * The below each broken by gender
#   * Purchase Count
purchase_count_by_gender = df[["Gender", "Purchase ID"]].groupby(["Gender"]).count()

male_purchases = purchase_count_by_gender['Purchase ID'][1]
female_purchases = purchase_count_by_gender['Purchase ID'][0]
otherGender_purchases = purchase_count_by_gender['Purchase ID'][2]

print(f"\nPurchase Count By Gender:\nMale: {male_purchases}\nFemale: {female_purchases}\nNon-Disclosed Gender: {otherGender_purchases}")

#   * Average Purchase Price
purchase_price_by_gender = df[["Gender", "Price"]].groupby(["Gender"]).sum()

male_average_purchase = round(purchase_price_by_gender['Price'][1]/count_players_by_gender("Male"),2)
female_average_purchase = round(purchase_price_by_gender['Price'][0]/count_players_by_gender("Female"),2)
otherGender_average_purchase = round(purchase_price_by_gender['Price'][2]/count_players_by_gender("Other"),2)

print(f"\nAverage Purchase Price By Gender:\nMale: {male_average_purchase}\nFemale: {female_average_purchase}\nNon-Disclosed Gender: {otherGender_average_purchase}")

#   * Total Purchase Value
print(f"\nTotal Purchase Value: {purchase_price_by_gender}")

#   * Average Purchase Total per Person by Gender
# At first apply groupby by both columns - Gender and SD to find out how many purchases did each player
purchase_per_person = df[["Gender", "SN", "Purchase ID"]].groupby(["Gender", "SN"]).count()
# print(purchase_per_person.head())

# Then use group by only by gender to find the average purshase count per person by gender
average_purchase_per_person = round(purchase_per_person.groupby("Gender").mean(), 2)
print(f"Average Purchase Total per Person by Gender: \n{average_purchase_per_person}")


Purchase Count By Gender:
Male: 652
Female: 113
Non-Disclosed Gender: 15

Average Purchase Price By Gender:
Male: 4.07
Female: 4.47
Non-Disclosed Gender: 4.56

Total Purchase Value:                          Price
Gender                        
Female                  361.94
Male                   1967.64
Other / Non-Disclosed    50.19
Average Purchase Total per Person by Gender: 
                       Purchase ID
Gender                            
Female                        1.40
Male                          1.35
Other / Non-Disclosed         1.36


In [78]:
### Age Demographics
# * The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)
min_age = df["Age"].min()

max_age = df["Age"].max()

bins = [x for x in range(min_age, max_age+3, 4)]
bins_labels = ["[7-11)", "[11-15)", "[15-19)", "[19-23)", "[23-27)",
               "[27-31)", "[31-35)", "[35-39)", "[39-43)", "[43-47]"]

#   * Purchase Count
purchase_by_age_bins = df
purchase_by_age_bins["Age Group"] = pd.cut(df["Age"], bins, labels = bins_labels, include_lowest = True)
purchase_count_by_age_bins = purchase_by_age_bins[["Age Group", "Purchase ID"]].groupby("Age Group").count()
print(f"\nPurchase Count by Age Bins: \n{purchase_count_by_age_bins}")

# #   * Average Purchase Price
purchase_price_by_age_bins = round(purchase_by_age_bins[["Age Group", "Price"]].groupby("Age Group").mean(),2)
print(f"\nAverage Purchase Price by Age Bins: \n {purchase_price_by_age_bins}")

# #   * Total Purchase Value
purchase_total_value_by_age_bins = round(purchase_by_age_bins[["Age Group", "Price"]].groupby("Age Group").sum(),2)
print(f"\nTotal Purchase Value by Age Bins: \n {purchase_total_value_by_age_bins}")

#   * Average Purchase Total per Person by Age Group
# As column Age Group is considered as category type, we need to get rid of the NaN values
purchase_sn = purchase_by_age_bins[["Age Group", "SN", "Purchase ID"]].groupby(["SN", "Age Group"]).count().dropna()
purchase_total_by_age_bins = round(purchase_sn.groupby("Age Group").mean(),2)
print(f"\nAverage Purchase Total per Person by Age Group: \n {purchase_total_by_age_bins}")


Purchase Count by Age Bins: 
           Purchase ID
Age Group             
[7-11)              39
[11-15)             47
[15-19)            101
[19-23)            298
[23-27)            150
[27-31)             60
[31-35)             45
[35-39)             27
[39-43)             10
[43-47]              3

Average Purchase Price by Age Bins: 
            Price
Age Group       
[7-11)      3.28
[11-15)     2.93
[15-19)     3.04
[19-23)     3.03
[23-27)     3.06
[27-31)     2.97
[31-35)     2.93
[35-39)     3.54
[39-43)     3.12
[43-47]     2.35

Total Purchase Value by Age Bins: 
             Price
Age Group        
[7-11)     127.75
[11-15)    137.81
[15-19)    307.24
[19-23)    903.84
[23-27)    459.54
[27-31)    178.05
[31-35)    131.66
[35-39)     95.64
[39-43)     31.18
[43-47]      7.06

Average Purchase Total per Person by Age Group: 
            Purchase ID
Age Group             
[7-11)            1.30
[11-15)           1.34
[15-19)           1.25
[19-23)           1.42
[23-27)  

In [None]:
### Top Spenders

# * Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
#   * SN
#   * Purchase Count
#   * Average Purchase Price
#   * Total Purchase Value