In [3]:
import pandas as pd

In [4]:
pymoli_file = "./Resources/purchase_data.csv"
pymoli_file_df = pd.read_csv(pymoli_file, encoding="ISO-8859-1")
pymoli_file_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [6]:
bins = [0, 9.5, 14.5, 19.5, 24.5, 29.5, 34.5, 39.5, 100]
age_bins = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
pymoli_file_df["Age Range"] = pd.cut(pymoli_file_df["Age"], bins, labels=age_bins, include_lowest=True)
pymoli_file_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Range
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [7]:
sn_count = len(pymoli_file_df["SN"].unique())
print("Total Number of Players: " + str(sn_count))

Total Number of Players: 576


In [8]:
player_count_dict = [{"Total Players": sn_count}]
player_count_df = pd.DataFrame(player_count_dict)
player_count_df

Unnamed: 0,Total Players
0,576


In [9]:
number_unique_items = len(pymoli_file_df["Item Name"].unique())
num_purchases = pymoli_file_df["Price"].count()
sum_purchases = pymoli_file_df["Price"].sum()
avg_purchase_price = round(pymoli_file_df["Price"].sum()/pymoli_file_df["Price"].count())

In [10]:
purchasing_dict = [{"Number of Unique Items": number_unique_items, "Average Purchase Price": avg_purchase_price,
                    "Total Number of Purchases": num_purchases,"Total Revenue": sum_purchases}]
purchase_analysis_df = pd.DataFrame(purchasing_dict)
purchase_analysis_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,3.0,780,2379.77


In [11]:
gender_df = pymoli_file_df[["SN", "Gender"]].drop_duplicates()

In [12]:
gender_series = gender_df["Gender"].value_counts()

In [13]:
#group by gender
#class demo (day 2 - UFO sightings)
gender_demographics_df = pd.DataFrame({"Total Count": gender_series, 
                                       "Percentage of Players": gender_series/sn_count})
gender_demographics_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,0.840278
Female,81,0.140625
Other / Non-Disclosed,11,0.019097


In [29]:
#purchase analysis
#dont want to remove duplicates for this (except for avg purchase by person)
# print(sum_purchases)
# print(num_purchases)
# avg_purchase = sum_purchases/num_purchases
# print(avg_purchase)
# avg_person_purchase = sum_purchases/sn_count
# print(avg_person_purchase)

2379.77
780
3.0509871794871795
4.131545138888889


In [14]:
grouped_gender_analysis = pymoli_file_df.groupby(["Gender"])
grouped_gender_analysis

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001F27CE4D4F0>

In [15]:
purchase_count = grouped_gender_analysis["Price"].count()
purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Price, dtype: int64

In [16]:
total_purchase = grouped_gender_analysis["Price"].sum()
total_purchase

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [17]:
avg_purchase_price = total_purchase/purchase_count
avg_purchase_price

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [76]:
# avg_by_person = total_purchase/purchase_count
# avg_by_person

In [18]:
gender_analysis = pd.DataFrame({"Purchase Count": purchase_count, 
                                "Average Purchase Price": avg_purchase_price,
                                "Total Purchase Value" : total_purchase})
gender_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,113,3.203009,361.94
Male,652,3.017853,1967.64
Other / Non-Disclosed,15,3.346,50.19


In [20]:
age_group = pymoli_file_df.groupby("Age Range")
age_group


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001F27CE4D370>

In [24]:
age_df = pymoli_file_df[["SN", "Age Range"]].drop_duplicates()

In [26]:
age_series = age_df["Age Range"].value_counts()
age_series

20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
40+       12
Name: Age Range, dtype: int64

In [27]:
#group by age
#class demo (day 2 - UFO sightings)
#need to sort this by age group
age_group_df = pd.DataFrame({"Total Count": age_series, 
                            "Percentage of Players": age_series/sn_count})
age_group_df

Unnamed: 0,Total Count,Percentage of Players
20-24,258,0.447917
15-19,107,0.185764
25-29,77,0.133681
30-34,52,0.090278
35-39,31,0.053819
10-14,22,0.038194
<10,17,0.029514
40+,12,0.020833
