In [46]:
# dependencies
import pandas as pd
import numpy as np


In [47]:
# # store filepath 
input_file = "Resources/purchase_data.csv"

# read CSV into Pandas DataFrame
pd_raw_df = pd.read_csv(input_file)

# check file
pd_raw_df.head(20)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
5,5,Yalae81,22,Male,81,Dreamkiss,3.61
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67
8,8,Undjask33,22,Male,21,Souleater,1.1
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58


In [48]:
# Player Count
unique_players = pd_raw_df["SN"].unique()
players_series = pd.Series(unique_players)

players_df = pd.DataFrame({"SN": players_series})
players = players_df.count()
total_players_df = pd.DataFrame({"Total Players": players})
total_players_df = total_players_df.reset_index(drop=True)
total_players_df

Unnamed: 0,Total Players
0,576


In [49]:
## Purchasing Analysis

# number of unique items
unique_items = pd_raw_df["Item Name"].unique()
items_series = pd.Series(unique_items)


items_df = pd.DataFrame({"Item Name": items_series})
items = items_df.count()
total_items_df = pd.DataFrame({"Number of Unique Items": items})
total_items_df = total_items_df.reset_index(drop=True)
total_items_df



Unnamed: 0,Number of Unique Items
0,179


In [50]:
# average price
items_df_grouped = pd_raw_df.groupby("Item Name")
price = items_df_grouped["Price"].mean()
price = price.reset_index(drop=True)
ave_price = (round(price.sum()/items, 2))
ave_price_series = pd.Series(ave_price)
ave_price_df = pd.DataFrame({"Average Price": ave_price_series})
ave_price_df = ave_price_df.reset_index(drop=True)
ave_price_df

Unnamed: 0,Average Price
0,3.04


In [51]:
# number of purchases
unique_items = pd_raw_df["Item Name"].count()
items_series = pd.Series(unique_items)

items_df = pd.DataFrame({"Number of Purchases": items_series})
items_df


Unnamed: 0,Number of Purchases
0,780


In [52]:
# total revenue
total_revenue = pd_raw_df["Price"].sum()
total_revenue_series = pd.Series(total_revenue)

total_revenue_df = pd.DataFrame({"Total Revenue": total_revenue_series})
total_revenue_df

Unnamed: 0,Total Revenue
0,2379.77


In [53]:
# purchasing analysis summary
purchasing_analysis_list = [total_items_df, ave_price_df, items_df, total_revenue_df]
purchasing_analysis_df = pd.concat(purchasing_analysis_list, join='outer', axis=1)
purchasing_analysis_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.04,780,2379.77


In [54]:
## Gender Demographics

# number of males
filter_m_df = pd_raw_df.loc[pd_raw_df["Gender"] == "Male", :]
filter_m_df = filter_m_df.groupby(["SN"])
male_vc = filter_m_df["Gender"].value_counts()
male_series = pd.Series(male_vc)
male_count = male_series.count()
male_series = pd.Series(male_count)
male_series

0    484
dtype: int64

In [55]:
# number of females
filter_f_df = pd_raw_df.loc[pd_raw_df["Gender"] == "Female", :]
filter_f_df = filter_f_df.groupby(["SN"])
f_vc = filter_f_df["Gender"].value_counts()
f_series = pd.Series(f_vc)
f_count = f_series.count()
f_series = pd.Series(f_count)
f_series

0    81
dtype: int64

In [56]:
# number of other/non-disclosed
filter_o_df = pd_raw_df.loc[pd_raw_df["Gender"] == "Other / Non-Disclosed", :]
filter_o_df = filter_o_df.groupby(["SN"])
o_vc = filter_o_df["Gender"].value_counts()
o_series = pd.Series(o_vc)
o_count = o_series.count()
o_series = pd.Series(o_count)
o_series

0    11
dtype: int64

In [57]:
# Percentage of players
total_players = male_count + f_count + o_count
print(total_players)

p_male = round(male_count / total_players * 100, 2)
print(p_male)

p_female = round(f_count / total_players * 100, 2)
print(p_female)

p_other = round(o_count / total_players * 100, 2)
print(p_other)

576
84.03
14.06
1.91


In [58]:
gender_index_series = pd.Series(["Male", "Female", "Other / Non-Disclosed"])
total_count_series = pd.Series([male_count, f_count, o_count])
percentage_series = pd.Series([p_male, p_female, p_other])

gds_dict = {}
gds_dict["Total Count"] = []
gds_dict["Percentage of Players"] = []

for x in total_count_series:
    if "Total Count" in gds_dict:
        gds_dict["Total Count"].append(x)
for x in percentage_series:
    if "Percentage of Players" in gds_dict:
        gds_dict["Percentage of Players"].append(x)

gds_df = pd.DataFrame(gds_dict, index=gender_index_series)
gds_df


Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


In [59]:
# purchasing analysis (gender)

# purchase count (female)
f_item_count = filter_f_df["Item Name"].value_counts()
f_pc_count =len(f_item_count.index)
f_pc_count

# purchase count (male)

# purchase count (other)


113