In [74]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data_df = pd.read_csv(file_to_load)
purchase_data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [81]:
# Player Count
# ~~total number of players~~

total_player_df = purchase_data_df[["SN", "Gender", "Age"]]

# This part removes the duplicate values from "SN" column
total_player_No_Dup_df = total_player_df.drop_duplicates(subset=["SN"])

total_player = total_player_No_Dup_df["SN"].count()

print("Total Player Count: " + str(total_player))

Total Player Count: 576


In [82]:
# Purchasing Analysis (Total)

# Variables
total_unique_items = purchase_data_df["Item ID"].nunique()
total_purchases = purchase_data_df["Purchase ID"].count()
average_item_price = purchase_data_df["Price"].mean()
total_revenue = purchase_data_df["Price"].sum()


# Summary Table
purch_summary_df = pd.DataFrame([
    {"Unique Items purchased": total_unique_items,
     "Total Purchases": total_purchases,
     "Average Item Price": average_item_price,
     "Total Revenue": total_revenue}
])

# Column Formating

purch_summary_df["Average Item Price"] = purch_summary_df["Average Item Price"].map("${:,.2f}".format)
purch_summary_df["Total Revenue"] = purch_summary_df["Total Revenue"].map("${:,.2f}".format)

#Display purchasing analysis
purch_summary_df

Unnamed: 0,Unique Items purchased,Total Purchases,Average Item Price,Total Revenue
0,179,780,$3.05,"$2,379.77"


In [83]:
# Player Gender Demographics
# ~~Find breakdown count & percentage of player genders in data~~


gender_count_df = pd.DataFrame([total_player_No_Dup_df["Gender"].value_counts()]).transpose()
gender_percent = gender_count_df["Gender"] / total_player
gender_count_df.rename(columns ={"Gender": "Total Count"})
gender_count_df["Percent of Players"] = gender_percent.map("{:.2%}".format)
gender_count_df_2 = gender_count_df.rename(columns ={"Gender": "Total Count"})
gender_count_df_2

Unnamed: 0,Total Count,Percent of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [84]:
# Purchasing Analysis (Gender)

# Setup the groupby to grab the data
gender_purch_group_df = purchase_data_df.groupby(["Gender"])

# Set the variables by modifying the function at the end of the groupby reference
gender_purch_avg = gender_purch_group_df["Price"].mean()
gender_purch_total = gender_purch_group_df["Price"].sum()
gender_purch_count = gender_purch_group_df["Purchase ID"].count()
gender_purch_person = (gender_purch_total / gender_count_df_2["Total Count"])

# Put it all in a new data frame
gender_purch_summary_df = pd.DataFrame({"Purchase Count": gender_purch_count,
                                        "Average Purchase Price": gender_purch_avg,
                                        "Total Purchase Value": gender_purch_total,
                                        "Avg Total Purchase per Person": gender_purch_person})


# Column Formating
gender_purch_summary_df["Average Purchase Price"] = gender_purch_summary_df["Average Purchase Price"].map("${:,.2f}".format)
gender_purch_summary_df["Total Purchase Value"] = gender_purch_summary_df["Total Purchase Value"].map("${:,.2f}".format)
gender_purch_summary_df["Avg Total Purchase per Person"] = gender_purch_summary_df["Avg Total Purchase per Person"].map("${:,.2f}".format)

#Display purchasing analysis
gender_purch_summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [88]:
# Age Demographics
# ~~Find breakdown count & percentage of player ages in data~~

#setup initial dataframe
player_age_demo_def = total_player_No_Dup_df

#set up bin and group names
bins = [0, 9, 14, 19, 24, 29, 34, 39, 2000]
group_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

#add the group name to player_age_demo_def
player_age_demo_def["Age Group"] = pd.cut(player_age_demo_def["Age"], bins, labels=group_names, include_lowest=True)

#Create the dataframe that will hold final summary table
age_count_df = pd.DataFrame([player_age_demo_def["Age Group"].value_counts()]).transpose()
age_percent = age_count_df["Age Group"] / total_player
age_count_df["Percent of Players"] = age_percent.map("{:.2%}".format)
age_count_df_2 = age_count_df.rename(columns ={"Age Group": "Total Count"})
age_count_df_2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


Unnamed: 0,Total Count,Percent of Players
20-24,258,44.79%
15-19,107,18.58%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
10-14,22,3.82%
<10,17,2.95%
40+,12,2.08%
