# Dependancies

In [None]:
#import dependencies
import pandas as pd
import numpy as npy

#get the file
file_path = "Resources/purchase_data.csv"

#read the file into data frame
main_df = pd.read_csv(file_path)

# Functions

In [115]:
#format a nice currency string
def pretty_money(num_float):
    text_money =     '${:,.2f}'.format(num_float)
    
    return text_money

In [135]:
main_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [132]:
#get a count of the columns
main_df.count()

Purchase ID    780
SN             780
Age            780
Gender         780
Item ID        780
Item Name      780
Price          780
dtype: int64

In [133]:
#get the column names
main_df.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

In [134]:
#view some stats
main_df.describe()

Unnamed: 0,Purchase ID,Age,Item ID,Price
count,780.0,780.0,780.0,780.0
mean,389.5,22.714103,92.114103,3.050987
std,225.310896,6.659444,52.775943,1.169549
min,0.0,7.0,0.0,1.0
25%,194.75,20.0,48.0,1.98
50%,389.5,22.0,93.0,3.15
75%,584.25,25.0,139.0,4.08
max,779.0,45.0,183.0,4.99


# Player Count

In [22]:
#get a count of the total player base

#this line does not remove duplicates, rather the total count of records for this column
#total_player_count = main_df["SN"].count()

#count the unique players and get the length of the list returned
total_player_count = len(main_df["SN"].unique())

print(total_player_count)

576


In [12]:
#this accomplishes the same as above without the extra overhead
#same result
unique_item_count = len(main_df["Item ID"].value_counts())
print(unique_item_count)

183


In [13]:
#total purchase values
total_values_num = main_df["Price"].sum()
print(total_values_num)

2379.77


In [5]:
#average item price
average_item_price = main_df["Price"].mean()
average_item_price = round(average_item_price,2)
print(average_item_price)

3.05


In [179]:
#the total number of purchases 
purchases_count = total_player_count
print(purchases_count)

576


In [14]:
purchasing_summary_df = pd.DataFrame({"Number of Unique Items": unique_item_count
                          ,"Average Price": pretty_money(average_item_price)
                          ,"Number of Purchases": purchases_count
                          ,"Total Revenue": pretty_money(total_values_num)                            
                                    }, index=[0])
purchasing_summary_df.head()

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


In [77]:
#https://stackoverflow.com/questions/16947336/binning-a-dataframe-in-pandas-in-python
#46- Pandas DataFrames: Finding Min/Max Element    https://www.youtube.com/watch?v=C8sPV25PCs0

#find the min / max age to make bins 
#7 and 45, so use 5 - 50 for bins
min_age_index = main_df["Age"].idxmin()
min_age = main_df["Age"][min_age_index]
#print(min_age)

max_age_index = main_df["Age"].idxmax()
max_age = main_df["Age"][max_age_index]
#print(max_age)

print(f'Youngest age is: {min_age} vs. Oldest age: {max_age}')

Youngest age is: 7 vs. Oldest age: 45


In [18]:
#IDEA:
#use https://www.statcan.gc.ca/eng/concepts/definitions/age2
#               Age Categories, Life Cycle Groupings


# Gender Demographics

In [165]:
gender_df = pd.DataFrame(main_df["Gender"].value_counts())
#numtest = main_df.loc[usks_df["Gender"]=="Male"]
gender_df.head()

Unnamed: 0,Gender
Male,652
Female,113
Other / Non-Disclosed,15


In [99]:
#use len to determine the number of rows 
#gender_divisor = len(gender_df)

gender_divisor = gender_df["Gender"].sum()
gender_divisor

780

In [121]:
#get gender percentages
#genders / total amount * 100 then round it
#gender_df["Gender Percentage"] = pd.DataFrame(gender_df["Gender"]/gender_divisor*100)
numdivi = round(gender_df["Gender"]/gender_divisor*100,2)

gender_df["Player Percentage"] = pd.DataFrame(numdivi)
gender_df.head()

Unnamed: 0,Gender,Player Percentage
Male,652,83.59
Female,113,14.49
Other / Non-Disclosed,15,1.92


In [130]:
#rearrange columns


genderdemo_df = gender_df.rename(columns={"Gender":"Player Count"})
genderdemo_df.head()

#genderdemo_df.head()

Unnamed: 0,Player Count,Player Percentage
Male,652,83.59
Female,113,14.49
Other / Non-Disclosed,15,1.92


In [177]:
#Purchase Analysis - Purchase Count
#Python%20Week%204/KickstarterClean.ipynb
#agg count group by the gender then Player Name
pag_purchasecount_df = pd.DataFrame(main_df.groupby("Gender")["SN"].count())
pag_purchasecount_df = pag_purchasecount_df.rename(columns={"SN":"Item Count"})
pag_purchasecount_df

Unnamed: 0_level_0,Item Count
Gender,Unnamed: 1_level_1
Female,113
Male,652
Other / Non-Disclosed,15


In [169]:
#Python%20Week%204/KickstarterClean.ipynb
#agg sum of grouping by the gender then price
genpurch = pd.DataFrame(main_df.groupby("Gender")["Price"].sum())
genpurch


Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [175]:
#Python%20Week%204/Merging.ipynb
##merges the two data frames from above
#pur_analysis_gen = pd.merge(pur_count_by_gen, total_pur_by_gen, left_index = True, right_index = True)
purchase_analysis_merge_df = pd.merge(groupbytype,genpurch, on="Gender", how="left")
purchase_analysis_merge_df

Unnamed: 0_level_0,SN,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,113,361.94
Male,652,1967.64
Other / Non-Disclosed,15,50.19
