In [1]:
# Import libraries 
import pandas as pd
import numpy as np 
import os

In [2]:
# Load the purchase records from the CSV file into a DataFrame
path = 'Resources/purchase_data.csv'
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# Get a quick overview of the data:
# `shape` is a (rows, columns) tuple, so its first entry is the number of records
df.shape

(780, 7)

In [4]:
# Count the missing values in every column (all zeros means nothing to impute or drop)
df.isna().sum()

Purchase ID    0
SN             0
Age            0
Gender         0
Item ID        0
Item Name      0
Price          0
dtype: int64

In [5]:
# Count fully duplicated rows, i.e. rows where every column matches an earlier row
df.duplicated().sum()

0

In [6]:
# Check each column's dtype to confirm the numeric fields were parsed as numbers
df.dtypes

Purchase ID      int64
SN              object
Age              int64
Gender          object
Item ID          int64
Item Name       object
Price          float64
dtype: object

In [7]:
# Count the rows whose 'SN' value already appeared in an earlier row;
# a non-zero result means some players made more than one purchase
df.duplicated(subset='SN').sum()

204

# Player Count
- Display the total number of players


In [8]:
# Every distinct 'SN' value is counted as one player
number_players = df['SN'].nunique()

# Show the player count as a one-row summary table
pd.DataFrame({'Total_Players': [number_players]}, index=[0])

Unnamed: 0,Total_Players
0,576


# Purchasing Analysis (Total)
- Run basic calculations to obtain: Number of Unique Items, Average Purchase Price, Total Number of Purchases, Total Revenue

- Create a summary data frame to hold the results

- Optional: give the displayed data cleaner formatting

- Display the summary data frame

In [9]:
# Count how many times each item name was purchased (most frequent first)
df['Item Name'].value_counts()

Final Critic                                    13
Oathbreaker, Last Hope of the Breaking Storm    12
Persuasion                                       9
Extraction, Quickblade Of Trembling Hands        9
Fiery Glass Crusader                             9
                                                ..
Undead Crusader                                  1
Endbringer                                       1
Celeste                                          1
Alpha, Reach of Ending Hope                      1
Ghost Reaver, Longsword of Magic                 1
Name: Item Name, Length: 179, dtype: int64

In [10]:
# Number of distinct items sold, identified by 'Item ID'
unique_items = df['Item ID'].nunique()

# Number of purchases: one row of the dataset per purchase
total_purchases = len(df)

# Revenue over all purchases and the mean price paid per purchase
total_revenue = df['Price'].sum()
avg_price = df['Price'].mean()

# Currency-formatted strings for display (thousands separator on the total)
mean_format = f'${avg_price:.2f}'
total_format = f'${total_revenue:,.2f}'

In [11]:
# Purchasing analysis (total): a single-row table holding the overall metrics
summary_df = pd.DataFrame([{
    'Number of Unique Items': unique_items,
    'Average Price': mean_format,
    'Number of Purchases': total_purchases,
    'Total Revenue': total_format,
}])
summary_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


# Gender Demographics
 
- Percentage and Count of Male Players

- Percentage and Count of Female Players

- Percentage and Count of Other / Non-Disclosed

In [12]:
# List the distinct gender labels, ordered from most to fewest purchase rows
keys = df['Gender'].value_counts().index.tolist()
keys

['Male', 'Female', 'Other / Non-Disclosed']

In [13]:
# Purchase rows per gender; players with several purchases are counted once per purchase here
values = df['Gender'].value_counts().values.tolist()
values

[652, 113, 15]

In [14]:
# Distinct male players: a player with several purchases is counted once
male_count = df.loc[df['Gender'] == 'Male', 'SN'].nunique()
print(male_count)

# Male share of all distinct players, formatted as a percentage string
m_percent = male_count / df['SN'].nunique()
male_percent = f'{m_percent:.2%}'
print(male_percent)

484
84.03%


In [15]:
# Distinct female players: a player with several purchases is counted once
female_count = df.loc[df['Gender'] == 'Female', 'SN'].nunique()
print(female_count)

# Female share of all distinct players, formatted as a percentage string
f_percent = female_count / df['SN'].nunique()
female_percent = f'{f_percent:.2%}'
print(female_percent)

81
14.06%


In [16]:
# Distinct 'Other / Non-Disclosed' players: a player with several purchases is counted once
other_count = df.loc[df['Gender'] == 'Other / Non-Disclosed', 'SN'].nunique()
print(other_count)

# 'Other / Non-Disclosed' share of all distinct players, formatted as a percentage string
oth_percent = other_count / df['SN'].nunique()
other_percent = f'{oth_percent:.2%}'
print(other_percent)

11
1.91%


In [17]:
# Gender demographics table: one row per gender with its distinct-player
# count and its formatted share of all players
gender_df = pd.DataFrame(
    [
        [male_count, male_percent],
        [female_count, female_percent],
        [other_count, other_percent],
    ],
    columns=['Total Count', 'Percentage of Players'],
    index=['Male', 'Female', 'Other / Non-Disclosed'],
)
gender_df

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


# Purchasing Analysis (Gender)

- Run basic calculations to obtain purchase count, avg. purchase price, total purchase value, Average Purchase Total per Person by Gender

- Create a summary data frame to hold the results

- Optional: give the displayed data cleaner formatting

- Display the summary data frame

In [18]:
# Mean purchase price per gender, as a one-column dataframe indexed by gender
g_price = df.groupby('Gender')['Price'].mean().to_frame('Average Purchase Price')
g_price

Unnamed: 0_level_0,Average Purchase Price
Gender,Unnamed: 1_level_1
Female,3.203009
Male,3.017853
Other / Non-Disclosed,3.346


In [19]:
# Total purchase value per gender, as a one-column dataframe indexed by gender
g_sum = df.groupby('Gender')['Price'].sum().to_frame('Total Purchase Value')
g_sum

Unnamed: 0_level_0,Total Purchase Value
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [20]:
# Number of purchases per gender, as a one-column dataframe indexed by gender
p_count = df.groupby('Gender')['Purchase ID'].count().to_frame('Purchase Count')
p_count

Unnamed: 0_level_0,Purchase Count
Gender,Unnamed: 1_level_1
Female,113
Male,652
Other / Non-Disclosed,15


In [21]:
# Average total spend per person: total purchase value of each gender divided
# by the number of distinct players ('SN') of that gender.
# Both sides are grouped by 'Gender', so the division aligns on the gender
# label itself; no manual sorting is needed and no gender present in the data
# can be left out of the divisor.
price_person = df.groupby('Gender')['Price'].sum() / df.groupby('Gender')['SN'].nunique()

# Name the column while converting to a dataframe, instead of renaming the
# implicit column label 0 afterwards (that rename silently does nothing if the
# divided series ever carries a name)
price_p = price_person.to_frame(name='Avg Total Purchase per Person')
price_p

Unnamed: 0_level_0,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1
Female,4.468395
Male,4.065372
Other / Non-Disclosed,4.562727


In [22]:
# Start assembling the per-gender summary: join average price and total value
# on their shared 'Gender' index
pa_1 = pd.merge(left=g_price, right=g_sum, left_index=True, right_index=True)
pa_1

Unnamed: 0_level_0,Average Purchase Price,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,3.203009,361.94
Male,3.017853,1967.64
Other / Non-Disclosed,3.346,50.19


In [23]:
# Add the purchase count per gender, again joining on the shared 'Gender' index
pa_2 = pd.merge(left=pa_1, right=p_count, left_index=True, right_index=True)
pa_2

Unnamed: 0_level_0,Average Purchase Price,Total Purchase Value,Purchase Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Female,3.203009,361.94,113
Male,3.017853,1967.64,652
Other / Non-Disclosed,3.346,50.19,15


In [24]:
# Add the average total purchase per person, completing the per-gender summary
pa_3 = pd.merge(left=pa_2, right=price_p, left_index=True, right_index=True)
pa_3

Unnamed: 0_level_0,Average Purchase Price,Total Purchase Value,Purchase Count,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,3.203009,361.94,113,4.468395
Male,3.017853,1967.64,652,4.065372
Other / Non-Disclosed,3.346,50.19,15,4.562727


In [25]:
# Put the columns in presentation order.
# .copy() makes purchasing_analysis an independent dataframe instead of a
# selection that pandas tracks as derived from pa_3, so assigning to its
# columns later does not raise SettingWithCopyWarning.
purchasing_analysis = pa_3[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 
                            'Avg Total Purchase per Person']].copy()
purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,4.468395
Male,652,3.017853,1967.64,4.065372
Other / Non-Disclosed,15,3.346,50.19,4.562727


In [26]:
# Format the money columns as currency strings for display.
# The numbers are always read from the numeric pa_3 frame and a new dataframe
# is built with assign(), which has two benefits:
#   - the cell can be re-run safely (applying a float format to values that
#     are already strings would raise);
#   - nothing is written into a column selection of pa_3, so pandas does not
#     emit SettingWithCopyWarning.
# The column order established for purchasing_analysis is preserved.
purchasing_analysis = pa_3[list(purchasing_analysis.columns)].assign(**{
    'Average Purchase Price': pa_3['Average Purchase Price'].apply('${:.2f}'.format),
    'Total Purchase Value': pa_3['Total Purchase Value'].apply('${:,.2f}'.format),
    'Avg Total Purchase per Person': pa_3['Avg Total Purchase per Person'].apply('${:.2f}'.format),
})

In [27]:
# Display the purchasing analysis by gender with the currency formatting applied
purchasing_analysis

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


# Age Demographics

- Each of the metrics below is broken into age bins with 4 years between the bin edges (e.g. <10, 10-14, 15-19, etc.)

 - Purchase Count
 - Average Purchase Price
 - Total Purchase Value
 - Average Purchase Total per Person by Age Group
 
 
 
- Establish bins for ages

- Categorize the existing players using the age bins. Hint: use pd.cut()

- Calculate the numbers and percentages by age group

- Create a summary data frame to hold the results

- Optional: round the percentage column to two decimal places

- Display Age Demographics Table