In [None]:
# Dependencies and Setup
import pandas as pd
import numpy as np
from IPython.display import display

In [None]:
# Relative location of the purchase data CSV read by the next cell
file_path = "Resources/purchase_data.csv"

In [None]:
# Load the purchase data CSV into a pandas DataFrame
purchase_read = pd.read_csv(file_path, delimiter=',')
# Work on an independent copy so the frame as loaded stays untouched
players_df = purchase_read.copy()
# Preview the first rows
players_df.head()

In [None]:
# Each distinct screen name (SN) is one player; dropna=False counts a
# missing SN as a value, exactly as len(Series.unique()) would
uniq_players = players_df["SN"].nunique(dropna=False)
# Wrap the total in a one-row DataFrame so it renders as a table
total_players = pd.DataFrame({"Total Players": [uniq_players]})
total_players

In [None]:
# Overall purchasing summary across the whole purchase table
# Number of distinct items sold (distinct Item ID values)
uniq_items = players_df["Item ID"].nunique(dropna=False)
# Mean price per purchase, rounded to cents
avg_price = round(players_df["Price"].mean(), 2)
# Number of purchases (non-null Purchase ID values)
total_purchase = players_df["Purchase ID"].count()
# Revenue is the sum of every purchase price
total_revenue = players_df["Price"].sum()
# One-row DataFrame holding the four summary figures
purchase_analysis = pd.DataFrame(
    {
        "Number of Unique Items": [uniq_items],
        "Average Price": [avg_price],
        "Total Purchase": [total_purchase],
        "Total Revenue": [total_revenue],
    }
)
# Show the money columns with a $ sign and two decimal places
purchase_analysis.style.format(
    {'Average Price': "${0:.2f}", "Total Revenue": "${0:.2f}"}
)

In [None]:
# Gender Demographics
# Keep a single row per player by dropping repeated SN values
dup_drop_sn = players_df.drop_duplicates(subset=['SN'])
# Group the de-duplicated players by gender
gender_demo = dup_drop_sn.groupby('Gender')
# Players per gender
total_count = gender_demo['Gender'].count()
# Total number of players across all genders
demo_total = total_count.sum()
# Assemble the demographics table: the counts first, then each gender's share
gender_demo_df = total_count.to_frame("Total Count")
gender_demo_df["Percentage of Players"] = total_count.div(demo_total)
# Render the share as a percentage with two decimal places
gender_demo_df.style.format({'Percentage of Players': "{0:.2%}"})

In [None]:
# Purchasing analysis (Gender)
# Rows of the purchase table belonging to each gender
male_players = players_df.loc[players_df["Gender"] == "Male", :]
female_players = players_df.loc[players_df["Gender"] == "Female", :]
other_players = players_df.loc[players_df["Gender"] == "Other / Non-Disclosed", :]

# Number of distinct players (unique SN values) per gender
male_players_count = len(male_players['SN'].unique())
female_players_count = len(female_players['SN'].unique())
other_players_count = len(other_players['SN'].unique())

# Number of purchases (unique Purchase ID values) per gender
male_purchase_count = len(male_players['Purchase ID'].unique())
female_purchase_count = len(female_players['Purchase ID'].unique())
other_purchase_count = len(other_players['Purchase ID'].unique())

# Total amount spent per gender
male_tot_purchase = male_players['Price'].sum()
female_tot_purchase = female_players['Price'].sum()
other_tot_purchase = other_players['Price'].sum()

# Formats a number as a dollar amount with two decimal places
as_dollars = '${0:.2f}'.format

# Purchasing analysis dataframe (Gender); rows are Male, Female, Other / Non-Disclosed.
# The two average columns are stored as formatted strings; the total stays numeric.
purchase_df = pd.DataFrame({
    'Purchase Count': [male_purchase_count, female_purchase_count, other_purchase_count],
    'Average Purchase Price': [as_dollars(male_tot_purchase / male_purchase_count),
                               as_dollars(female_tot_purchase / female_purchase_count),
                               as_dollars(other_tot_purchase / other_purchase_count)],
    # Column label spelled correctly (it previously read 'Total Purcase Value')
    'Total Purchase Value': [male_tot_purchase, female_tot_purchase, other_tot_purchase],
    'Avg Total Purchase Per Person': [as_dollars(male_tot_purchase / male_players_count),
                                      as_dollars(female_tot_purchase / female_players_count),
                                      as_dollars(other_tot_purchase / other_players_count)],
})

# Label the rows by gender for display. set_index returns a new frame, so
# purchase_df itself keeps its default integer index. The numeric total is
# styled as currency so every money column is shown the same way.
purchase_df.set_index(pd.Index(['Male', 'Female', 'Other / Non-Disclosed'])).style.format(
    {'Total Purchase Value': "${0:.2f}"})

In [None]:
# Age demographics of players
# Bin edges for grouping ages; pd.cut intervals are right-inclusive by default,
# i.e. (0, 9], (9, 14], ..., (39, 100]
bins = [0, 9, 14, 19, 24, 29, 34, 39, 100]
# One label per interval defined by the bin edges
age_group = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
# Tag every purchase row with the age range of the buyer
players_df['Age Ranges'] = pd.cut(players_df.Age, bins, labels=age_group)
# Keep one row per player (unique SN) so each player is counted once
age_demo = players_df.drop_duplicates(['SN'])
# Players per age range; observed=False keeps empty ranges in the table with a
# count of 0 and makes the behaviour explicit across pandas versions
age_counts = age_demo.groupby('Age Ranges', observed=False)['Age'].count()
# The share of players is taken over the actual number of unique players
# instead of a hard-coded total, so it stays correct if the data changes
df = pd.DataFrame({"Count": age_counts,
                   'Percentage of Players': age_counts / len(age_demo)})
# Render the share as a percentage with two decimal places
df.style.format({'Percentage of Players': "{0:.2%}"})

In [None]:
# Purchasing analysis by age range
# Purchase rows ordered by age, grouped by the 'Age Ranges' column
players_by_age = players_df.sort_values('Age')
age_ranges = players_by_age.groupby('Age Ranges').count()
# Purchases per age range
purchase_count = age_ranges['Age']
# Mean and total price per age range
pur_analysis = players_by_age.groupby('Age Ranges')
avg_purchase_price = pur_analysis['Price'].mean()
tot_purchase_price = pur_analysis['Price'].sum()

# Average total purchase per person
# One row per player (unique SN), ordered by age and grouped by age range
tot_uniq_person = players_df.drop_duplicates(subset=['SN'])
uniq_person_grp = tot_uniq_person.sort_values('Age').groupby('Age Ranges')
# Players per age range, counted through their 'Purchase ID' entry
range_person_count = uniq_person_grp['Purchase ID'].count()
# Total spent in the range divided by the number of players in it
avg_purchase_per_person = tot_purchase_price.div(range_person_count)

# Purchase analysis (age) dataframe
purchase_analysis = pd.DataFrame(
    {
        "Purchase Count": purchase_count,
        "Average Purchase Price": avg_purchase_price,
        "Total Purchase Value": tot_purchase_price,
        "Avg Total Purchase Per Person": avg_purchase_per_person,
    }
)

# Show every money column with a $ sign and two decimal places
purchase_analysis.style.format(
    {
        'Average Purchase Price': "${0:.2f}",
        "Total Purchase Value": "${0:.2f}",
        "Avg Total Purchase Per Person": "${0:.2f}",
    }
)

In [None]:
# Top spenders
# Group the purchase rows by player (SN)
group_sn = players_df.groupby('SN')
# Total amount spent by each player
p_value = group_sn['Price'].sum()
# Number of purchases made by each player
p_count = group_sn['Item ID'].count()

# Top spenders dataframe, one row per player
top_spenders = pd.DataFrame(
    {
        "Purchase Count": p_count,
        "Average Purchase Price": p_value.div(p_count),
        "Total Purchase Value": p_value,
    }
)

# Show the five players with the highest total spend, money columns as $x.xx
top_spenders.sort_values(by='Total Purchase Value', ascending=False).head().style.format(
    {'Average Purchase Price': "${0:.2f}", "Total Purchase Value": "${0:.2f}"}
)


In [None]:
# Most popular items
# Keep only the Item ID, Item Name and Price columns
item_df = players_df[['Item ID', 'Item Name', 'Price']]
# Group by item (ID and name together)
item_group = item_df.groupby(['Item ID', 'Item Name'])
# Number of purchases of each item
phs_count = item_group['Item ID'].count()
# Total amount spent on each item
phs_value = item_group['Price'].sum()

# Most popular items dataframe; the item price is the total spent divided by
# the purchase count, rounded to cents
most_pop_items = pd.DataFrame(
    {
        "Purchase Count": phs_count,
        "Item Price": phs_value.div(phs_count).round(2),
        "Total Purchase Value": phs_value,
    }
)
# Show the five most purchased items, money columns as $x.xx
most_pop_items.sort_values(by='Purchase Count', ascending=False).head().style.format(
    {'Item Price': "${0:.2f}", "Total Purchase Value": "${0:.2f}"}
)

In [None]:
# Most profitable items
# Rank the per-item table by total purchase value and show the top five,
# money columns formatted with a $ sign and two decimal places
most_pop_items.sort_values(by='Total Purchase Value', ascending=False).head().style.format(
    {'Item Price': "${0:.2f}", "Total Purchase Value": "${0:.2f}"}
)