In [3]:
# Dependencies and Setup
import pandas as pd

# File to Load
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


### Player Count

Display the total number of players


In [4]:
# Pass the unique elements in "SN" series to len to get number of elements
uniquePlayers = len(pd.unique(purchase_data["SN"]))

print(f'{uniquePlayers} players.')


576 players.


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
#pass the unique elements in "item id" to len for a count of unique elements
uniqueItems = len(pd.unique(purchase_data['Item ID']))

# pass mean method to price series
averagePrice = purchase_data['Price'].mean()

# number of elements/rows in the dataframe
totalPurchases = len(purchase_data['Price'])

# sum of price data from dataframe
totalRevenue = purchase_data['Price'].sum()

# construct a dataframe that takes the above calculations and makes them readable
summary_df = pd.DataFrame(
    {
        'Unique Items': uniqueItems,
        'Average Price': averagePrice,
        'Total Purchases': totalPurchases,
        'Total Revenue': totalRevenue
    },
    index=range(0,1)
)

# format the column data to be more readable
format_mapping = {"Average Price": "${:,.2f}","Total Revenue": "${:,.2f}"}

summary_df.style.format(format_mapping)

Unnamed: 0,Unique Items,Average Price,Total Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [8]:
# Construct 3 new dataframes based on the possible gender options
male_df = purchase_data[purchase_data["Gender"]=="Male"]

female_df = purchase_data[purchase_data["Gender"]=="Female"]

other_df = purchase_data[(purchase_data["Gender"]!="Female")&(purchase_data["Gender"]!="Male")]

In [9]:
# Define a function that calculates a percent of purchases for the item passed
def percent(count):
    percent = round(100 * (count / int(totalPurchases)),2)
    return percent

In [10]:
# DataFrame with percentages and count of each gender option
genderDemo = pd.DataFrame(
    {
        "Percentage": [
            percent(len(male_df)),
            percent(len(female_df)),
            percent(len(other_df))
        ],
        "Count": [
            len(male_df),
            len(female_df),
            len(other_df)
        ]
    }, index = ["Male","Female","Other"]
)

genderDemo["Percentage"] = genderDemo["Percentage"].map("{:,.2f}%".format)
genderDemo


Unnamed: 0,Percentage,Count
Male,83.59%,652
Female,14.49%,113
Other,1.92%,15



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [17]:
# Dataframe with price data from each gender dataframe
genderSummary = pd.DataFrame(
    {
        "Purchase Count": [ #how many purchases per gender option
            len(female_df),
            len(male_df),
            len(other_df)
                          ],
        "Average Purchase": [ 
            female_df["Price"].mean(),
            male_df["Price"].mean(),
            other_df["Price"].mean()
                            ],
        "Total Purchase Value": [
            female_df["Price"].sum(),
            male_df["Price"].sum(),
            other_df["Price"].sum()
                                ],
        "Average Purchase Total per Person": [ #for each person, find out how much they spent, then find the average for everyone
            female_df.groupby(["SN"])["Price"].sum().mean(),
            male_df.groupby(["SN"])["Price"].sum().mean(),
            other_df.groupby(["SN"])["Price"].sum().mean()
        ]
        
    }, index=['female','male','other'] 
)                               

In [18]:
# Formatting to prettify 
genderSummary['Average Purchase'] = genderSummary['Average Purchase'].map("${:,.2f}".format)
genderSummary['Average Purchase Total per Person'] = genderSummary['Average Purchase Total per Person'].map("${:,.2f}".format)
genderSummary['Total Purchase Value'] = genderSummary['Total Purchase Value'].map("${:,.2f}".format)
genderSummary
              

Unnamed: 0,Purchase Count,Average Purchase,Total Purchase Value,Average Purchase Total per Person
female,113,$3.20,$361.94,$4.47
male,652,$3.02,"$1,967.64",$4.07
other,15,$3.35,$50.19,$4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [14]:
# Creates the bin based on the dataframe series and chosen bin size
def binConstructor(dataframe, series, bins=4):
    minSeries = dataframe[series].min() -1
    maxSeries = dataframe[series].max() 
    dataRange = maxSeries - minSeries
    output = [minSeries]
    for i in range(1,bins+1):
        point = i*(dataRange/bins) + minSeries
        output.append(point)
        
    return output

   
binAge = binConstructor(purchase_data,"Age",8)
binAge

[6, 10.875, 15.75, 20.625, 25.5, 30.375, 35.25, 40.125, 45.0]

In [16]:
# Appends a bin column to the dataframe 
age_df = purchase_data
age_df["Bin"] = pd.cut(age_df["Age"],binAge) 


pd.DataFrame(
    {
        "Purchase Count": age_df["Bin"].value_counts(),
        "Average Purchase Price":age_df.groupby(age_df["Bin"])["Price"].mean(),
        "Total Purchase Value":age_df.groupby(age_df["Bin"])["Price"].sum(),
        "Average Purchase Total":age_df.groupby(["Bin","SN"])["Price"].sum().mean(),
    },
    index = pd.unique(age_df["Bin"])

)

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total
"(15.75, 20.625]",200,3.1078,621.56,4.131545
"(35.25, 40.125]",33,3.404545,112.35,4.131545
"(20.625, 25.5]",325,3.020431,981.64,4.131545
"(30.375, 35.25]",52,2.994423,155.71,4.131545
"(25.5, 30.375]",77,2.875584,221.42,4.131545
"(10.875, 15.75]",54,2.9,156.6,4.131545
"(6.0, 10.875]",32,3.405,108.96,4.131545
"(40.125, 45.0]",7,3.075714,21.53,4.131545


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

