In [1]:
# Importing Dependencies
import pandas as pd
# os.path.join is used to build the data-file path from its parts instead of hard-coding a path string, so the path is not tied to one operating system's separator
import os
%config Completer.use_jedi = False

In [2]:
# Assemble the CSV location from its folder and file name
filepath = os.path.join("Resources", "purchase_data.csv")

# Load the purchase records into a dataframe
purchaseDataDf = pd.read_csv(filepath_or_buffer=filepath)

# Show the first five rows to confirm the file was parsed as expected
purchaseDataDf.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# PLAYER COUNT

In [18]:
# Players are counted as distinct values in the SN column, so a player
# with several purchases is only counted once
numberOfPlayers = purchaseDataDf['SN'].nunique()

# Wrap the count in a one-row dataframe for display
players = [{"Total Players" : numberOfPlayers}]
totalPlayers = pd.DataFrame(data=players)

# Section banner, then the table as the cell's displayed value
print("TOTAL PLAYERS:", "--------------", sep="\n")
totalPlayers

TOTAL PLAYERS:
--------------


Unnamed: 0,Total Players
0,576


In [19]:
# Section banner for the overall purchasing summary
print('PURCHASING ANALYTICS (TOTAL)', '----------------------------', sep='\n')

# Raw aggregates taken from the purchase dataframe
uniqueItems = purchaseDataDf['Item ID'].nunique()        # distinct Item ID values
averagePrice = purchaseDataDf['Price'].mean()            # mean of the Price column
totalPurchases = purchaseDataDf['Purchase ID'].count()   # non-null Purchase ID entries
totalRevenue = purchaseDataDf['Price'].sum()             # sum of the Price column

# Dollar strings with two decimals, used only for display
averagePriceFormatted = f"${averagePrice:.2f}"
totalRevenueFormatted = f"${totalRevenue:.2f}"

# One-record seed holding the unique-item count; it becomes the first column
itemDF = [{"Number Of Unique Items" : uniqueItems}]

# Build the summary row: start from the seed record and attach the remaining
# columns in display order (average price, purchase count, revenue)
purchaseAnalytics = pd.DataFrame(itemDF).assign(**{
    "Average Price": averagePriceFormatted,
    "Total Purchases": totalPurchases,
    "Total Revenue": totalRevenueFormatted,
})
purchaseAnalytics

PURCHASING ANALYTICS (TOTAL)
----------------------------


Unnamed: 0,Number Of Unique Items,Average Price,Total Purchases,Total Revenue
0,179,$3.05,780,$2379.77


In [5]:
# GENDER DEMOGRAPHICS

In [5]:
# Denominator for every gender percentage: number of distinct SN values overall
playerCount = len(pd.unique(purchaseDataDf["SN"]))

# Keep only the rows whose Gender is "Male", restricted to the SN and Gender columns
malePlayers = purchaseDataDf.loc[purchaseDataDf["Gender"] == "Male", ["SN", "Gender"]]

# Distinct SN values among those rows
maleCount = len(pd.unique(malePlayers["SN"]))

# Share of all players, rendered as a two-decimal percentage string
malePercentage = f"{maleCount / playerCount * 100:.2f}%"

# One-row summary frame for this gender, to be combined with the others later
maleDf = pd.DataFrame({
    "Total Count": [maleCount],
    "Percentage of Players": [malePercentage],
    "Gender": ["Male"],
})

# Use the gender label as the row index
maleFinal = maleDf.set_index(keys="Gender")

In [6]:
# Same summary as the male cell, this time for Gender == "Female"

# Keep only the rows whose Gender is "Female", restricted to the SN and Gender columns
femalePlayers = purchaseDataDf.loc[purchaseDataDf["Gender"] == "Female", ["SN", "Gender"]]

# Distinct SN values among the female rows
femaleCount = len(pd.unique(femalePlayers["SN"]))

# Share of all players (playerCount comes from the male cell), as a two-decimal percentage string
femalePercentage = f"{femaleCount / playerCount * 100:.2f}%"

# One-row summary frame for this gender, to be combined with the others later
femaleDf = pd.DataFrame({
    "Total Count": [femaleCount],
    "Percentage of Players": [femalePercentage],
    "Gender": ["Female"],
})

# Use the gender label as the row index
femaleFinal = femaleDf.set_index(keys="Gender")

In [15]:
# Same summary once more, for Gender == "Other / Non-Disclosed"

# Keep only the rows with that gender value, restricted to the SN and Gender columns
otherPlayers = purchaseDataDf.loc[
    purchaseDataDf["Gender"] == "Other / Non-Disclosed", ["SN", "Gender"]
]

# Distinct SN values among the Other / Non-Disclosed rows
otherCount = len(pd.unique(otherPlayers["SN"]))

# Share of all players (playerCount comes from the male cell), as a two-decimal percentage string
otherPercentage = f"{otherCount / playerCount * 100:.2f}%"

# One-row summary frame for this gender, to be combined with the others later
otherDf = pd.DataFrame({
    "Total Count": [otherCount],
    "Percentage of Players": [otherPercentage],
    "Gender": ["Other / Non-Disclosed"],
})

# Use the gender label as the row index
otherFinal = otherDf.set_index(keys="Gender")

In [20]:
# Stack the three per-gender summary frames (male, female, other) into one
# table, keeping their Gender index labels as the rows.
# pd.concat is used instead of DataFrame.append: append was deprecated in
# pandas 1.4 and removed in pandas 2.0, so the chained-append version fails
# with AttributeError on current pandas. concat also builds the result in a
# single step and leaves the three input frames untouched.
genderFinalDf = pd.concat([maleFinal, femaleFinal, otherFinal])
print("GENDER DEMOGRAPHICS:")
print("--------------------")
genderFinalDf

# I chose this method to show off a bit more skill in using various pandas commands.
# I originally considered just doing all the math and then manually creating a dataframe
# from the results, but to keep it all within pandas, I decided to manipulate dataframes instead.

GENDER DEMOGRAPHICS:
--------------------


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [17]:
# PURCHASING ANALYSIS (GENDER)