In [1]:
# importing my dependencies

import pandas as pd
import os, csv

In [2]:
# bring in my csv file

pymoli = os.path.join ("Resources","purchase_data.csv")

In [3]:
# convert file to a pandas dataFrame

hop_df = pd.read_csv(pymoli)
hop_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [4]:
## PLAYER COUNT

# determining the number of Unique Screennames, as well as their purchase counts

uniquePlayerCount = hop_df["SN"].nunique()

# creating a dictionary out of the values we need

player_list = [{"Total Players":uniquePlayerCount}]

# turning that dictionary to a DataFrame, and printing it to the terminal
totalPlayers = pd.DataFrame(player_list)
totalPlayers

Unnamed: 0,Total Players
0,576


In [5]:
## PURCHASING ANALYSIS (TOTAL)

# determining the number of Unique Items that were purchased, average purchase price,
# total number of purchases, and total revenue

uniqueItemCount = hop_df['Item ID'].nunique()
avgPrice = hop_df['Price'].mean()
totalPurchases = hop_df['Purchase ID'].count()
totalRevenue = hop_df['Price'].sum()

# creating a dictionary

purchasingAnalysis = [{
    "Number of Unique Items":uniqueItemCount,
    "Average Price":avgPrice,
    "Number of Purchases":totalPurchases,
    "Total Revenue":totalRevenue
}]

# creating the DataFrame

pa_df = pd.DataFrame(purchasingAnalysis)

# format the DataFrame

format_dict = {'Number of Unique Items':"{:}",
              'Average Price':"${:.2f}",
              'Number of Purchases':"{:}",
              'Total Revenue':"${0:,.2f}"}
cleanedPA = pa_df.style.format(format_dict)

# print new DataFrame to terminal

cleanedPA

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


In [6]:
# GENDER DEMOGRAPHICS

# determining the percentage and count of Male players, the percentage and
# count of Female players, and the percentage and count of Other / Non-Disclosed

# get rid of all duplicate entries
gender = hop_df.drop_duplicates(subset="SN")
gender2 = gender.groupby("Gender")
gender3 = gender2.count()

# pulling individual counts from the DataFrame

maleCount = gender3.loc["Male","Purchase ID"]
femaleCount = gender3.loc["Female","Purchase ID"]
otherCount = gender3.loc["Other / Non-Disclosed","Purchase ID"]

# turn my total players DataFrame into an integer

playersCount = totalPlayers.iloc[0,0]

# calculating percentages

malePercent = (maleCount / playersCount) * 100
femalePercent = (femaleCount / playersCount) * 100
otherPercent = (otherCount / playersCount) * 100

# creating the dictionary (soon-to-be DataFrame)

gd = {
    "Total Count":{"Male":maleCount,
                  "Female":femaleCount,
                  "Other / Non-Disclosed":otherCount},
    "Percentage of Players":{"Male":malePercent,
                            "Female":femalePercent,
                            "Other / Non-Disclosed":otherPercent}
}

# creating the DataFrame

genderDemographics = pd.DataFrame(gd)
genderDemographics

# formatting the values

format_dict2 = {
    "Total Count":"{:}",
    "Percentage of Players":"{:.2f}%"}

cleanedGD = genderDemographics.style.format(format_dict2)

# output to the terminal

cleanedGD

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [7]:
## PURCHASING ANALYSIS (GENDER)

# Setting Index to Gender

paG = hop_df.set_index("Gender")

# obtaining the counts for female

femaleP_count = paG.loc["Female","Purchase ID"].count()
femalePrice = paG.loc["Female","Price"]
femalePrice_tot = femalePrice.sum()

# obtaining the counts for male

maleP_count = paG.loc["Male","Purchase ID"].count()
malePrice = paG.loc["Male","Price"]
malePrice_tot = malePrice.sum()

# obtaining the counts for other

otherP_count = paG.loc["Other / Non-Disclosed","Purchase ID"].count()
otherPrice = paG.loc["Other / Non-Disclosed","Price"]
otherPrice_tot = otherPrice.sum()

# creating a dictionary of our values

paGender = {
    "Purchase Count":{
        "Female":femaleP_count,
        "Male":maleP_count,
        "Other / Non-Disclosed":otherP_count},
    "Average Purchase Price":{
        "Female":femalePrice.mean(),
        "Male":malePrice.mean(),
        "Other / Non-Disclosed":otherPrice.mean()},
    "Total Purchase Value":{
        "Female":femalePrice_tot,
        "Male":malePrice_tot,
        "Other / Non-Disclosed":otherPrice_tot},
    "Avg Total Purchase per Person":{
        "Female":(femalePrice_tot / femaleCount),
        "Male":(malePrice_tot / maleCount),
        "Other / Non-Disclosed":(otherPrice_tot / otherCount)}
    }

# converting the dictionary to a DataFrame

paGender_df = pd.DataFrame(paGender)
paGender_df

# formatting the values

format_dict3 = {
    "Purchase Count":"{:}",
    "Average Purchase Price":"${:.2f}",
    "Total Purchase Value":"${0:,.2f}",
    "Avg Total Purchase per Person":"${:.2f}"
}

cleanedPA_Gender = paGender_df.style.format(format_dict3)
cleanedPA_Gender.index.name = "Gender"

# Print out table to terminal

cleanedPA_Gender

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [12]:
## AGE DEMOGRAPHICS

# establishing my bin for age

ageRange = [0,9,14,19,24,29,34,39,45]

groups = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

# get rid of all duplicate entries

ad = hop_df.drop_duplicates(subset="SN")

# cut my DataFrame into my bins

pd.cut(ad['Age'],ageRange,labels=groups)

# add series to DataFrame

ad['Age Range'] = pd.cut(ad['Age'],ageRange,labels=groups)

# creating a total count by age range dataframe

total_count = ad[['Age Range','SN']]
total_count2 = total_count.groupby("Age Range").count()
total_count2["Total Count"] = total_count2['SN']

AD_f = total_count2[['Total Count']]

# creating a percentage of players dataframe
# remember to use variable uniquePlayerCount for the percentage calculation

AD_f['Percentage of Players'] = [((x / uniquePlayerCount)*100) for x in AD_f['Total Count']]

# formatting the DataFrame

format_dict4 = {
    "Total Count":"{:}",
    "Percentage of Players":"{:.2f}%"
}

cleanedAD_f = AD_f.style.format(format_dict4)

# printing result to the terminal

cleanedAD_f

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
