In [1]:
#Importing libraries
import csv
import os
import pandas as pd

In [2]:
#location of the purchase data, relative to the current working directory
data_folder, data_file = 'Resources', 'purchase_data.csv'
csvpath = os.path.join(data_folder, data_file)

In [3]:
#loading the csv file into a dataframe
purchases_df = pd.read_csv(csvpath)

#each distinct value of the SN column stands for one individual player
player_list = pd.unique(purchases_df["SN"])
total_players = len(player_list)

#presenting the player count as a one-row dataframe
total_players_df = pd.DataFrame([total_players], columns=["Total players"])
total_players_df

Unnamed: 0,Total players
0,576


In [4]:
#purchasing analysis: headline figures for the whole data set

#total number of purchases
#counted on Purchase ID since it's the column less likely to have missing values
total_purchases = purchases_df["Purchase ID"].count()

#total revenue and average purchase price, both taken from the Price column
price_column = purchases_df["Price"]
total_revenue = price_column.sum()
average_purchase = price_column.mean()

#number of unique items sold, counted by distinct item name
#NOTE(review): if two Item IDs can share one name, counting "Item ID" would give
#a different figure - confirm which definition of "unique item" is intended
items_list = purchases_df["Item Name"].unique()
total_items = len(items_list)

#formatting the output as a one-row dataframe (column order = report order)
purchases_summary = {
    "Unique items sold": [total_items],
    "Total purchases": [total_purchases],
    "Average purchase": [average_purchase],
    "Total revenue": [total_revenue],
}
purchases_results_df = pd.DataFrame(purchases_summary)
purchases_results_df

Unnamed: 0,Unique items sold,Total purchases,Average purchase,Total revenue
0,179,780,3.050987,2379.77


In [5]:
#gender demographics analysis

#creating a new dataframe without duplicated players, so each player is counted
#once; drop_duplicates returns a new frame and leaves purchases_df untouched
player_gender_df = purchases_df.drop_duplicates(subset="SN", keep="first")

#obtaining all data necessary for calculations
#.get(..., 0) yields a count of 0 when a category does not occur in the data,
#where plain [] indexing would raise a KeyError
counted_gender = player_gender_df["Gender"].value_counts()
male_count = counted_gender.get("Male", 0)
female_count = counted_gender.get("Female", 0)
other_count = counted_gender.get("Other / Non-Disclosed", 0)

#creating the output dataframe
#"Percentage of Players" holds each count as a fraction of total_players (0 to 1)
gender_counts = [male_count, female_count, other_count]
gender_results_df = pd.DataFrame({" ":["Male", "Female", "Other / Non Disclosed"],
                                 "Total Count":gender_counts,
                                 "Percentage of Players":[count/total_players
                                                          for count in gender_counts]})

gender_results_df

Unnamed: 0,Unnamed: 1,Total Count,Percentage of Players
0,Male,484,0.840278
1,Female,81,0.140625
2,Other / Non Disclosed,11,0.019097


In [6]:
# analyzing purchases by gender

#creating a new dataframe without the columns this analysis does not need;
#drop(columns=...) returns a new frame, so purchases_df is left untouched
gender_purchases_df = purchases_df.drop(
    columns=["Purchase ID", "SN", "Age", "Item ID", "Item Name"])

#obtaining the number of purchases by gender
#value_counts() labels its result differently across pandas versions (the series
#is named "Gender" before 2.0 and "count" from 2.0 on), so the column name is set
#explicitly and the index name cleared; number_of_purchases_df["Gender"] is then
#available on either version
number_of_purchases_df = (gender_purchases_df["Gender"]
                          .value_counts()
                          .rename("Gender")
                          .rename_axis(None)
                          .to_frame())

number_of_purchases_df

Unnamed: 0,Gender
Male,652
Female,113
Other / Non-Disclosed,15


In [7]:
#obtaining the total $ purchases by gender
#the Price column is selected before aggregating: groupby sum() has no column
#parameter (its first positional argument is numeric_only), so a column name
#passed to sum() would not restrict the result to that column
grouped_df = gender_purchases_df.groupby("Gender")[["Price"]].sum()

grouped_df

Unnamed: 0_level_0,Price
Gender,Unnamed: 1_level_1
Female,361.94
Male,1967.64
Other / Non-Disclosed,50.19


In [13]:
#obtaining the average purchase by gender:
#total spent per gender divided by the number of purchases made by that gender
#(the two series are aligned on their gender labels before dividing)
gender_totals = grouped_df["Price"]
gender_counts = number_of_purchases_df["Gender"]
average_gender = gender_totals.div(gender_counts)

average_gender

Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
dtype: float64