In [1]:
#Importing libraries
import csv
import os
import pandas as pd

In [2]:
#relative location of the purchase data csv, built with the platform's path separator
csvpath = os.path.join(
    "Resources",
    "purchase_data.csv",
)

In [3]:
#load the purchase records from the csv file into a dataframe
purchases_df = pd.read_csv(csvpath)

#every distinct value of the SN column, i.e. one entry per individual player
player_list = purchases_df["SN"].unique()

#the player total is the number of distinct entries found above
total_players = player_list.shape[0]

#wrap the single figure in a one-row dataframe so it is displayed as a table
total_players_df = pd.DataFrame(
    {"Total players": [total_players]}
)
total_players_df

Unnamed: 0,Total players
0,576


In [4]:
#purchasing analysis: summary figures computed over every purchase record

#total revenue is the sum of all purchase prices
total_revenue = purchases_df["Price"].sum()

#average purchase is the mean of the same price column
average_purchase = purchases_df["Price"].mean()

#total number of purchases, taken as the count of non-missing Purchase ID
#values (the column considered least likely to contain gaps)
total_purchases = purchases_df["Purchase ID"].count()

#distinct item names that were sold, and how many of them there are
items_list = purchases_df["Item Name"].unique()
total_items = items_list.shape[0]

#collect the four figures in a one-row dataframe for display
purchases_results_df = pd.DataFrame(
    {
        "Unique items sold": [total_items],
        "Total purchases": [total_purchases],
        "Average purchase": [average_purchase],
        "Total revenue": [total_revenue],
    }
)
purchases_results_df

Unnamed: 0,Unique items sold,Total purchases,Average purchase,Total revenue
0,179,780,3.050987,2379.77


In [32]:
#gender demographics analysis

#one row per player: keep only the first purchase record of each SN.
#drop_duplicates returns a new dataframe, so purchases_df is left untouched
#and no explicit copy or in-place mutation is required
player_gender_df = purchases_df.drop_duplicates(subset="SN", keep="first")

#number of players for each gender label present in the data
counted_gender = player_gender_df["Gender"].value_counts()

#.get(label, 0) gives a count of zero instead of raising KeyError when a
#gender label does not occur in the data at all
male_count = counted_gender.get("Male", 0)
female_count = counted_gender.get("Female", 0)
other_count = counted_gender.get("Other / Non-Disclosed", 0)

#creating the output dataframe
#NOTE: "Percentage of Players" holds each count divided by total_players,
#i.e. a fraction between 0 and 1 (it is not scaled to 0-100)
gender_results_df = pd.DataFrame({" ":["Male", "Female", "Other / Non Disclosed"],
                                 "Total Count":[male_count, female_count, other_count],
                                 "Percentage of Players":[male_count/total_players,
                                                          female_count/total_players,
                                                          other_count/total_players]})

gender_results_df

Unnamed: 0,Unnamed: 1,Total Count,Percentage of Players
0,Male,484,0.840278
1,Female,81,0.140625
2,Other / Non Disclosed,11,0.019097


In [35]:
#purchasing analysis by gender
#for each gender: slice the purchases, then derive the purchase count, the
#total purchase value, the average per purchase and the average per player
#(per-player averages divide by the player counts from the gender demographics cell)

#male purchases
male_df = purchases_df.loc[purchases_df['Gender'].eq('Male')]
male_purchases = male_df.shape[0]
male_total = male_df['Price'].sum()
male_avg = male_total / male_purchases
male_p_avg = male_total / male_count

#female purchases
female_df = purchases_df.loc[purchases_df['Gender'].eq('Female')]
female_purchases = female_df.shape[0]
female_total = female_df['Price'].sum()
female_avg = female_total / female_purchases
female_p_avg = female_total / female_count

#other / non-disclosed purchases
other_df = purchases_df.loc[purchases_df['Gender'].eq('Other / Non-Disclosed')]
other_purchases = other_df.shape[0]
other_total = other_df['Price'].sum()
other_avg = other_total / other_purchases
other_p_avg = other_total / other_count


#building the results dataframe, rows ordered female, male, other
results_df = pd.DataFrame(
    {
        'Gender': ['Female', 'Male', 'Other / Non Disclosed'],
        'Number of Purchases': [female_purchases, male_purchases, other_purchases],
        'Total Purchase Value': [female_total, male_total, other_total],
        'Average Purchase': [female_avg, male_avg, other_avg],
        'Average Purchase by Player': [female_p_avg, male_p_avg, other_p_avg],
    }
)

results_df


Unnamed: 0,Gender,Number of Purchases,Total Purchase Value,Average Purchase,Average Purchase by Player
0,Female,113,361.94,3.203009,4.468395
1,Male,652,1967.64,3.017853,4.065372
2,Other / Non Disclosed,15,50.19,3.346,4.562727
