### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
PurchaseFile = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
Purchase_df = pd.read_csv(PurchaseFile)
Purchase_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,92,Final Critic,4.19


## Player Count

In [2]:
#Pandas dataframes are made up of series, which is each column
#converted array to list
len(list(Purchase_df["SN"].values))

780

In [3]:
#create a variable for a group that includes Price, Gender and Names
total_players = len(set(Purchase_df["SN"].values))
total_players

576

* Display the total number of players


In [4]:
pd.DataFrame({
    "Total Players":[total_players]
})

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
#setting my values to place in dataframe; functions
num_uniqueitems = len(set(Purchase_df["Item ID"].values))
num_avgprice = round(Purchase_df["Price"].mean(),2)
num_purchases = len(list(Purchase_df["Purchase ID"].values))
num_total_rev = sum(list(Purchase_df["Price"].values))

In [6]:
#reference
pd.DataFrame({
    "Number of Unique Items":[num_uniqueitems], 
    "Average Price":[num_avgprice],	
    "Number of Purchases":[num_purchases],
    "Total Revenue":[num_total_rev],
})


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [7]:
#pass a subset of dataframe that will be useful for gender and purchase analysis

Sub_Gender = Purchase_df[["Gender", "SN", "Price", "Purchase ID"]].copy()
Sub_Gender

#groupiing - Gender
Group_Gender = Sub_Gender.groupby('Gender')

##setting my values to place in dataframe; functions
Group_Total = Group_Gender["SN"].nunique()
Group_Percent = round(Group_Gender["SN"].nunique()/total_players * 100, 1)

In [8]:
#reference the calc to create df.

Gender_Summary = pd.DataFrame({
    "Total Count":Group_Total, 
    "Percentage of Players":Group_Percent,	
   })

#sorting by total count 
Gender_Summary = Gender_Summary.sort_values("Total Count", ascending=False)
Gender_Summary["Percentage of Players"] = Gender_Summary["Percentage of Players"].map("{0:.0f}%".format)
Gender_Summary

Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84%
Female,81,14%
Other / Non-Disclosed,11,2%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [9]:
#pass a subset of dataframe that will be useful for gender and purchase analysis

Sub_Gender = Purchase_df[["Gender", "SN", "Price", "Purchase ID"]].copy()
Sub_Gender

#groupiing - Gender
Group_Gender = Sub_Gender.groupby('Gender')

##setting my values to place in dataframe; functions
Gender_Purchase = Group_Gender["Purchase ID"].nunique()
Gender_Avg = Group_Gender["Price"].mean()
GenderTot_Value = (Group_Gender["Price"]).sum()
GenderTot_Avg = (Group_Gender["Price"]).sum()/total_players

In [10]:
#reference the calc to create df.

Purchase_Summary = pd.DataFrame({
    "Purchase Count":Gender_Purchase, 
    "Avg Purchase Price":Gender_Avg,
    "Total Purchase Value":GenderTot_Value,
    "Avg Total per Person":GenderTot_Avg,	
   })
Purchase_Summary 

Unnamed: 0_level_0,Purchase Count,Avg Purchase Price,Total Purchase Value,Avg Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3.203009,361.94,0.628368
Male,652,3.017853,1967.64,3.416042
Other / Non-Disclosed,15,3.346,50.19,0.087135


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [11]:
# Figure out the minimum and maximum views 
print(Purchase_df["Age"].max())
print(Purchase_df["Age"].min())


45
7


In [13]:
Sub_Age = Purchase_df[["Age", "SN", "Price", "Purchase ID"]].copy()

# Create the bins in which Data will be held
bins=[0, 9, 14, 19, 24, 29, 34, 39, 44, 49]
labels = ["<10", "10-14", "15-19", "20-24","25-29","30-34","35-39","40-44","45+"]

#Place the data series into a new column inside of the DataFrame
Sub_Age["Age Group"]= pd.cut(Sub_Age["Age"], bins, labels=labels, include_lowest=True)
Sub_Age.head()


Unnamed: 0,Age,SN,Price,Purchase ID,Age Group
0,20,Lisim78,3.53,0,20-24
1,40,Lisovynya38,1.56,1,40-44
2,24,Ithergue48,4.88,2,20-24
3,24,Chamassasya86,3.27,3,20-24
4,23,Iskosia90,1.44,4,20-24


In [19]:
Age_Ranges = Sub_Age.groupby("Age Group")

#Calculate the numbers and percentages by age group
Age_Total = Age_Ranges["SN"].nunique()
Age_Percent = round(Age_Total/total_players*100, 1)
print(Age_Percent)

Age Group
<10       3.0
10-14     3.8
15-19    18.6
20-24    44.8
25-29    13.4
30-34     9.0
35-39     5.4
40-44     1.9
45+       0.2
Name: SN, dtype: float64


In [20]:
#Create dataframe for age demographics
AgeDemo_Summary = pd.DataFrame({
    "Total Count":Age_Total, 
    "Percentage of Players":Age_Percent,
   })
AgeDemo_Summary

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,3.0
10-14,22,3.8
15-19,107,18.6
20-24,258,44.8
25-29,77,13.4
30-34,52,9.0
35-39,31,5.4
40-44,11,1.9
45+,1,0.2


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

