### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import os

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load) 
df = pd.DataFrame(purchase_data)

## Player Count

* Display the total number of players


In [2]:
index = df.index
number_of_rows = len(index)
print(number_of_rows)

780


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
average = purchase_data["Price"].mean()
average
price_frequency = purchase_data["Price"].mode()
price_frequency
price_min = purchase_data["Price"].min()
price_min
price_max = purchase_data["Price"].max()
price_max
price_count = len(purchase_data["Price"].unique())
total_profits = (purchase_data["Price"].sum())


print(average)
print(price_frequency)
print(price_min)
print(price_max)
print(price_count)
print(total_profits)

  

summary_df = pd.DataFrame({"Average Price": [average],
                              "Price Frequency": [price_frequency],
                              "Minimum Price": [price_min],
                              "Maximum Price": [price_max],
                          "Price Count":[price_count],
                          "Total Revenue":[total_profits]})
summary_df




3.050987179487176
0    4.4
dtype: float64
1.0
4.99
145
2379.77


Unnamed: 0,Average Price,Price Frequency,Minimum Price,Maximum Price,Price Count,Total Revenue
0,3.050987,0 4.4 dtype: float64,1.0,4.99,145,2379.77


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
count = purchase_data["Gender"].value_counts()
count
print(count)
total_count = sum(count)
percent = purchase_data["Gender"].value_counts()/total_count 
print(percent)



gender_group = pd.DataFrame({'Gender':['Male','Female','Other / Non-Disclosed'], 'Amount':[652,113,15], 'Percentage':['83.6%', '14.4%','2%']})
print(gender_group)    



Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64
Male                     0.835897
Female                   0.144872
Other / Non-Disclosed    0.019231
Name: Gender, dtype: float64
                  Gender  Amount Percentage
0                   Male     652      83.6%
1                 Female     113      14.4%
2  Other / Non-Disclosed      15         2%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:

male_purchase_data = purchase_data.loc[purchase_data["Gender"] == "Male", :]
male_purchase_count = len(male_purchase_data)
avg_male_purchase_price = purchase_data.loc[purchase_data["Gender"] == "Male", ["Price"]].mean()
total_male_purchase_value = purchase_data.loc[purchase_data["Gender"] == "Male", ["Price"]].sum()

female_purchase_data = purchase_data.loc[purchase_data["Gender"] == "Female", :]
female_purchase_count = len(female_purchase_data)
avg_female_purchase_price = purchase_data.loc[purchase_data["Gender"] == "Female", ["Price"]].mean()
total_female_purchase_value = purchase_data.loc[purchase_data["Gender"] == "Female", ["Price"]].sum()

other_purchase_data = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed", :]
other_purchase_count = len(other_purchase_data)
avg_other_purchase_price = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed", ["Price"]].mean()
total_other_purchase_value = purchase_data.loc[purchase_data["Gender"] == "Other / Non-Disclosed", ["Price"]].sum()

avg_male_purchase_total_person = total_male_purchase_value / male_purchase_count
avg_female_purchase_total_person = total_female_purchase_value / female_purchase_count
avg_other_purchase_total_person = total_other_purchase_value / other_purchase_count

gender_purchase_group = pd.DataFrame({'Gender':['Male','Female', 'Other'], 'Price':['Purchase Count','Average Purchase Price','Total Purchase Price','Total Purchase Value','Avg Total Purchase per Person']})


   


ValueError: arrays must all be same length

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [8]:
average_age = purchase_data["Age"].mean()
average_age
age_frequency = purchase_data["Age"].mode()
age_frequency
age_min = purchase_data["Age"].min()
age_min
age_max = purchase_data["Age"].max()
age_max
age_count = len(purchase_data["Age"].unique())

print(average_age)
print(age_frequency)
print(age_min)
print(age_max)


bins = [0,9,14,19,24,29,34,39,46]
group_names = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

df["Age Summary"] = pd.cut(df["Age"], bins, labels=group_names, include_lowest=True)
df

age_group = purchase_data["Age Summary"].value_counts()
print(age_group)

age_percent = purchase_data["Age Summary"].value_counts()/total_count 
print(age_percent)



22.714102564102564
0    20
dtype: int64
7
45
20-24    365
15-19    136
25-29    101
30-34     73
35-39     41
10-14     28
<10       23
40+       13
Name: Age Summary, dtype: int64
20-24    0.467949
15-19    0.174359
25-29    0.129487
30-34    0.093590
35-39    0.052564
10-14    0.035897
<10      0.029487
40+      0.016667
Name: Age Summary, dtype: float64


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [27]:
bins = [0, 9, 14, 19, 24, 29, 34, 39, 46]
groups_names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins=bins, labels=groups_names)
age_purchase_count = age_group.count()

avg_age_purchase_price = round(age_group.mean(),2)
total_age_purchase_value = round(age_group.sum(),2)
avg_total_age_purchase_person = round(total_age_purchase_value / age_count,2)

age_purchase_table = pd.DataFrame({
    "Purchase Count": age_purchase_count, 
    "Average Purchase Price": avg_age_purchase_price,
    "Total Purchase Value": total_age_purchase_value,
    "Avg Total Purchase per Person": avg_total_age_purchase_person},index=[0]) 



## Top Spenders

In [28]:
top_spenders = purchase_data.groupby
spender_purchase_count = top_spenders("Purchase ID").count()
average_spender_purchase_price = round(top_spenders("Price").mean(),2)
total_spender_purchase_value = top_spenders("Price").sum()

top_spenders_table = pd.DataFrame({"Purchase Count": spender_purchase_count,"Average Purchase Price": average_spender_purchase_price,"Total Purchase Value": total_spender_purchase_value},index=[0])

ValueError: Shape of passed values is (8, 3), indices imply (1, 3)

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [29]:
popular_items_list = purchase_data[["Item ID", "Item Name", "Price"]]
popular_items = popular_items_list.groupby(["Item ID","Item Name"])
item_purchase_count = popular_items["Price"].count()
item_price = popular_items["Price"].sum()
item_purchase_value = item_price / item_purchase_count

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



NameError: name 'most_popular_items' is not defined