### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load
myfile = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
myfile_df = pd.read_csv(myfile)
pd.options.display.float_format = '${:,.2f}'.format

In [2]:
myfile_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,467,Adairialis76,16,Male,123,Twilight's Carver,$2.28
1,142,Adastirin33,35,Female,175,Woeful Adamantite Claymore,$4.48
2,388,Aeda94,17,Male,128,"Blazeguard, Reach of Eternity",$4.91
3,28,Aela59,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",$4.32
4,630,Aelaria33,23,Male,171,Scalpel,$1.79


In [4]:
new_df = myfile_df.drop_duplicates()
new_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,467,Adairialis76,16,Male,123,Twilight's Carver,$2.28
1,142,Adastirin33,35,Female,175,Woeful Adamantite Claymore,$4.48
2,388,Aeda94,17,Male,128,"Blazeguard, Reach of Eternity",$4.91
3,28,Aela59,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",$4.32
4,630,Aelaria33,23,Male,171,Scalpel,$1.79
...,...,...,...,...,...,...,...
775,54,Zhisrisu83,10,Male,25,Hero Cane,$4.35
776,141,Zhisrisu83,10,Male,60,Wolf,$3.54
777,442,Zontibe81,21,Male,84,Arcane Gem,$3.79
778,17,Zontibe81,21,Male,161,Devine,$1.76


## Player Count

* Display the total number of players


In [5]:
total_players = new_df["SN"].nunique()
total_players

576

## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [6]:
unique_items = new_df["Item Name"].nunique()
unique_items

179

In [7]:
purchase_count = new_df["Item Name"].count()
purchase_count

780

In [8]:
avg_price = new_df["Price"].mean()
round(avg_price, 2)

3.05

In [9]:
total_revenue = round(new_df["Price"].sum(), 2)
total_revenue

2379.77

In [10]:
# Create a summary data frame to hold the results
summary_table_df = pd.DataFrame({'Number of Unique Items': [unique_items], 
                                 'Average Price': (avg_price),
                                 'Number of Purchases': (purchase_count),
                                 'Total Revenue': "$"+str(total_revenue)})
summary_table_df

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


In [33]:
grouped_gender_df = new_df.groupby('Gender').count()
grouped_gender_df

Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,81,81,81,81,81,81
Male,484,484,484,484,484,484
Other / Non-Disclosed,11,11,11,11,11,11


* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [84]:
new_df = myfile_df.drop_duplicates(subset="SN")
new_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,467,Adairialis76,16,Male,123,Twilight's Carver,$2.28
1,142,Adastirin33,35,Female,175,Woeful Adamantite Claymore,$4.48
2,388,Aeda94,17,Male,128,"Blazeguard, Reach of Eternity",$4.91
3,28,Aela59,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",$4.32
4,630,Aelaria33,23,Male,171,Scalpel,$1.79
...,...,...,...,...,...,...,...
769,548,Yathecal82,20,Female,75,Brutality Ivory Warmace,$2.42
772,93,Yathedeu43,22,Male,88,"Emberling, Defender of Delusions",$3.75
774,572,Yoishirrala98,17,Female,145,Fiery Glass Crusader,$4.58
775,54,Zhisrisu83,10,Male,25,Hero Cane,$4.35


In [85]:
count_of_males = (new_df["Gender"] == "Male").sum()
count_of_males

484

In [15]:
count_of_females = (new_df["Gender"] == "Female").sum()
count_of_females

81

In [16]:
count_of_other = (new_df["Gender"] == "Other / Non-Disclosed").sum()
count_of_other

11

In [34]:
male_percent = count_of_males / total_players *100
male_percent

84.02777777777779

In [35]:
female_percent = count_of_females / total_players *100
female_percent

14.0625

In [38]:
other_percent = count_of_other / total_players *100
other_percent

1.9097222222222223

In [90]:
# Group items by the "Gender" column
grouped_gender_df = myfile_df.groupby("Gender")
grouped_gender_df

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000025ADBA10760>

In [91]:
grouped_purchase_count = grouped_gender_df["Item Name"].count()
grouped_purchase_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Item Name, dtype: int64

In [92]:
grouped_avg_price = grouped_gender_df["Price"].mean()
grouped_avg_price

Gender
Female                  $3.20
Male                    $3.02
Other / Non-Disclosed   $3.35
Name: Price, dtype: float64

In [93]:
grouped_avg_total = grouped_gender_df["Price"].sum()
grouped_avg_total

Gender
Female                    $361.94
Male                    $1,967.64
Other / Non-Disclosed      $50.19
Name: Price, dtype: float64

In [72]:
grouped_purchase_value = grouped_gender_df["Price"].sum()
grouped_purchase_value

Gender
Female                    $361.94
Male                    $1,967.64
Other / Non-Disclosed      $50.19
Name: Price, dtype: float64

In [107]:
grouped_price_per_person = grouped_gender_df["Price"].sum()
grouped_price_per_person 

Gender
Female                    $361.94
Male                    $1,967.64
Other / Non-Disclosed      $50.19
Name: Price, dtype: float64


* The below each broken by gender

* Purchase Count

* Average Purchase Price

* Total Purchase Value

* Average Purchase Total per Person by Gender


In [99]:
# Create a summary data frame to hold the results by gender
gender_summary_df = pd.DataFrame({'Purchase Count': (grouped_purchase_count),
                                  'Average Purchase Price': round((grouped_avg_price),2).astype(float).map("${:,.2f}".format),
                                  'Total Purchase Value': (grouped_purchase_value).astype(float).map("${:,.2f}".format),
                                  'Avg Purchase Total per Person': (grouped_price_per_person)
                                })
gender_summary_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Purchase Total per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$3.20
Male,652,$3.02,"$1,967.64",$3.02
Other / Non-Disclosed,15,$3.35,$50.19,$3.35


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [109]:
# Figure out the min and max age values so we know what the edges are
print(myfile_df["Age"].max())
print(myfile_df["Age"].min())

45
7


In [111]:
# Create bins for Ages
bins = [1, 10, 15, 20, 25, 30, 35, 40, 45]

group_labels = ["<10", "11 to 14", "15 to 19", "20 to 24", "25 to 29", "30 to 34", "35 to 39", "40+"]

In [119]:
new_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,467,Adairialis76,16,Male,123,Twilight's Carver,$2.28
1,142,Adastirin33,35,Female,175,Woeful Adamantite Claymore,$4.48
2,388,Aeda94,17,Male,128,"Blazeguard, Reach of Eternity",$4.91
3,28,Aela59,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",$4.32
4,630,Aelaria33,23,Male,171,Scalpel,$1.79
...,...,...,...,...,...,...,...
769,548,Yathecal82,20,Female,75,Brutality Ivory Warmace,$2.42
772,93,Yathedeu43,22,Male,88,"Emberling, Defender of Delusions",$3.75
774,572,Yoishirrala98,17,Female,145,Fiery Glass Crusader,$4.58
775,54,Zhisrisu83,10,Male,25,Hero Cane,$4.35


In [120]:
# Categorize the existing players by adding a new column to the data frame
new_df["Age Range"] = pd.cut(new_df["Age"], bins, right=False, labels=group_labels, include_lowest=True)
new_df.head(30)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["Age Range"] = pd.cut(new_df["Age"], bins, right=False, labels=group_labels, include_lowest=True)


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Range
0,467,Adairialis76,16,Male,123,Twilight's Carver,$2.28,15 to 19
1,142,Adastirin33,35,Female,175,Woeful Adamantite Claymore,$4.48,35 to 39
2,388,Aeda94,17,Male,128,"Blazeguard, Reach of Eternity",$4.91,15 to 19
3,28,Aela59,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",$4.32,20 to 24
4,630,Aelaria33,23,Male,171,Scalpel,$1.79,20 to 24
5,766,Aelastirin39,23,Male,58,"Freak's Bite, Favor of Holy Might",$4.14,20 to 24
7,705,Aelidru27,22,Male,183,Dragon's Greatsword,$1.09,20 to 24
8,87,Aelin32,20,Male,151,Severance,$3.40,20 to 24
11,428,Aelly27,24,Male,14,Possessed Core,$2.61,20 to 24
13,286,Aellynun67,25,Male,153,Mercenary Sabre,$3.74,25 to 29


In [122]:
# Calculate the numbers and percentages for each age group
# type(myfile_df["Age Range"])
bin_counts = new_df["Age Range"].value_counts()
bin_counts

20 to 24    258
15 to 19    107
25 to 29     77
30 to 34     52
35 to 39     31
11 to 14     22
<10          17
40+          11
Name: Age Range, dtype: int64

In [123]:
age_percent = bin_counts / total_players * 100
age_percent

20 to 24   $44.79
15 to 19   $18.58
25 to 29   $13.37
30 to 34    $9.03
35 to 39    $5.38
11 to 14    $3.82
<10         $2.95
40+         $1.91
Name: Age Range, dtype: float64

In [124]:
# Create a summary data frame to hold the results and display it
gender_ranges_df = pd.DataFrame({"Total Count": (bin_counts),
                                 "Percentage of Players": round((age_percent), 2).astype(float).map("{:,.2f}%".format)
                                })
gender_ranges_df

Unnamed: 0,Total Count,Percentage of Players
20 to 24,258,44.79%
15 to 19,107,18.58%
25 to 29,77,13.37%
30 to 34,52,9.03%
35 to 39,31,5.38%
11 to 14,22,3.82%
<10,17,2.95%
40+,11,1.91%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [129]:
grouped_age_range = new_df.groupby("Age Range")
grouped_age_range.count()

Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
<10,17,17,17,17,17,17,17
11 to 14,22,22,22,22,22,22,22
15 to 19,107,107,107,107,107,107,107
20 to 24,258,258,258,258,258,258,258
25 to 29,77,77,77,77,77,77,77
30 to 34,52,52,52,52,52,52,52
35 to 39,31,31,31,31,31,31,31
40+,11,11,11,11,11,11,11


In [130]:
# Categorize the existing players by adding a new column to the data frame
new_group_df = myfile_df[["Age Range", "Price"]].groupby(["Age Range"]).agg(['count', 'sum', 'mean'])
new_group_df

Unnamed: 0_level_0,Price,Price,Price
Unnamed: 0_level_1,count,sum,mean
Age Range,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
<10,23,$77.13,$3.35
11 to 14,28,$82.78,$2.96
15 to 19,136,$412.89,$3.04
20 to 24,365,"$1,114.06",$3.05
25 to 29,101,$293.00,$2.90
30 to 34,73,$214.00,$2.93
35 to 39,41,$147.67,$3.60
40+,12,$36.54,$3.05


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [131]:
grouped_purchase_count_df = myfile_df.groupby("Item Name")
grouped_purchase_count_df.count()

Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item ID,Price,Age Range
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Abyssal Shard,5,5,5,5,5,5,5
"Aetherius, Boon of the Blessed",5,5,5,5,5,5,5
Agatha,6,6,6,6,6,6,6
Alpha,3,3,3,3,3,3,3
"Alpha, Oath of Zeal",3,3,3,3,3,3,3
...,...,...,...,...,...,...,...
Wolf,8,8,8,8,8,8,8
"Wolf, Promise of the Moonwalker",6,6,6,6,6,6,6
Worldbreaker,4,4,4,4,4,4,4
Yearning Crusher,3,3,3,3,3,3,3


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [135]:
# Retrieve ID, Item Name, Item Price columns
another_df = myfile_df[["SN", "Item ID", "Item Name", "Price"]]
another_df.sort_values("SN")

Unnamed: 0,SN,Item ID,Item Name,Price
0,Adairialis76,123,Twilight's Carver,$2.28
1,Adastirin33,175,Woeful Adamantite Claymore,$4.48
2,Aeda94,128,"Blazeguard, Reach of Eternity",$4.91
3,Aela59,119,"Stormbringer, Dark Blade of Ending Misery",$4.32
4,Aelaria33,171,Scalpel,$1.79
...,...,...,...,...
775,Zhisrisu83,25,Hero Cane,$4.35
776,Zhisrisu83,60,Wolf,$3.54
777,Zontibe81,84,Arcane Gem,$3.79
778,Zontibe81,161,Devine,$1.76


In [138]:
# Sort Purchase Count in Decending order and preview the data frame
user_purchase_count = new_df.groupby("SN")
user_purchase_count.sum()

Unnamed: 0_level_0,Item ID,Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1
Adairialis76,123,$2.28
Adastirin33,175,$4.48
Aeda94,128,$4.91
Aela59,119,$4.32
Aelaria33,171,$1.79
...,...,...
Yathecal82,241,$6.22
Yathedeu43,143,$6.02
Yoishirrala98,145,$4.58
Zhisrisu83,85,$7.89


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [143]:
myfile_df["SN"].value_counts()

Lisosia93      5
Iral74         4
Idastidru52    4
Chamimla85     3
Chamjask73     3
              ..
Rairith81      1
Shaidanu32     1
Tyarithn67     1
Lisossala30    1
Aesri53        1
Name: SN, Length: 576, dtype: int64

In [149]:
# total_value = myfile_df.sort_values("Price", ascending=False)
# total_value.head() 

In [148]:
stats_df = new_df.groupby("Item Name")
stats_df.count()

Unnamed: 0_level_0,SN,Item ID,Price
Item Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abyssal Shard,5,5,5
"Aetherius, Boon of the Blessed",5,5,5
Agatha,6,6,6
Alpha,3,3,3
"Alpha, Oath of Zeal",3,3,3
...,...,...,...
Wolf,8,8,8
"Wolf, Promise of the Moonwalker",6,6,6
Worldbreaker,4,4,4
Yearning Crusher,3,3,3
