### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players

In [2]:
# counts total players by calculating the size of Unique names in SN
total_players = purchase_data["SN"].unique()
total_players_count = total_players.size
print(f"The total number of players is: {total_players_count}")


The total number of players is: 576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.

In [3]:
# unique items
# counts total items by making a unique array of item IDs, then finding the size of array
unique_items = purchase_data["Item ID"].unique()
unique_items = unique_items.size
print(f"The total number of unique items is: {unique_items}.")


# total purchases
# calculates by counting the size of purchase ID
total_purchases = purchase_data["Purchase ID"].count()
print(f"The total number of purchases is: {total_purchases}.")

# average price
# sums all purchase to calculate revenue then divide by total purchases
gross_revenue = purchase_data["Price"].sum()

average_price = gross_revenue/total_purchases
# round for dollars and cents
average_price = average_price.round(2)

print(f"The gross revenue is ${gross_revenue}, and the average price is ${average_price}")

The total number of unique items is: 179.
The total number of purchases is: 780.
The gross revenue is $2379.77, and the average price is $3.05


 * Create a summary data frame to hold the results

In [4]:
Game_statsDF = pd.DataFrame([{
    "Total Players": total_players_count,
    "Total Purchases": total_purchases,
    "Unique Items": unique_items,
    "Gross Revenue": gross_revenue,
    "Average Price": average_price
}])


* Optional: give the displayed data cleaner formatting

* Display the summary data frame


In [5]:
print(Game_statsDF)

   Total Players  Total Purchases  Unique Items  Gross Revenue  Average Price
0            576              780           179        2379.77           3.05


## Gender Demographics

* Percentage and Count of Male Players





* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed





In [6]:
# creates gender data
gender_data = purchase_data[['SN', 'Gender', "Price"]]
# filters gender data by male gender
players_male = gender_data[gender_data["Gender"] == "Male"]
# finds all unique male players
unique_male = players_male['SN'].unique()
# finds the size of the set of unique male players
count_male = unique_male.size
# calculates then prints the percentage of male players
percent_male = (count_male/total_players_count)*100
print(f"The player base is composed of {percent_male}% male players")

# filters gender data by male gender
players_female = gender_data[gender_data["Gender"] == "Female"]
# finds all unique female players
unique_female = players_female['SN'].unique()
# finds the size of the set of unique female players
count_female = unique_female.size
# calculates then prints the percentage of female players
percent_female = (count_female/total_players_count)*100
print(f"The player base is composed of {percent_female}% female players")

# filters gender data by other gender
players_other = gender_data[gender_data["Gender"] == "Other / Non-Disclosed"]
# finds all unique other players
unique_other = players_other['SN'].unique()
# finds the size of the set of unique other players
count_other = unique_other.size
# calculates then prints the percentage of other players
percent_other = (count_other/total_players_count)*100
print(f"The player base is composed of {percent_other}% other/nondisclosed players")



The player base is composed of 84.02777777777779% male players
The player base is composed of 14.0625% female players
The player base is composed of 1.9097222222222223% other/nondisclosed players



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [7]:
# because the purchase data is the root source, non-unique specified gender data is the same as
# purchase by gender data

# * Male Data *


# sets price data = Price column in male data
Male_purchase_count = players_male["Price"]
# counts total male purchases(non-unique)
Total_male_purchases = Male_purchase_count.count()
# creates a for loop to determine gross revenue
grossrev_male = 0
for x in Male_purchase_count:
    grossrev_male += x
# calculates average purchase price
average_price_male = grossrev_male/Total_male_purchases
# calculates average price per person for males (calls count_male for unique males)
average_price_unique_male = grossrev_male/count_male


# * Female Data *


# sets price data = Price column in female data
Female_purchase_count = players_female["Price"]
# counts total female purchases(non-unique)
Total_female_purchases = Female_purchase_count.count()
# creates a for loop to determine gross revenue
grossrev_female = 0
for x in Female_purchase_count:
    grossrev_female += x
# calculates average purchase price
average_price_female = grossrev_male/Total_female_purchases
# calculates average price per person for females (calls count_female for unique females)
average_price_unique_female = grossrev_female/count_female



# * Other Data *



# sets price data = Price column in other data
Other_purchase_count = players_other["Price"]
# counts total other purchases(non-unique)
Total_other_purchases = Other_purchase_count.count()
# creates a for loop to determine gross revenue
grossrev_other = 0
for x in Other_purchase_count:
    grossrev_other += x

# calculates average purchase price
average_price_other = grossrev_other/Total_other_purchases
# calculates average price per person for other (calls count_other for unique others)
average_price_unique_other = grossrev_other/count_other


Gender_statsDF = pd.DataFrame([{
    "Total Males": count_male,
    "Total Male Purchases" : Total_male_purchases,
    "Gross Revenue by Males" : grossrev_male,
    "Average Price Males" : average_price_male,
    "Average Price by Unique Males" : average_price_unique_male,
    "Total Females": count_female,
    "Total Female Purchases" : Total_female_purchases,
    "Gross Revenue by Females" : grossrev_female,
    "Average Price Females" : average_price_female,
    "Average Price by Unique Females" : average_price_unique_female,
    "Total Other": count_other,
    "Total Other Purchases" : Total_other_purchases,
    "Gross Revenue by Other" : grossrev_other,
    "Average Price other" : average_price_other,
    "Average Price by Unique Other" : average_price_unique_other

}])

print(Gender_statsDF)






   Total Males  Total Male Purchases  Gross Revenue by Males  \
0          484                   652                 1967.64   

   Average Price Males  Average Price by Unique Males  Total Females  \
0             3.017853                       4.065372             81   

   Total Female Purchases  Gross Revenue by Females  Average Price Females  \
0                     113                    361.94              17.412743   

   Average Price by Unique Females  Total Other  Total Other Purchases  \
0                         4.468395           11                     15   

   Gross Revenue by Other  Average Price other  Average Price by Unique Other  
0                   50.19                3.346                       4.562727  


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [8]:
ages = [0, 10, 15, 19, 25, 29, 35, 39, 40, 45]
names = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49"]

players_age = purchase_data[["Age", "Price"]]
players_age["Age Demographics"] = pd.cut(players_age["Age"], ages, labels=names, include_lowest=True)

age_group = players_age.groupby("Age Demographics")
age_count = age_group.count()
age_count = age_count.drop(columns="Price")
age_total = age_count.sum()

age_percent = (age_count/age_total)*100

agedemoDF = age_count.merge(age_percent, "outer", left_on="Age Demographics", right_index=True)
agedemoDF = agedemoDF.rename(columns={"Age_x":"Number Of People", "Age_y":"Percent Of Population"})
print(agedemoDF)


                  Number Of People  Percent Of Population
Age Demographics                                         
<10                             32               4.102564
10-14                           54               6.923077
15-19                          101              12.948718
20-24                          424              54.358974
25-29                           42               5.384615
30-34                           87              11.153846
35-39                           27               3.461538
40-44                            6               0.769231
45-49                            7               0.897436


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_age["Age Demographics"] = pd.cut(players_age["Age"], ages, labels=names, include_lowest=True)


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [9]:
players_age_money = purchase_data[["Age", "Price"]]
players_age_money["Age Demographics"] = pd.cut(players_age["Age"], ages, labels=names, include_lowest=True)
ages = players_age_money.groupby("Age Demographics")
age_purchase_total = ages.sum()
age_counter = ages.count()
age_purchase_average = ages.mean()

AgePurchDF = age_counter.merge(age_purchase_total, "outer", left_on="Age Demographics", right_index=True)
AgePurchDF = AgePurchDF.merge(age_purchase_average, "outer", left_on="Age Demographics", right_index=True)
AgePurchDF = AgePurchDF.rename(columns={"Price_x":"Total Individuals" ,"":"","Price_y":"Total Purchase Value",
                                                                "Price": "Average Purchase Price/Person",
                                        "Age": "Average Purchase Price"})
AgePurchDF = AgePurchDF.drop(columns=["Age_x", "Age_y"])
print(AgePurchDF)

                  Total Individuals  Total Purchase Value  \
Age Demographics                                            
<10                              32                108.96   
10-14                            54                156.60   
15-19                           101                307.24   
20-24                           424               1295.96   
25-29                            42                111.10   
30-34                            87                266.03   
35-39                            27                 95.64   
40-44                             6                 16.71   
45-49                             7                 21.53   

                  Average Purchase Price  Average Purchase Price/Person  
Age Demographics                                                         
<10                             8.468750                       3.405000  
10-14                          13.962963                       2.900000  
15-19                          1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players_age_money["Age Demographics"] = pd.cut(players_age["Age"], ages, labels=names, include_lowest=True)


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [10]:
top_spenders = purchase_data[["SN", "Price"]]
top_spenders_group = top_spenders.groupby("SN")

top_spenders_group_count = top_spenders_group["Price"].count()



top_spenders_group_sum = top_spenders_group.sum()
top_spenders_group_average = top_spenders_group.mean()


top_spenders_group_sum = top_spenders_group_sum.merge(top_spenders_group_count, "outer", left_index=True, right_index=True)
top_spenders_group_sum = top_spenders_group_sum.merge(top_spenders_group_average, "outer", left_index=True, right_index=True)
top_spenders_group_sum = top_spenders_group_sum.rename(columns={"Price_x":"Amount Purchased" ,"Price_y":"Total Purchases",
                                                                "Price": "Average Purchase Price"})
top_spenders_group_sum = top_spenders_group_sum.sort_values("Amount Purchased", ascending=False).head(5)

print(top_spenders_group_sum)




             Amount Purchased  Total Purchases  Average Purchase Price
SN                                                                    
Lisosia93               18.96                5                3.792000
Idastidru52             15.45                4                3.862500
Chamjask73              13.83                3                4.610000
Iral74                  13.62                4                3.405000
Iskadarya95             13.10                3                4.366667


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [11]:
item_data = purchase_data[["Item ID", "Item Name", "Price"]]
item_group = item_data.groupby(["Item ID", "Item Name"])
item_count = item_group.count()
item_avg_price = item_group.mean()
item_total = item_group.sum()
item_merge = item_count.merge(item_total, "outer", right_index=True, left_on=["Item ID", "Item Name"])
item_merge = item_merge.rename(columns={"Price_x":"Number of Purchases", "Price_y":"Total Purchase Value"})
item_merge = item_merge.merge(item_avg_price, "outer", left_index=True, right_index=True)
item_merge = item_merge.rename(columns={"Price":"Average Purchased Price"})
item_merge = item_merge.sort_values("Number of Purchases", ascending=False)
print(item_merge)

                                                      Number of Purchases  \
Item ID Item Name                                                           
92      Final Critic                                                   13   
178     Oathbreaker, Last Hope of the Breaking Storm                   12   
145     Fiery Glass Crusader                                            9   
132     Persuasion                                                      9   
108     Extraction, Quickblade Of Trembling Hands                       9   
...                                                                   ...   
42      The Decapitator                                                 1   
51      Endbringer                                                      1   
118     Ghost Reaver, Longsword of Magic                                1   
104     Gladiator's Glaive                                              1   
91      Celeste                                                         1   

## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [12]:
item_merge = item_merge.sort_values("Total Purchase Value", ascending=False)
print(item_merge)


                                                      Number of Purchases  \
Item ID Item Name                                                           
92      Final Critic                                                   13   
178     Oathbreaker, Last Hope of the Breaking Storm                   12   
82      Nirvana                                                         9   
145     Fiery Glass Crusader                                            9   
103     Singed Scalpel                                                  8   
...                                                                   ...   
28      Flux, Destroyer of Due Diligence                                2   
126     Exiled Mithril Longsword                                        1   
125     Whistling Mithril Warblade                                      2   
104     Gladiator's Glaive                                              1   
42      The Decapitator                                                 1   