### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [2]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
csv_path = "Pymoli.csv"

# Read Purchasing File and store into Pandas data frame
pymoli_df = pd.read_csv(csv_path)
pymoli_df

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44
...,...,...,...,...,...,...,...
775,775,Aethedru70,21,Female,60,Wolf,3.54
776,776,Iral74,21,Male,164,Exiled Doomblade,1.63
777,777,Yathecal72,20,Male,67,"Celeste, Incarnation of the Corrupted",3.46
778,778,Sisur91,7,Male,101,Final Critic,4.19


## Player Count

* Display the total number of players


In [3]:
player_count = len(pymoli_df["SN"].unique())
player_count_df = pd.DataFrame({"Number of Players": [player_count]})
player_count_df

Unnamed: 0,Number of Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [4]:
item_count = len(pymoli_df["Item ID"].unique())
average_price = pymoli_df["Price"].mean()
purchase_count = len(pymoli_df["Purchase ID"].unique())
total_revenue = pymoli_df["Price"].sum()
pymoli_summary_table = pd.DataFrame({"Number of Unique Items" : [item_count],
                                    "Average Price": [average_price],
                                    "Number of Purchases": [purchase_count],
                                    "Total Revenue": [total_revenue]})
pymoli_summary_table

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,3.050987,780,2379.77


## Gender Demographics



* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [5]:
sorted_pymoli_df = pymoli_df.sort_values("SN")
cleaned_pymoli_df = sorted_pymoli_df.drop_duplicates(subset="SN", keep="first")
cleaned_pymoli_df.count()

Purchase ID    576
SN             576
Age            576
Gender         576
Item ID        576
Item Name      576
Price          576
dtype: int64

In [10]:
group_gender_df = cleaned_pymoli_df.groupby(["Gender"])
gender_counts = group_gender_df["Gender"].count()
gender_counts

Gender
Female                    81
Male                     484
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [11]:
gender_df = cleaned_pymoli_df[["Gender"]]
male_count = gender_df.loc[gender_df["Gender"] == "Male",:]
male_count = male_count["Gender"].count()
female_count = gender_df.loc[gender_df["Gender"] == "Female",:]
female_count = female_count["Gender"].count()
other_count = gender_df.loc[gender_df["Gender"] == "Other / Non-Disclosed",:]
other_count = other_count["Gender"].count()

percent_male = round((male_count/player_count)*100,2).astype("float")
percent_male = "{:.2f}%".format(percent_male)
percent_female = round((female_count/player_count)*100,2).astype("float")
percent_female = "{:.2f}%".format(percent_female)
percent_other = round((other_count/player_count)*100,2).astype("float")
percent_other = "{:.2f}%".format(percent_other)
percent_other
gender_demographic_df = pd.DataFrame({
                                    "Total Count":[male_count, female_count, other_count],
                                    "Percentage of Players":[percent_male, percent_female, percent_other]
                                    })
gender_demographic_df.index = ["Male", "Female", "Other/Non-Disclosed"]
gender_demographic_df


Unnamed: 0,Total Count,Percentage of Players
Male,484,84.03%
Female,81,14.06%
Other/Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender
#df.groupby(['Fruit','Name']).sum()
gender_df



* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [14]:
#df.groupby(['Fruit','Name']).sum()
grouped_pymoli_df = pymoli_df.groupby(["Gender"])
purchase_count = grouped_pymoli_df["Purchase ID"].count()
purchase_count = round(purchase_count,2).astype("float")
#round((male_count/player_count)*100,2).astype("float")


ave_purchase_price = grouped_pymoli_df["Price"].mean()
ave_purchase_price = ["$%.2f" % elem for elem in ave_purchase_price]
#formatting the numbers in the list [ '%.2f' % elem for elem in my_list ]

total_purchase_amount = grouped_pymoli_df["Price"].sum()
total_purchase_amount = ["$%.2f" % elem for elem in total_purchase_amount]
#total_purchase_amount

ave_total_per_person = [total_purchase_amount]/[gender_counts]
ave_total_per_person


#create pandas dataframe
#purchasing_analysis = pd.DataFrame({"Purchase Count": purchase_count,
    #                                "Average Purchase Price": ave_purchase_price,
   #                                "Total Purchase Value": total_purchase_amount,
  #                                 "Avg Total per Person": ave_total_per_person})
 #               
#purchasing_analysis



TypeError: unsupported operand type(s) for /: 'list' and 'list'

In [8]:
gender_counts = gender_df["Gender"].value_counts()
gender_counts

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

