### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

## Player Count

* Display the total number of players


In [2]:
player_count = len(purchase_data['SN'].value_counts())
player_count

player_count_df = pd.DataFrame({
    'Player Count': [player_count]
})
player_count_df

Unnamed: 0,Player Count
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [3]:
#Purchase ID, SN, Age, Gender, Item ID, Item Name, Price 
#got an error the first time because the last parenthesis was in the wrong place, have to capture the unique 
unique_item_count = len(purchase_data['Item ID'].unique())
average_price = (purchase_data['Price'].mean())
total_price = (purchase_data['Price']).sum()
total_purchases = (purchase_data['Purchase ID'].count())


purchase_calculations_df = pd.DataFrame ({
    'Number of unique items: ': [unique_item_count],
    'Average Price of Items purchased: ': [average_price],
    'Total calculated price: ': [total_price],
    'The total number of purchases': [total_purchases]
    
    
})

purchase_calculations_df['Total calculated price: ']= purchase_calculations_df['Total calculated price: '].map('${:.0f}'.format)
purchase_calculations_df['Average Price of Items purchased: ']= purchase_calculations_df['Average Price of Items purchased: '].map('${:.0f}'.format)

purchase_calculations_df.head()
 

Unnamed: 0,Number of unique items:,Average Price of Items purchased:,Total calculated price:,The total number of purchases
0,179,$3,$2380,780


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [4]:
#groupby gender
by_gender_df = purchase_data.groupby('Gender')

# #count 
gender_count = by_gender_df.nunique()
# #gender_count 
gender_count_male = gender_count['SN']['Male']
gender_count_female = gender_count['SN']['Female']
gender_count_other = gender_count['SN']['Other / Non-Disclosed']
gender_count_total = gender_count_male + gender_count_female + gender_count_other

# #percentage calculation 
male_per = (gender_count_male / gender_count_total)*100 
female_per = (gender_count_female / gender_count_total)*100
other_per = (gender_count_other / gender_count_total)*100
percent = [male_per , female_per, other_per]
gender_count_list = [gender_count_male, gender_count_female, gender_count_other]
# #store in DataFrame

gender_df = pd.DataFrame ({
    'Percentage of Male Players: ': [male_per],
    'Count of Male Players: ': [gender_count_male], 
    'Percentage of Male Players: ': [male_per],
    'Count of Female Players: ': [gender_count_female], 
    'Percentage of Female Players: ': [female_per],
    'Count of Other/Non-Disclosed Players: ': [gender_count_other], 
    'Percentage of Other/Non-Disclosed Players: ': [other_per],
})

gender_df['Percentage of Male Players: '] = gender_df['Percentage of Male Players: '].map('{:.0f}%'.format)
gender_df['Percentage of Other/Non-Disclosed Players: '] = gender_df['Percentage of Other/Non-Disclosed Players: '].map('{:.0f}%'.format)
gender_df['Percentage of Female Players: '] = gender_df['Percentage of Female Players: '].map('{:.0f}%'.format)


gender_df.head()


Unnamed: 0,Percentage of Male Players:,Count of Male Players:,Count of Female Players:,Percentage of Female Players:,Count of Other/Non-Disclosed Players:,Percentage of Other/Non-Disclosed Players:
0,84%,484,81,14%,11,2%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [5]:
#Purchase ID, SN, Age, Gender, Item ID, Item Name, Price 
#got an error the first time because the last parenthesis was in the wrong place, have to capture the unique 

unique_item_count = by_gender_df['Purchase ID'].count()

average_price = by_gender_df['Price'].mean()

total_price = by_gender_df['Price'].sum()
total_price

# delete all duplicates
duplicates = purchase_data.drop_duplicates(subset='SN', keep="first", inplace=False, ignore_index=False) 
#group data again after dropping duplicates 
grouped_dup = duplicates.groupby(["Gender"])

total_per_person = (total_price / grouped_dup["SN"].count())
total_per_person



gender_purchase_df = pd.DataFrame ({
     'Purchase Count': unique_item_count,
     'Average Purchase Price': average_price,
     'Total Purchase Value': total_price,
     'Average total purchases by gender per person': total_per_person})


gender_purchase_df['Average total purchases by gender per person'] = gender_purchase_df['Average total purchases by gender per person'].map('${:.0f}'.format)
gender_purchase_df['Total Purchase Value'] = gender_purchase_df['Total Purchase Value'].map('${:.0f}'.format)
gender_purchase_df['Average Purchase Price'] = gender_purchase_df['Average Purchase Price'].map('{:.0f}%'.format)

gender_purchase_df


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average total purchases by gender per person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,3%,$362,$4
Male,652,3%,$1968,$4
Other / Non-Disclosed,15,3%,$50,$5


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [6]:
bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 100000]
group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# #database for age Demo
purchase_data['Age Demographics']= pd.cut(purchase_data['Age'], bins, labels=group_names)
demographic_df =purchase_data.groupby('Age Demographics')

#count the number of distinct elements in the specified axis, in this case our age demograhic dataframe 
count_players_age = demographic_df['SN'].nunique()
count_players_age

#divide by the total player count regardless of sex to get the percentage of gender by age 
avg_gender_age = (count_players_age / player_count)*100

format_avg_gender_age = avg_gender_age.map('{:.02f}%'.format)

#create a summary table 
gender_age_df = pd.DataFrame ({
     'Percentage Count': format_avg_gender_age,
     'Count': count_players_age
})

#display age demographic table 
gender_age_df


Unnamed: 0_level_0,Percentage Count,Count
Age Demographics,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,2.95%,17
10-14,3.82%,22
15-19,18.58%,107
20-24,44.79%,258
25-29,13.37%,77
30-34,9.03%,52
35-39,5.38%,31
40+,2.08%,12


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [7]:
# bins = [0, 9.9, 14.9, 19.9, 24.9, 29.9, 34.9, 39.9, 100000]
# group_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

# # #database for age Demo
# purchase_data['Age Demographics']= pd.cut(purchase_data['Age'], bins, labels=group_names)
# demographic_df =purchase_data.groupby('Age Demographics')

#manipulate demographic df to count
#count the number of distinct elements in the specified axis, in this case our age demograhic dataframe 
#when to use count versus nunique, they both returned the same numbers/results 

purchase_age_count = demographic_df['Purchase ID'].nunique()
avg_age_purchase_price = demographic_df['Price'].mean()
format_avg_age_purchase_price  = avg_age_purchase_price .map('${:.02f}'.format)
avg_age_purchase_total = demographic_df['Price'].sum()
#format_avg_age_purchase_total = avg_age_purchase_total.map('{:.02f}%'.format)

#count_players_age = demographic_df['SN'].nunique() -- already have it from previous cell 
per_person_age_purchase = avg_age_purchase_total / count_players_age
format_per_person_age_purchase = per_person_age_purchase.map('${:.02f}'.format)

purchasing_age_df = pd.DataFrame({
    'Purchase Count ': purchase_age_count, 
    'Average Purchase Price': format_avg_age_purchase_price,
   # '': format_avg_age_purchase_total,
    'Average per person purchase by age: ': format_per_person_age_purchase
    
})

purchasing_age_df.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Average per person purchase by age:
Age Demographics,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,23,$3.35,$4.54
10-14,28,$2.96,$3.76
15-19,136,$3.04,$3.86
20-24,365,$3.05,$4.32
25-29,101,$2.90,$3.81


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [8]:
#groupby SN 

spender = purchase_data.groupby('SN')


#total purchase value column in descending order
total_purchase_value = spender['Purchase ID'].nunique()

#wont let me format on the total price it changes the format
average_price = spender['Price'].mean()
average_price_format = average_price.map('${:,.02f}'.format)
total_price = spender['Price'].sum()
total_price_format = total_price.map('${:,.02f}'.format)

spender_df = pd.DataFrame ({
    'Purchase Count' : total_purchase_value,
    'Average Purchase Price': average_price_format,
    'Total Purchase': total_price_format    
})

#tried to format a different way and it changed the total purchase numbers and the purchase count?
#spender_df['Total Purchase'] = spender_df['Total Purchase'].map('${:.0f}'.format)
spender_df_format = spender_df.sort_values(['Purchase Count'], ascending=False)
spender_df_format.head()

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Iral74,4,$3.41,$13.62
Idastidru52,4,$3.86,$15.45
Asur53,3,$2.48,$7.44
Inguron55,3,$3.70,$11.11


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [9]:
#created a dataframe to retrieve the desired data 
#item_df = purchase_data[['Item ID', 'Item Name', 'Price']]

item_id_a = purchase_data.groupby(['Item ID', 'Item Name'])




#purchase count, average item price, and total purchase value 
item_id_count = item_id_a['Item ID'].count()
item_name_count = item_id_a['Item Name'].unique()
item_id_sum = item_id_a['Price'].sum().map('${:,.02f}'.format)
item_individual_price = item_id_a['Price'].mean().map('${:,.02f}'.format)

item_df = pd.DataFrame({
    'Purchase Count': item_id_count,
    'Item Name': item_name_count,
    'Average Item Price': item_individual_price,
    'Total Purchase Value': item_id_sum
    
})
item_df.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Name,Average Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,Splinter,4,[Splinter],$1.28,$5.12
1,Crucifer,4,[Crucifer],$2.94,$11.77
2,Verdict,6,[Verdict],$2.48,$14.88
3,Phantomlight,6,[Phantomlight],$2.49,$14.94
4,Bloodlord's Fetish,5,[Bloodlord's Fetish],$1.70,$8.50


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [10]:
#table by total purchase value in descending order
item_df_format = item_df.sort_values(['Purchase Count'], ascending=False)
item_df_format.set_index('Purchase Count', inplace=True)
item_df_format.head()

Unnamed: 0_level_0,Item Name,Average Item Price,Total Purchase Value
Purchase Count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
13,[Final Critic],$4.61,$59.99
12,"[Oathbreaker, Last Hope of the Breaking Storm]",$4.23,$50.76
9,[Fiery Glass Crusader],$4.58,$41.22
9,[Persuasion],$3.22,$28.99
9,"[Extraction, Quickblade Of Trembling Hands]",$3.53,$31.77


In [None]:
'''
Analysis: 

The item "Final Critic" was the most purchased item and had the highest average item price of items purchased.

Lisosia93 was the Top Spender. 

15-19 and 20-24 age groups spent the purchased the most number of items. 

<10 age group had the highest cost per person average per item. 

'''