In [179]:
# Import dependencies and Setup
import pandas as pd
import numpy as np

#Loading csv file
data_file = "purchase_data.csv"

#Read file and store into Pandas data frame 
Pymoli_Data = pd.read_csv(data_file)





In [180]:
#Gaining first peak at loaded data 
Pymoli_Data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [181]:
#declaring and defining Total Players variable
Total_Players = Pymoli_Data['SN'].nunique()

In [182]:
#creating Total Players data frame 
Total_Players_Df = pd.DataFrame ({"Total Players": [Total_Players]})
Total_Players_Df

Unnamed: 0,Total Players
0,576


In [183]:
#declaring and calculating variables needed for Purchase Analysis data frame 
#printing outputs 
num_unique_items = Pymoli_Data['Item Name'].nunique()  
print (num_unique_items)

average_price = Pymoli_Data['Price'].mean()
print (average_price)

num_of_purchases = Pymoli_Data['Purchase ID'].nunique()
print (num_of_purchases)

total_revenue = Pymoli_Data['Price'].sum()
print (total_revenue)

179
3.050987179487176
780
2379.77


In [184]:
#creating Purchase Analysis data frame by plugging in variables
purchasing_analysis = pd.DataFrame({'Number of Unique Items':[num_unique_items],
                                    'Average Price':[average_price],
                                    'Number of Purchases':[num_of_purchases],
                                    'Total Revenue':[total_revenue]})
#formatting data to appear cleaner 
purchasing_analysis["Average Price"] = purchasing_analysis["Average Price"].map("${:.2f}".format)
purchasing_analysis["Total Revenue"] = purchasing_analysis["Total Revenue"].map("${:.2f}".format)

#printing data frame 
purchasing_analysis

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$3.05,780,$2379.77


In [185]:
#dropping duplicates via screename to get accurate count of male/female
drop_duplicateSN_df = Pymoli_Data.drop_duplicates(['SN'], keep='first') 


In [186]:
#calculating counts of male/female
male_female_counts = drop_duplicateSN_df['Gender'].value_counts()
male_female_counts.head()

Male                     484
Female                    81
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [187]:
#calculating percentages of players based on gender
gender_percents = drop_duplicateSN_df['Gender'].value_counts()/Total_Players * 100
gender_percents.head()

Male                     84.027778
Female                   14.062500
Other / Non-Disclosed     1.909722
Name: Gender, dtype: float64

In [188]:
#creating summary Dataframe for Gender Demographics 
gender_summary = pd.DataFrame({"Total Count": male_female_counts,
                               "Percentage of Players": gender_percents})
gender_summary.head()

Unnamed: 0,Total Count,Percentage of Players
Male,484,84.027778
Female,81,14.0625
Other / Non-Disclosed,11,1.909722


In [189]:
#Creating purchasing analysis by gender. 
#calculated total purchase count per gender
purchase_count = Pymoli_Data['Gender'].value_counts()
purchase_count.head()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [190]:
#grouping data by "gender"
gender_grouped = Pymoli_Data.groupby(['Gender'])
gender_grouped.count().head(10)


Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,113,113,113,113,113,113
Male,652,652,652,652,652,652
Other / Non-Disclosed,15,15,15,15,15,15


In [191]:
#calculated average purchase price based on gender groupby
avg_purchase_price = gender_grouped["Price"].mean()
avg_purchase_price.head()

Gender
Female                   3.203009
Male                     3.017853
Other / Non-Disclosed    3.346000
Name: Price, dtype: float64

In [192]:
#calculated total purchase value based on gender groupby
total_purchase_value = gender_grouped["Price"].sum()
total_purchase_value.head()

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [193]:
#calculate avg total purchase per person 
avg_per_person = total_purchase_value/male_female_counts
avg_per_person.head()

Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
dtype: float64

In [194]:
#creating the DataFrame for purchase analysis based on gender
gender_purchase_summary = pd.DataFrame({"Purchase Count": purchase_count,
                                       "Average Purchase Price": avg_purchase_price,
                                       "Total Purchase Value": total_purchase_value,
                                       "Avg Total Purchase per Person": avg_per_person})
#reformatting for cleaner presentation
gender_purchase_summary["Average Purchase Price"] = gender_purchase_summary["Average Purchase Price"].map("${:.2f}".format)
gender_purchase_summary["Total Purchase Value"] = gender_purchase_summary["Total Purchase Value"].map("${:.2f}".format)
gender_purchase_summary["Avg Total Purchase per Person"] = gender_purchase_summary["Avg Total Purchase per Person"].map("${:.2f}".format)

gender_purchase_summary

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,$1967.64,$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [195]:
#Beginning Age Demographics analysis 
#printing max and min age values to create bins 
print(Pymoli_Data["Age"].max())
print(Pymoli_Data["Age"].min())

45
7


In [196]:
#creating bins to create age ranges
bins = [0, 9, 14, 19, 24, 29, 34, 39, 46]

#creating labels for these bins 
group_labels = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']

#slicing data and placing it into created bins
pd.cut(drop_duplicateSN_df["Age"], bins, labels=group_labels)

0      20-24
1        40+
2      20-24
3      20-24
4      20-24
5      20-24
6      35-39
7      20-24
8      20-24
9      35-39
10     20-24
11     20-24
12     20-24
13     20-24
14     35-39
15     20-24
16     20-24
17     20-24
18     20-24
19     30-34
20     20-24
21     20-24
22     35-39
23       40+
24     30-34
25     25-29
26     10-14
27       <10
28     20-24
29     20-24
       ...  
728      40+
729    20-24
730    30-34
731    20-24
733    20-24
736    20-24
737    20-24
738    35-39
739    35-39
741    15-19
742    20-24
743    15-19
745    35-39
746    20-24
748    20-24
750    20-24
751    10-14
752    15-19
753    35-39
756    20-24
757    15-19
761      40+
765    15-19
769    15-19
771    15-19
773    20-24
774    10-14
775    20-24
777    20-24
778      <10
Name: Age, Length: 576, dtype: category
Categories (8, object): [<10 < 10-14 < 15-19 < 20-24 < 25-29 < 30-34 < 35-39 < 40+]

In [197]:
drop_duplicateSN_df["Age Group"] = pd.cut(drop_duplicateSN_df["Age"], bins, labels=group_labels)
drop_duplicateSN_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [198]:
#calculating total count and percentage of players 
total_age_counts = drop_duplicateSN_df['Age Group'].value_counts()
print(total_age_counts)

percent_by_age = drop_duplicateSN_df['Age Group'].value_counts()/Total_Players * 100
print(percent_by_age)


20-24    258
15-19    107
25-29     77
30-34     52
35-39     31
10-14     22
<10       17
40+       12
Name: Age Group, dtype: int64
20-24    44.791667
15-19    18.576389
25-29    13.368056
30-34     9.027778
35-39     5.381944
10-14     3.819444
<10       2.951389
40+       2.083333
Name: Age Group, dtype: float64


In [199]:
#creating a groupby object based on "Age Group"

by_age_df = pd.DataFrame({"Total Count": total_age_counts, "Percentage of Players": percent_by_age})

by_age_df["Percentage of Players"] = by_age_df["Percentage of Players"].map("%{:.2f}".format)



In [200]:
by_age_sort = by_age_df.sort_index(axis=0, level=None, ascending=True,
                                   inplace=False, kind='quicksort',
                                   na_position='last', sort_remaining=True, by=None)
by_age_sort.head(10)

Unnamed: 0,Total Count,Percentage of Players
<10,17,%2.95
10-14,22,%3.82
15-19,107,%18.58
20-24,258,%44.79
25-29,77,%13.37
30-34,52,%9.03
35-39,31,%5.38
40+,12,%2.08


In [201]:
#grouping new table by "Age Group"
Pymoli_Data["Age Group"] = pd.cut(Pymoli_Data["Age"], bins, labels=group_labels)
Pymoli_Data.head()


Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40+
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24


In [202]:
total_purchase_counts = Pymoli_Data['Age Group'].value_counts()
print(total_purchase_counts)

20-24    365
15-19    136
25-29    101
30-34     73
35-39     41
10-14     28
<10       23
40+       13
Name: Age Group, dtype: int64


In [203]:
ages_grouped = Pymoli_Data.groupby(['Age Group'])
ages_grouped.count().head()


Unnamed: 0_level_0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
<10,23,23,23,23,23,23,23
10-14,28,28,28,28,28,28,28
15-19,136,136,136,136,136,136,136
20-24,365,365,365,365,365,365,365
25-29,101,101,101,101,101,101,101


In [204]:
avg_age_purchase_price = ages_grouped["Price"].mean()
avg_age_purchase_price.head()

Age Group
<10      3.353478
10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
Name: Price, dtype: float64

In [205]:
total_age_purchase_value = ages_grouped["Price"].sum()
total_age_purchase_value.head()

Age Group
<10        77.13
10-14      82.78
15-19     412.89
20-24    1114.06
25-29     293.00
Name: Price, dtype: float64

In [206]:
avg_per_person_age = total_age_purchase_value/total_purchase_counts
avg_per_person_age.head()

10-14    2.956429
15-19    3.035956
20-24    3.052219
25-29    2.900990
30-34    2.931507
dtype: float64

In [207]:
age_purchase_summary = pd.DataFrame({"Purchase Count": total_purchase_counts,
                                       "Average Purchase Price": avg_age_purchase_price,
                                       "Total Purchase Value": total_age_purchase_value,
                                       "Avg Total Purchase per Person": avg_per_person_age})
#reformatting for cleaner presentation
age_purchase_summary["Average Purchase Price"] = age_purchase_summary["Average Purchase Price"].map("${:.2f}".format)
age_purchase_summary["Total Purchase Value"] = age_purchase_summary["Total Purchase Value"].map("${:.2f}".format)
age_purchase_summary["Avg Total Purchase per Person"] = age_purchase_summary["Avg Total Purchase per Person"].map("${:.2f}".format)

age_purchase_summary

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
10-14,28,$2.96,$82.78,$2.96
15-19,136,$3.04,$412.89,$3.04
20-24,365,$3.05,$1114.06,$3.05
25-29,101,$2.90,$293.00,$2.90
30-34,73,$2.93,$214.00,$2.93
35-39,41,$3.60,$147.67,$3.60
40+,13,$2.94,$38.24,$2.94
<10,23,$3.35,$77.13,$3.35
