In [1]:
# Dependencies
import pandas as pd

In [2]:
# Create df
df = pd.read_csv("Resources/purchase_data.csv")
df = df.rename(columns={
    "Purchase ID":"purchase_id",
    "SN":"sn",
    "Age":"age",
    "Gender":"gender",
    "Item ID":"item_id",
    "Item Name":"item_name",
    "Price":"price"
})

In [None]:
### Player Count
tot_players = df.sn.unique().shape[0]

player_df = pd.DataFrame({"Total Players":tot_players}, index=[0])
player_df

In [None]:
### Purchasing Analysis (Total)
unique_items = df.item_id.unique().shape[0]
avg_price = round(df.price.mean(),2)
tot_purchases = df.shape[0]
tot_revenue = df.price.sum()

purchase_analysis = pd.DataFrame({
    "Unique Items": unique_items,
    "Average Price": avg_price,
    "Number of Purchases": tot_purchases,
    "Total Revenue": tot_revenue,
}, index=[0])
purchase_analysis

In [None]:
### Gender Demographics
gender_group = df.groupby(['gender'])

gender_counts = (gender_group['sn'].unique()).str.len()
gender_total = gender_counts.sum(axis = 0, skipna = True)

gender_demographics = pd.DataFrame({
    "Total Count":gender_counts[:],
    "Percentage of Players":round(((gender_counts[:]/gender_total)*100),2),
})
del gender_demographics.index.name
gender_demographics

In [None]:
### Purchasing Analysis (Gender)
purchasing_analysis_df = pd.DataFrame({
    'Genders':[i][0],
    'Purchase Count':[df[df.gender == i].shape[0]][0], 
    'Avg. Purchase Price':[df[df.gender == i].price.mean()][0],
    'Tot. Purchase Value':[df[df.gender == i].price.sum()][0],
} for i in gender_counts.index)
purchasing_analysis_df.set_index(['Genders'])

In [3]:
### Age Demographics
age_bins = [[i, i+4] for i in range(5, df.age.max(), 5)]

for a in age_bins:
    lower, upper = a
    print("For ages: {0}-{1}".format(*a))
    sdf = df[(df.age >= lower) & (df.age < upper)]
    print("\tPurchase Count:", sdf.shape[0])
    print("\tAvg. Purchase Price:", sdf.price.mean())
    print("\tTot. Purchase Value:", sdf.price.sum())
    #Average Purchase Total per Person by Age Group

For ages: 5-9
	Purchase Count: 17
	Avg. Purchase Price: 3.4623529411764706
	Tot. Purchase Value: 58.86
For ages: 10-14
	Purchase Count: 26
	Avg. Purchase Price: 2.9180769230769226
	Tot. Purchase Value: 75.87
For ages: 15-19
	Purchase Count: 113
	Avg. Purchase Price: 3.034601769911505
	Tot. Purchase Value: 342.9100000000001
For ages: 20-24
	Purchase Count: 298
	Avg. Purchase Price: 3.0330201342281877
	Tot. Purchase Value: 903.8400000000001
For ages: 25-29
	Purchase Count: 88
	Avg. Purchase Price: 2.9292045454545446
	Tot. Purchase Value: 257.77
For ages: 30-34
	Purchase Count: 64
	Avg. Purchase Price: 2.9979687500000005
	Tot. Purchase Value: 191.87
For ages: 35-39
	Purchase Count: 35
	Avg. Purchase Price: 3.5528571428571425
	Tot. Purchase Value: 124.34999999999998
For ages: 40-44
	Purchase Count: 10
	Avg. Purchase Price: 3.118
	Tot. Purchase Value: 31.18


In [None]:
### Age Demographics
age_bins = [[i, i+4] for i in range(10, df.age.max(), 5)]

age_df = pd.DataFrame({
    print("{0}-{1}".format(*a))
} for a in age_bins)

In [None]:
### Top Spenders
sn_groups = df.groupby(['sn'])
sn_count = sn_groups['sn'].count()
sn_average = sn_groups['price'].mean()
sn_total = sn_groups["price"].sum()

sn_df = pd.DataFrame({
    "Purchase Count": sn_count,
    "Avg. Purchase Price": sn_average,
    "Tot. Purchase Value": sn_total,
})

sn_df.sort_values(by=['Tot. Purchase Value'], ascending=False).head()

In [None]:
### Most Popular Items
item_groups = df.groupby(['item_id'])
item_name = item_groups.item_name.first()
item_count = item_groups.item_id.count()
item_price = item_groups.price.first()
item_total = item_groups.price.sum()

item_df = pd.DataFrame({
    "Item Name": item_name,
    "Purchase Count": item_count,
    "Item Price": item_price,
    "Tot. Purchase Value": item_total,
})

item_df.sort_values(by=['Purchase Count'], ascending=False).head()

In [None]:
### Most Profitable Items
item_df.sort_values(by=['Tot. Purchase Value'], ascending=False).head()