In [None]:
# Congratulations! After a lot of hard work in the data munging mines, you've landed a job as Lead Analyst for an independent gaming company. You've been assigned the task of analyzing the data for their most recent fantasy game Heroes of Pymoli.

# Like many others in its genre, the game is free-to-play, but players are encouraged to purchase optional items that enhance their playing experience. As a first task, the company would like you to generate a report that breaks down the game's purchasing data into meaningful insights.

# Your final report should include each of the following:

# ### Player Count

# * Total Number of Players

# ### Purchasing Analysis (Total)

# * Number of Unique Items - .unique
# * Average Purchase Price -.mean
# * Total Number of Purchases- .sum?
# * Total Revenue - .sum?

# ### Gender Demographics

# * Percentage and Count of Male Players
# * Percentage and Count of Female Players
# * Percentage and Count of Other / Non-Disclosed

# ### Purchasing Analysis (Gender)

# * The below each broken by gender
#   * Purchase Count
#   * Average Purchase Price
#   * Total Purchase Value
#   * Average Purchase Total per Person by Gender

# ### Age Demographics

# * The below each broken into bins of 4 years (i.e. &lt;10, 10-14, 15-19, etc.)
#   * Purchase Count
#   * Average Purchase Price
#   * Total Purchase Value
#   * Average Purchase Total per Person by Age Group

# ### Top Spenders

# * Identify the the top 5 spenders in the game by total purchase value, then list (in a table):
#   * SN
#   * Purchase Count
#   * Average Purchase Price
#   * Total Purchase Value

# ### Most Popular Items

# * Identify the 5 most popular items by purchase count, then list (in a table):
#   * Item ID
#   * Item Name
#   * Purchase Count
#   * Item Price
#   * Total Purchase Value

# ### Most Profitable Items

# * Identify the 5 most profitable items by total purchase value, then list (in a table):
#   * Item ID
#   * Item Name
#   * Purchase Count
#   * Item Price
#   * Total Purchase Value

# As final considerations:

# * You must use the Pandas Library and the Jupyter Notebook.
# * You must submit a link to your Jupyter Notebook with the viewable Data Frames.
# * You must include a written description of three observable trends based on the data.
# * See [Example Solution](HeroesOfPymoli/HeroesOfPymoli_starter.ipynb) for a reference on expected format.


In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load
data = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
data_df = pd.read_csv(data)

data_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [2]:
total_players = len(data_df["SN"].unique())
total_players = [{'Total Players':total_players}]
total_players_df = pd.DataFrame(total_players)
total_players_df


Unnamed: 0,Total Players
0,576


In [3]:
##PURCHASING ANALYSIS

unique_items = len(data_df["Item Name"].unique())

avg_price= (data_df["Price"].mean(axis=0))

total_purchase=len(data_df["Purchase ID"])

total_rev = data_df["Price"].sum(axis=0)

purchasing_analysis = [{'Number of Unique Items': unique_items,
                        'Average Price': avg_price,
                        'Number of Purchases': total_purchase,
                        'Total Revenue': total_rev}]
purchasing_analysis_df = pd.DataFrame(purchasing_analysis)
purchasing_analysis_df.round(2)

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,3.05,780,2379.77


In [4]:
data_df["Gender"].value_counts()

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64

In [5]:
##GENDER DEMOGRAPHICS
#Collect a list of all Male
male_data = data_df.loc[data_df["Gender"] == "Male"]
#Using GroupBy in order to separate data into fields according to "SN" users
male_sn = male_data.groupby(["SN","Gender"])
#find percentage of male
male_count = len(male_sn)
percent_male = (male_count/576)*100

#Collect a list of all female
female_data = data_df.loc[data_df["Gender"] == "Female"]
#Using GroupBy in order to separate data into fields according to "SN" users
female_sn = female_data.groupby(["SN","Gender"])
#find percentage of female
female_count = len(female_sn)
percent_female = (female_count/576)*100

#Collect a list of all female
other_data = data_df.loc[data_df["Gender"] == "Other / Non-Disclosed"]
#Using GroupBy in order to separate data into fields according to "SN" users
other_sn = other_data.groupby(["SN","Gender"])
#find percentage of "other"
other_count = len(other_sn)
percent_other = (other_count/576)*100

gender_demographics = ({'': ['Male', 'Female', 'Other / Non-Disclosed'],
                        'Total Count': [male_count, female_count, other_count],
                        'Percentage of Players': [percent_male, percent_female, percent_other]})
gender_demographics_df = pd.DataFrame(gender_demographics)
gender_demographics_df.round(2).set_index("")

Unnamed: 0,Total Count,Percentage of Players
,,
Male,484.0,84.03
Female,81.0,14.06
Other / Non-Disclosed,11.0,1.91


In [None]:
##TRIAL CODES


unique = data_df[["SN","Gender"]]
unique_df = pd.DataFrame(unique)
unique_df


name_index = unique_df.set_index("SN")
unique_index = name_index.iloc[:,0:576]
print(unique_index)

In [None]:
grouped_gender = data_df.groupby(["Gender"])

grouped_gender_df = pd.DataFrame(grouped_gender)
grouped_gender_df