
# Heroes of PyMoli - Business Analysis

Analyzing the data for the client, the most recent fantasy game Heroes of Pymoli.

Like many others in its genre, the game is free-to-play, but players are encouraged to purchase optional items that enhance their playing experience. 


-----------------------------------------------------------------------------------------------
As a first task, the company would like you to generate a report that breaks down the game's purchasing data into meaningful insights.

final report should include each of the following:

1 - Player Count
2 - Purchasing Analysis (Total)
3 - Gender Demographics
4 - Purchasing Analysis (Gender)
5 - Age Demographics
6 - Top Spenders
7 - Most Popular Items


In [443]:
import pandas as pd
import numpy as np

path = "Resources/purchase_data.csv"
game_pd = pd.read_csv(path)

#game_pd.describe

game_df = game_pd.rename(columns={
    "SN": "Player"
    })
game_df

game_df.head()

Unnamed: 0,Purchase ID,Player,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44



# 1 - Player Count


In [444]:
player_count = len(game_df["Player"].unique())

Analysis_0 = pd.DataFrame({"Total of Players" : [player_count]
    
})

Analysis_0

Unnamed: 0,Total of Players
0,576



# 2 - Purchasing Analysis (Total)


In [445]:
#GETTING ALL THE VALUES TO BUILD FIRST REPORT 
#--->

unique_item = len(game_df["Item ID"].unique())
print(unique_item)

average_purchase = round(game_df["Price"].mean())
print(average_purchase)

total_purchase = (game_df["Price"].count())
print(total_purchase)

total_revenue = (game_df["Price"].sum())
print(total_revenue)


#FRAMING THE RESULTS IN DATA FRAME TO PRINT REPORT
#--->
money = "$"
Analysis_1 = pd.DataFrame({ "Total Revenue": [total_revenue],
                                  "Total Unique Itens": [unique_item],
                                  "Average Purchase": [average_purchase],
                                  "Total Purchases": [total_purchase]
                                        
                                })

#FORMATING THE SYMBOLS
#...
Analysis_1["Average Purchase"] = Analysis_1["Average Purchase"].map("${:.2f}".format)
Analysis_1["Total Purchases"] = Analysis_1["Total Purchases"].map("${:.2f}".format)
Analysis_1["Total Revenue"] = Analysis_1["Total Revenue"].map("${:.2f}".format)

Analysis_1

183
3
780
2379.77


Unnamed: 0,Average Purchase,Total Purchases,Total Revenue,Total Unique Itens
0,$3.00,$780.00,$2379.77,183



# 3 - Gender Demographics


In [446]:
#CALCULATING PERCENTAGES
#..
Gender = game_df["Gender"].value_counts() #list
print(Gender) 

total_gender = game_df["Gender"].count() #total
print(total_gender)  

male_percentage = round((652*100)/780)
female_percentage = round((113*100)/780)
non_disclosed = round((15*100)/780)


#APPENDING THE VALUES IN A LIST THAT NEEDS TO MATCH THE "LIST OF GENDER
Percentage_of_players = []
Percentage_of_players.append(male_percentage)
Percentage_of_players.append(female_percentage)
Percentage_of_players.append(non_disclosed)



#PRINTING THE ANALYSIS IN A DATA FRAME
Analysis_2 = pd.DataFrame({ "Total Count" : game_df["Gender"].value_counts(),
                  "Percentage of Players" : Percentage_of_players
                           
})

Analysis_2

Male                     652
Female                   113
Other / Non-Disclosed     15
Name: Gender, dtype: int64
780


Unnamed: 0,Percentage of Players,Total Count
Male,84,652
Female,14,113
Other / Non-Disclosed,2,15



# 4 - Purchasing Analysis (Gender)


In [482]:
#GROUPING BY GENDER

gender_df = game_df.groupby(['Gender'])


#CREATING A FUNCTION TO CALCULATE THE PERCENTAGE
#--->

def percentage (a):
    return (a/780)*100


Analysis_3 = pd.DataFrame({ "Purchase Count": gender_df["Price"].count(),
                             "Average Purchase Price": gender_df["Price"].mean(),
                             "Total Purchase Value": gender_df["Price"].sum(),
                           "Avg Purchase per Person" : gender_df["Price"].sum() / gender_df["Price"].count()
                            })

Analysis_3

#FORMATING THE SYMBOLS WITH "MAP FUNCTION
#--->
Analysis_3["Average Purchase Price"] = Analysis_3["Average Purchase Price"].map("${:.2f}".format)
Analysis_3["Total Purchase Value"] = Analysis_3["Total Purchase Value"].map("${:.2f}".format)

Analysis_3

Unnamed: 0_level_0,Average Purchase Price,Avg Purchase per Person,Purchase Count,Total Purchase Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,$3.20,3.203009,113,$361.94
Male,$3.02,3.017853,652,$1967.64
Other / Non-Disclosed,$3.35,3.346,15,$50.19



# 5 - Age Demographics


In [448]:
#FIND THE MAX AND MIN VALUES TO CREATING THE BINS AND LABELS
#--->
age_range = game_df["Age"].max(), game_df["Age"].min()
age_range

group_labels = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]
bins = (0, 10, 15, 20, 25, 30, 35, 40, 500)



#CREATED ANOTHER COLUMN IN THE "GAME_DF THAT HAS THE RANGE(BINS), 
#STORED IN A VARIABLE ALSO, GROUPING BY THE NEW COLUMN
#--->
game_df["age_count"] = pd.cut(game_df["Age"], bins, labels=group_labels)
Age_grouped = game_df.groupby("age_count")



#CREATED LIST TO STORED VALUES  
age_list = []
percentage_list = []

#LOOPING TO GRAP THE VALUES AND APPEND TO THE LIST
for a in age_count:
    age_list.append(a)
    percentage_list.append(round((a/780)*100))

print(age_list) 
print(percentage_list)


#PRINT THE ANALYSIS IN DATA FRAME
Analysis_4 = pd.DataFrame({ "Total Count": age_count,
                           "Percentage of Players" : percentage_list         

})
Analysis_4["Percentage of Players"] = Analysis_4["Percentage of Players"].map(" {:.0f}".format) 

Analysis_4

[325, 200, 77, 54, 52, 33, 32, 7]
[42, 26, 10, 7, 7, 4, 4, 1]


Unnamed: 0,Percentage of Players,Total Count
20-24,42,325
15-19,26,200
25-29,10,77
10-14,7,54
30-34,7,52
35-39,4,33
<10,4,32
40+,1,7


In [449]:
#CALCULATING USING THE AGE_GROUPED DF TO PERFORM THE STATISTICS
#--->
Age_purchase_count = []
Age_purchase_count = Age_grouped["Purchase ID"].count()

Age_purchase_avg = Age_grouped["Price"].mean()

Age_purchase_total = Age_grouped["Price"].sum()

Age_purchase_person = Age_purchase_total/age_count


#CREATING DATA FRAME WITH THE RESULTS
#--->
Analysis_6 = pd.DataFrame({ "Purchase Count" : Age_purchase_count,
                          "Purchase Average" : Age_purchase_avg,
                           "Total Purchase Value" : Age_purchase_total,
                           "Total Purchase per Person" : Age_purchase_person
                          })

#FORMATING TO NOT SHOW NUMBER AFTER THE ","
#--->
Analysis_6["Purchase Average"] = Analysis_6["Purchase Average"].map(" {:.1f}".format) 
Analysis_6["Total Purchase Value"] = Analysis_6["Total Purchase Value"].map(" {:.1f}".format) 
Analysis_6["Total Purchase per Person"] = Analysis_6["Total Purchase per Person"].map(" {:.1f}".format) 

Analysis_6 = Analysis_6.sort_values(["Purchase Count"], ascending= False)

Analysis_6

Unnamed: 0,Purchase Average,Purchase Count,Total Purchase Value,Total Purchase per Person
20-24,3.0,325,981.6,3.0
15-19,3.1,200,621.6,3.1
25-29,2.9,77,221.4,2.9
10-14,2.9,54,156.6,2.9
30-34,3.0,52,155.7,3.0
35-39,3.4,33,112.3,3.4
<10,3.4,32,109.0,3.4
40+,3.1,7,21.5,3.1



# 6 -TOP Spenders


In [450]:
#FINDING OUT THE HIGH AND LOWER PURCHASES
#--->
game_df["Price"].max(), game_df["Price"].min()


#CREATING BINS TO GET HIGH PURCHASES
#--->
Price_labels = ["1", "2","3","4","5"]
Price_bins = [ 0, 1, 2, 3, 4, 5 ]


#GROUPING 
#--->
Player_grouped = game_df.groupby("Player")

Player_purchase_count = Player_grouped["Purchase ID"].count()
Player_average_purchase = Player_grouped["Price"].mean()
Player_total_purchase = Player_grouped["Price"].sum()



#ANALYSIS DATA FRAME
#--->
Analysis_7 = pd.DataFrame({ "Number of Purchases" : Player_purchase_count,
                           "Average Purchase Price" : Player_average_purchase,
                           "Total Spend" : Player_total_purchase
                        })

#FORMATING THE ORDER OF VALUES IN THE TABLE AND ROUNDING NUMBERS
#--->
Analysis_7 = Analysis_7.sort_values(["Total Spend"], ascending=False).head()

Analysis_7["Average Purchase Price"] = Analysis_7["Average Purchase Price"].map(" {:.1f}".format)

Analysis_7



Unnamed: 0_level_0,Average Purchase Price,Number of Purchases,Total Spend
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,3.8,5,18.96
Idastidru52,3.9,4,15.45
Chamjask73,4.6,3,13.83
Iral74,3.4,4,13.62
Iskadarya95,4.4,3,13.1



# 7 - Most Popular Items



In [456]:
#GROUPED THE ITEMS THAT NEEDS TO BE DYPLAYED ON THE TABLE WITHOUT MODIFICATIONS

Item_grouped = game_df.groupby(["Item ID","Item Name", "Price"])


#COUNT THE TOTAL PURCHASES BY ITEM AND TOTAL
#--->
Item_purchase_count = Item_grouped["Price"].count()
Item_total_purchase = Item_grouped["Price"].sum()


#RUN ANALYSIS IN A DATA FRAME
Analysis_8 = pd.DataFrame({ "Total Purchase Value" : Item_total_purchase,
                          "Purchase Count" : Item_purchase_count,
                       
                          })

#SORTED THE ITEM BY POPULARITY 
Analysis_8 = Analysis_8.sort_values(["Purchase Count"], ascending= False).head()
Analysis_8


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,12,50.76
145,Fiery Glass Crusader,4.58,9,41.22
108,"Extraction, Quickblade Of Trembling Hands",3.53,9,31.77
82,Nirvana,4.9,9,44.1
19,"Pursuit, Cudgel of Necromancy",1.02,8,8.16



# 8 - Most Profitable Items


In [468]:
#GOT THE LAST TABLE AS REFERENCE AND SORTED IT BY TOTAL VALUE
Analysis_9 = Analysis_8

Analysis_9 = Analysis_9.sort_values(["Total Purchase Value"], ascending= False).head()


Analysis_9.to_csv("Output/game_Analysis.csv", index=False, header=True)

Analysis_9


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchase Count,Total Purchase Value
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",4.23,12,50.76
82,Nirvana,4.9,9,44.1
145,Fiery Glass Crusader,4.58,9,41.22
108,"Extraction, Quickblade Of Trembling Hands",3.53,9,31.77
19,"Pursuit, Cudgel of Necromancy",1.02,8,8.16


In [1]:
import matplotlib as plt

plt.savefig("./"Output/game_Analysis.csv")


SyntaxError: invalid syntax (<ipython-input-1-9eb9dcca73b3>, line 3)