## Analysis on Permissions Dataset

We use this dataset mainly for the `Ad_Supported` and `In_App_Purchases` columns, and also for the Game Category analysis.

### 1. Are there any factors that are impacted by in-app ads?
The code below checks whether apps that are ad-supported differ in their number of installations.

In [None]:
# Install per Rating value, one column per Ad_Supported value
# (pivot_table aggregates with the mean unless told otherwise).
ads_by_rating = pd.pivot_table(
    Permission.reset_index(),
    values='Install',
    index='Rating',
    columns='Ad_Supported',
)
# Grouped bars on a single axes.
# NOTE(review): layout is normally only meaningful with subplots=True -
# confirm whether it can be dropped.
ads_by_rating.plot.bar(subplots=False, layout=(1, 2))
# axis captions with an enlarged font
plt.xlabel('Rating', size=20)
plt.ylabel('Installs', size=20)

### 2. Is there any impact on installs or ratings, if the free app has in-app purchases?
To see whether in-app purchases have any impact on installs or on ratings.

In [None]:
# Keep only the rows whose Free flag equals True
is_free = Permission.Free == True  # noqa: E712 - element-wise comparison
Free_app = Permission[is_free]
# Install per Rating value, one column per In_App_Purchases value
# (pivot_table aggregates with the mean unless told otherwise).
iap_by_rating = pd.pivot_table(
    Free_app.reset_index(),
    values='Install',
    index='Rating',
    columns='In_App_Purchases',
)
# Grouped bars on a single axes
iap_by_rating.plot.bar(subplots=False, layout=(1, 2))
# axis captions with an enlarged font
plt.xlabel('Rating', size=20)
plt.ylabel('Installs', size=20)


### 3. Rating vs Content Rating
How do ratings differ between an app that has a restricted content rating and one that does not?

In [None]:
# Summary per 'Content Rating' group: number of apps (App_Count),
# total of RatingCount (R_C) and mean Rating (R).
CRR = (
    Permission
    .groupby('Content Rating')
    .agg(
        App_Count=('App', 'count'),
        R_C=('RatingCount', 'sum'),
        R=('Rating', 'mean'),
    )
)
# Human-readable label for each group.
# NOTE(review): the labels are assigned by position, so this assumes the
# groupby produces exactly five groups in this order - verify against the
# values stored in 'Content Rating'.
CRR['Content_Rating'] = ['Everyone', 'Teen', 'Mature 17+', 'Everyone 10+', 'Adult 18+']
# show the summary table
CRR

In [None]:
# Line chart of the mean rating (CRR['R']) for each content-rating label.
plt.plot(CRR['Content_Rating'], CRR['R'], color='g', label='Line Avg Rating')
# legend entry for the single line
plt.legend()
# display plot
plt.show()


# One figure with two y-axes over a shared x-axis:
# app count on the left axis, rating count on the right axis.
fig, ax_left = plt.subplots()
ax_right = ax_left.twinx()

# plot, label and legend for the left side
ax_left.plot(CRR['Content_Rating'], CRR['App_Count'], color='red', label='App Count')
ax_left.set_ylabel('App Count')
ax_left.legend(loc=2)
# plot, label and legend for the right side
ax_right.plot(CRR['Content_Rating'], CRR['R_C'], color='green', label='Rating Count')
ax_right.set_ylabel('Rating Count')
ax_right.legend()

# Mean rating, drawn against the right (Rating Count) axis without a legend entry.
# NOTE(review): R and R_C share one scale here; if their magnitudes differ
# widely this line will look flat - confirm this overlay is intended.
ax_right.plot(CRR['Content_Rating'], CRR['R'], color='blue')

# Switch the grid off on both axes of this figure, addressed explicitly
# by the names created above.
ax_left.grid(False)
ax_right.grid(False)
plt.show()

In [None]:
# One figure carrying three y-axes over the same x-axis.
fig, host = plt.subplots(figsize=(8, 5))
par1 = host.twinx()  # second y-axis, used for R_C
par2 = host.twinx()  # third y-axis, used for R

# Fixed axis ranges.
# NOTE(review): these limits are hard-coded; presumably they were read off
# the current CRR values - re-check them whenever the data changes.
host.set_xlim(0, 5)
host.set_ylim(0, 914705)
par1.set_ylim(0, 1667438720)
par2.set_ylim(2, 2.82)

# Axis captions
host.set_xlabel("Content_Rating")
for y_axis, y_caption in ((host, "App_Count"), (par1, "R_C"), (par2, "R")):
    y_axis.set_ylabel(y_caption)

# Three colours sampled from the viridis colormap
color1, color2, color3 = (plt.cm.viridis(level) for level in (0, 0.5, .9))

# One line per y-axis, all against the content-rating labels
x_labels = CRR['Content_Rating']
p1, = host.plot(x_labels, CRR['App_Count'], color=color1, label="App Count")
p2, = par1.plot(x_labels, CRR['R_C'], color=color2, label="Rating Count")
p3, = par2.plot(x_labels, CRR['R'], color=color3, label="Average Rating")

# A single legend on the host axes covering all three lines
lns = [p1, p2, p3]
host.legend(handles=lns, loc='best')

# Push par2's right spine outward by 60 (presumably so it does not sit on
# top of par1's right-hand axis).
par2.spines['right'].set_position(('outward', 60))

# Use the content-rating labels as the x tick positions
par2.xaxis.set_ticks(x_labels)
# remove grid
plt.grid(False)

### Game Category Analysis

In [None]:
# Restrict the data to rows whose 'Main Category' is 'Game'.
# .copy() makes the result an independent DataFrame, so columns added to it
# afterwards are not written to a slice of the original frame (this avoids
# pandas' SettingWithCopyWarning).
Permission = Permission[Permission['Main Category'] == 'Game'].copy()

#### 1. Game apps distribution over the years.
Used to see the growth of game apps over the years.

In [None]:
# Global matplotlib text settings shared by the plots that follow.
# 'sans-serif' is one of matplotlib's generic font families; a value such as
# 'normal' is not a family name and makes matplotlib warn and fall back to
# its default font.
font = {'family': 'sans-serif',
        'weight': 2,
        'size': 15}
# white plotting area
plt.rcParams['axes.facecolor'] = 'white'
# apply the font settings and the sizes of axis labels / titles
plt.rc('font', **font)
plt.rc('axes', labelsize=17, titlesize=20)
# App Age = LU_year - Released_year
# (LU_year is presumably the year of the last update - confirm)
Permission['App Age'] = Permission.LU_year - Permission.Released_year

In [None]:
# Wide canvas for the per-year counts
plt.figure(figsize=(15, 7))
# Rows with Released_year equal to 2021 are left out of the count
not_2021 = Permission.Released_year != 2021
sns.countplot(x='Released_year', data=Permission[not_2021], palette="terrain_r")
# tilt the year labels
plt.xticks(rotation=45)
# title and axis captions
plt.title('App Distributrion By Year \n')
plt.xlabel('\nReleased Year')
plt.ylabel('Total of App')
# render the figure
plt.show()

#### 2. Category vs Installs
This plot is used to see which categories have the highest number of installed apps.

In [None]:
# Canvas for the combined bar / line chart
plt.figure(figsize=(15, 7))
# Categories ordered as value_counts returns them (most frequent first by default)
category_order = Permission.Category.value_counts().index
# Bars: number of apps per category
sns.countplot(x='Category', data=Permission, order=category_order, palette="terrain_r")
plt.xticks(rotation=45)
# Second y-axis over the same x-axis for the install totals
plt.twinx()
# Sum of Install per category, arranged in the same order as the bars
dataplot = Permission.groupby('Category')['Install'].sum().loc[list(category_order)]
# Line: total installs per category
sns.lineplot(x='Category', y='Install', data=dataplot.reset_index())
plt.xticks(rotation=45)
plt.title('Category Distibution \n')
plt.show()

#### 3. App Age vs Installs
Does app age impact the number of installations?

In [None]:
# Canvas for the bar chart
plt.figure(figsize=(10, 5))
# Sum of Install for each App Age value
installs_by_age = Permission.groupby('App Age')['Install'].sum()
# One green bar per App Age value
installs_by_age.plot(
    kind='bar',
    color='green',
    xlabel='App Age',
    ylabel='Total of install',
)
plt.title('Is App Age impact to number of user?\n')
# render the figure
plt.show()

#### 4. Rating distribution of the Game Category Apps
To see how the Game apps in the Play Store are distributed across the rating range.

In [None]:
# Canvas for the count plot
plt.figure(figsize=(15, 7))
# Rows whose Rating equals 0 are excluded from the count
rated = Permission[Permission.Rating != 0]
sns.countplot(x='Rating', data=rated, palette="terrain_r")
# tilt the rating labels
plt.xticks(rotation=60)
# title and y-axis caption
plt.title('Rating Distribution \n')
plt.ylabel('Total of App')
# render the figure
plt.show()

#### 5. Percent of Free / Ad Supported / In App Purchases / Editors Choice in Install quantity
To interpret which types of apps are getting more installations, and whether those apps are ad-supported or offer in-app purchases.

In [None]:
# Canvas holding a 2x2 grid of pie charts
plt.figure(figsize=(18, 12))
# (subplot position, column whose value counts are drawn, start angle)
pie_specs = (
    (221, 'Type', 300),
    (222, 'Ad_Supported', 5),
    (223, 'In_App_Purchases', 5),
    (224, 'Editors_Choice', 300),
)
for position, column, angle in pie_specs:
    # select the grid cell, then draw the share of each value with a % label
    plt.subplot(position)
    Permission[column].value_counts().plot(
        kind='pie',
        autopct='%1.1f%%',
        startangle=angle,
        fontsize=15,
    )
# render the figure
plt.show()

#### 6. Rating vs Installs
To see which rating bracket has the most installations, and the relationship between rating count and installations.

In [None]:
# Canvas holding two side-by-side scatter plots
plt.figure(figsize=(15, 7))
# Re-apply the font settings held in `font` (defined in an earlier cell)
plt.rc('font', **font)
# Left panel: Install against Rating
plt.subplot(121)
sns.scatterplot(data=Permission, x="Rating", y="Install", sizes=(0, 30))
# Right panel: Install against RatingCount
plt.subplot(122)
sns.scatterplot(x='RatingCount', y='Install', data=Permission)
# render the figure
plt.show()

#### 7. Distribution of Size - Free Game App
To find the optimal size of a free game app.

In [None]:
# Distribution of Size among free games.
# Free rows are the ones whose Type equals 0.
is_free_game = Permission['Type'] == 0
gamefree = Permission[is_free_game]
# Wide, short canvas for a single horizontal box
plt.figure(figsize=(20, 5))
sns.boxplot(x='Size', data=gamefree, color='green')
plt.title('Distribution of Size - Free Game\n')
# render the figure
plt.show()

#### 8. Distribution of Size- Paid Game App
To find the optimal size of a paid game app.

In [None]:
# Distribution of Size among paid games.
# The free-game filter used for the previous plot is Type == 0, so the paid
# games are taken as its complement.
# NOTE(review): assumes every non-zero Type value means "paid" - confirm
# against the encoding of the Type column.
gamepaid = Permission[Permission['Type'] != 0]
# setting the size of the figure for the plot
plt.figure(figsize=(20, 5))
# generate the boxplot
sns.boxplot(data=gamepaid,
            x='Size', color='green')
# title of the plot
plt.title('Distribution of Size - Paid Game\n')
# display the plot
plt.show()