# Mobile apps are everywhere. They are easy to create and can be very lucrative from a business standpoint. Android in particular keeps expanding as an operating system and has captured more than 74% of the total market.

# The Google Play Store apps data has enormous potential to facilitate data-driven decisions and insights for businesses. In this notebook, I analyze the Android app market by comparing ~10k apps in Google Play across different categories. I also use the user reviews to draw a qualitative comparison between the apps.

In [52]:
import pandas as pd
import numpy as np
# Load the app listings into a DataFrame.
apps = pd.read_csv('datasets/apps.csv')
# Print the column dtypes and non-null counts, then preview the first rows.
apps.info()
apps.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9659 entries, 0 to 9658
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   App           9659 non-null   object 
 1   Category      9659 non-null   object 
 2   Rating        8196 non-null   float64
 3   Reviews       9659 non-null   int64  
 4   Size          8432 non-null   float64
 5   Installs      9659 non-null   object 
 6   Type          9659 non-null   object 
 7   Price         9659 non-null   float64
 8   Last Updated  9659 non-null   object 
dtypes: float64(3), int64(1), object(5)
memory usage: 679.3+ KB


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,"10,000+",Free,0.0,"January 7, 2018"
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,"500,000+",Free,0.0,"January 15, 2018"
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,"5,000,000+",Free,0.0,"August 1, 2018"
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,"50,000,000+",Free,0.0,"June 8, 2018"
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,"100,000+",Free,0.0,"June 20, 2018"


In [53]:
# 'Installs' is read as text such as "10,000+". Remove the thousands
# separators and the trailing "+" so the column can be cast to a number.
characters = [',', '+']
for char in characters:
    # Vectorized string replace instead of a per-row lambda. regex=False
    # treats "+" as a literal character rather than a regex quantifier, and
    # missing values pass through as NaN instead of raising AttributeError.
    apps['Installs'] = apps['Installs'].str.replace(char, '', regex=False)

apps


Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,"January 7, 2018"
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,"January 15, 2018"
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,"August 1, 2018"
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,"June 8, 2018"
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,"June 20, 2018"
...,...,...,...,...,...,...,...,...,...
9654,Sya9a Maroc - FR,FAMILY,4.5,38,53.0,5000,Free,0.0,"July 25, 2017"
9655,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6,100,Free,0.0,"July 6, 2018"
9656,Parkinson Exercices FR,MEDICAL,,3,9.5,1000,Free,0.0,"January 20, 2017"
9657,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,,1000,Free,0.0,"January 19, 2015"


In [54]:
# Cast the cleaned install counts to integers. An explicit 64-bit dtype is
# used because plain `int` resolves to a platform-dependent width (int32 on
# some platforms), which would make the column's dtype differ between
# machines.
apps['Installs'] = apps['Installs'].astype('int64')
apps.head()

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19.0,10000,Free,0.0,"January 7, 2018"
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14.0,500000,Free,0.0,"January 15, 2018"
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7,5000000,Free,0.0,"August 1, 2018"
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25.0,50000000,Free,0.0,"June 8, 2018"
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8,100000,Free,0.0,"June 20, 2018"


In [55]:
# Re-inspect the column dtypes to confirm that 'Installs' is now numeric.
apps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9659 entries, 0 to 9658
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   App           9659 non-null   object 
 1   Category      9659 non-null   object 
 2   Rating        8196 non-null   float64
 3   Reviews       9659 non-null   int64  
 4   Size          8432 non-null   float64
 5   Installs      9659 non-null   int32  
 6   Type          9659 non-null   object 
 7   Price         9659 non-null   float64
 8   Last Updated  9659 non-null   object 
dtypes: float64(3), int32(1), int64(1), object(4)
memory usage: 641.5+ KB


In [56]:
# Summarize each category: how many apps it holds, plus the mean rating
# and the mean price of those apps.
app_category_info = (
    apps
    .groupby('Category')
    .agg({
        'App': 'count',
        'Rating': 'mean',
        'Price': 'mean',
    })
)
app_category_info

Unnamed: 0_level_0,App,Rating,Price
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ART_AND_DESIGN,64,4.357377,0.093281
AUTO_AND_VEHICLES,85,4.190411,0.158471
BEAUTY,53,4.278571,0.0
BOOKS_AND_REFERENCE,222,4.34497,0.539505
BUSINESS,420,4.098479,0.417357
COMICS,56,4.181481,0.0
COMMUNICATION,315,4.121484,0.263937
DATING,171,3.970149,0.160468
EDUCATION,119,4.364407,0.150924
ENTERTAINMENT,102,4.135294,0.078235


In [57]:
# Give the aggregated columns descriptive, report-friendly names.
app_category_info = app_category_info.rename(
    columns={
        'App': 'Number of apps',
        'Rating': 'Average rating',
        'Price': 'Average price',
    }
)
app_category_info

Unnamed: 0_level_0,Number of apps,Average rating,Average price
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ART_AND_DESIGN,64,4.357377,0.093281
AUTO_AND_VEHICLES,85,4.190411,0.158471
BEAUTY,53,4.278571,0.0
BOOKS_AND_REFERENCE,222,4.34497,0.539505
BUSINESS,420,4.098479,0.417357
COMICS,56,4.181481,0.0
COMMUNICATION,315,4.121484,0.263937
DATING,171,3.970149,0.160468
EDUCATION,119,4.364407,0.150924
ENTERTAINMENT,102,4.135294,0.078235


In [58]:
# Load the user reviews into a DataFrame.
reviews = pd.read_csv('datasets/user_reviews.csv')
# Print the column dtypes and non-null counts, then preview the first rows.
reviews.info()
reviews.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64295 entries, 0 to 64294
Data columns (total 4 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   App                 64295 non-null  object 
 1   Review              37427 non-null  object 
 2   Sentiment Category  37432 non-null  object 
 3   Sentiment Score     37432 non-null  float64
dtypes: float64(1), object(3)
memory usage: 2.0+ MB


Unnamed: 0,App,Review,Sentiment Category,Sentiment Score
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.0
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.25
2,10 Best Foods for You,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.4
4,10 Best Foods for You,Best idea us,Positive,1.0


In [59]:
# Keep only the rows whose category is FINANCE.
finance_apps = apps.loc[apps['Category'].eq('FINANCE')]
finance_apps

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
837,K PLUS,FINANCE,4.4,124424,,10000000,Free,0.0,"June 26, 2018"
838,ING Banking,FINANCE,4.4,39041,,1000000,Free,0.0,"August 3, 2018"
839,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018"
840,The postal bank,FINANCE,3.7,36718,,5000000,Free,0.0,"July 16, 2018"
841,KTB Netbank,FINANCE,3.8,42644,19.0,5000000,Free,0.0,"June 28, 2018"
...,...,...,...,...,...,...,...,...,...
9540,BankNordik,FINANCE,3.9,28,15.0,5000,Free,0.0,"August 8, 2018"
9564,FP Markets,FINANCE,,1,2.0,100,Free,0.0,"January 30, 2018"
9565,FP Boss,FINANCE,,1,5.8,1,Free,0.0,"July 27, 2018"
9572,FP FCU,FINANCE,3.6,48,26.0,5000,Free,0.0,"April 5, 2018"


In [60]:
# Narrow the finance apps down to the ones listed as free.
free_finance_apps = finance_apps.loc[finance_apps['Type'].eq('Free')]

In [61]:
# Display the free finance apps selected above.
free_finance_apps

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
837,K PLUS,FINANCE,4.4,124424,,10000000,Free,0.0,"June 26, 2018"
838,ING Banking,FINANCE,4.4,39041,,1000000,Free,0.0,"August 3, 2018"
839,Citibanamex Movil,FINANCE,3.6,52306,42.0,5000000,Free,0.0,"July 27, 2018"
840,The postal bank,FINANCE,3.7,36718,,5000000,Free,0.0,"July 16, 2018"
841,KTB Netbank,FINANCE,3.8,42644,19.0,5000000,Free,0.0,"June 28, 2018"
...,...,...,...,...,...,...,...,...,...
9540,BankNordik,FINANCE,3.9,28,15.0,5000,Free,0.0,"August 8, 2018"
9564,FP Markets,FINANCE,,1,2.0,100,Free,0.0,"January 30, 2018"
9565,FP Boss,FINANCE,,1,5.8,1,Free,0.0,"July 27, 2018"
9572,FP FCU,FINANCE,3.6,48,26.0,5000,Free,0.0,"April 5, 2018"


In [62]:
# Attach each review to its free finance app. The inner join on 'App' keeps
# only reviews whose app name also appears among the free finance apps.
merged_df = reviews.merge(free_finance_apps, on='App', how='inner')
merged_df

Unnamed: 0,App,Review,Sentiment Category,Sentiment Score,Category,Rating,Reviews,Size,Installs,Type,Price,Last Updated
0,A+ Mobile,,,,FINANCE,3.9,730,6.3,10000,Free,0.0,"June 26, 2018"
1,A+ Mobile,"I rated higher, lowering rating. It simply wor...",Negative,-0.063889,FINANCE,3.9,730,6.3,10000,Free,0.0,"June 26, 2018"
2,A+ Mobile,,,,FINANCE,3.9,730,6.3,10000,Free,0.0,"June 26, 2018"
3,A+ Mobile,,,,FINANCE,3.9,730,6.3,10000,Free,0.0,"June 26, 2018"
4,A+ Mobile,It tells I need update option update. So I uni...,Positive,0.156250,FINANCE,3.9,730,6.3,10000,Free,0.0,"June 26, 2018"
...,...,...,...,...,...,...,...,...,...,...,...,...
2195,HDFC Bank MobileBanking,"I cant access When need this, stop Error shown...",Negative,-0.500000,FINANCE,4.2,208463,,10000000,Free,0.0,"July 26, 2018"
2196,HDFC Bank MobileBanking,I even able login. It recognising customer Id....,Positive,0.466667,FINANCE,4.2,208463,,10000000,Free,0.0,"July 26, 2018"
2197,HDFC Bank MobileBanking,"It working gives msg ""please enable Google pla...",Positive,0.033333,FINANCE,4.2,208463,,10000000,Free,0.0,"July 26, 2018"
2198,HDFC Bank MobileBanking,Need finger print security. Under account summ...,Negative,-0.090000,FINANCE,4.2,208463,,10000000,Free,0.0,"July 26, 2018"


In [63]:
# Average the sentiment score of all reviews belonging to each app.
app_sentiment_score = merged_df.groupby('App')[['Sentiment Score']].mean()
app_sentiment_score

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
A+ Mobile,0.329592
ACE Elite,0.252171
Acorns - Invest Spare Change,0.046667
Amex Mobile,0.175666
Associated Credit Union Mobile,0.388093
BBVA Compass Banking,0.20559
BBVA Spain,0.515086
BZWBK24 mobile,0.326883
Bank of America Mobile Banking,0.180027
BankMobile Vibe App,0.353455


In [64]:
# Rank the apps from the highest to the lowest average sentiment score.
app_sentiment_score_srt = app_sentiment_score.sort_values(
    'Sentiment Score',
    ascending=False,
)
app_sentiment_score_srt

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
BBVA Spain,0.515086
Associated Credit Union Mobile,0.388093
BankMobile Vibe App,0.353455
A+ Mobile,0.329592
Current debit card and app made for teens,0.327258
BZWBK24 mobile,0.326883
"Even - organize your money, get paid early",0.283929
Credit Karma,0.270052
Fortune City - A Finance App,0.266966
Branch,0.26423


In [65]:
# Take the first ten rows of the ranking, i.e. the ten best-scoring apps.
top_10_user_feedback = app_sentiment_score_srt.head(10)

In [66]:
# Display the ten free finance apps with the highest average sentiment score.
top_10_user_feedback

Unnamed: 0_level_0,Sentiment Score
App,Unnamed: 1_level_1
BBVA Spain,0.515086
Associated Credit Union Mobile,0.388093
BankMobile Vibe App,0.353455
A+ Mobile,0.329592
Current debit card and app made for teens,0.327258
BZWBK24 mobile,0.326883
"Even - organize your money, get paid early",0.283929
Credit Karma,0.270052
Fortune City - A Finance App,0.266966
Branch,0.26423
