In [1]:
import pandas as pd
import numpy as np

# Profitable App Profiles for the App Store and Google Play Markets

Our aim in this project is to find mobile app profiles that are profitable for the App Store and Google Play markets. We're working as data analysts for a company that builds Android and iOS mobile apps, and our job is to enable our team of developers to make data-driven decisions with respect to the kind of apps they build.

At our company, we only build apps that are free to download and install, and our main source of revenue consists of in-app ads. This means that our revenue for any given app is mostly influenced by the number of users that use our app. Our goal for this project is to analyze data to help our developers understand what kinds of apps are likely to attract more users.

In [2]:
appstore = pd.read_csv("AppleStore.csv")
googlestore = pd.read_csv("googleplaystore.csv")

In [3]:
appstore

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
0,284882215,Facebook,389879808,USD,0.0,2974676,212,3.5,3.5,95.0,4+,Social Networking,37,1,29,1
1,389801252,Instagram,113954816,USD,0.0,2161558,1289,4.5,4.0,10.23,12+,Photo & Video,37,0,29,1
2,529479190,Clash of Clans,116476928,USD,0.0,2130805,579,4.5,4.5,9.24.12,9+,Games,38,5,18,1
3,420009108,Temple Run,65921024,USD,0.0,1724546,3842,4.5,4.0,1.6.2,9+,Games,40,5,1,1
4,284035177,Pandora - Music & Radio,130242560,USD,0.0,1126879,3594,4.0,4.5,8.4.1,12+,Music,37,4,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7192,1170406182,Shark Boom - Challenge Friends with your Pet,245415936,USD,0.0,0,0,0.0,0.0,1.0.9,4+,Games,38,5,1,1
7193,1069830936,【謎解き】ヤミすぎ彼女からのメッセージ,16808960,USD,0.0,0,0,0.0,0.0,1.2,9+,Book,38,0,1,1
7194,1070052833,Go!Go!Cat!,91468800,USD,0.0,0,0,0.0,0.0,1.1.2,12+,Games,37,2,2,1
7195,1081295232,Suppin Detective: Expose their true visage!,83026944,USD,0.0,0,0,0.0,0.0,1.0.3,12+,Entertainment,40,0,1,1


In [4]:
googlestore

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [5]:
print(appstore.shape)
print(googlestore.shape)
print(appstore.head())
print(googlestore.head())


(7197, 16)
(10841, 13)
          id               track_name  size_bytes currency  price  \
0  284882215                 Facebook   389879808      USD    0.0   
1  389801252                Instagram   113954816      USD    0.0   
2  529479190           Clash of Clans   116476928      USD    0.0   
3  420009108               Temple Run    65921024      USD    0.0   
4  284035177  Pandora - Music & Radio   130242560      USD    0.0   

   rating_count_tot  rating_count_ver  user_rating  user_rating_ver      ver  \
0           2974676               212          3.5              3.5     95.0   
1           2161558              1289          4.5              4.0    10.23   
2           2130805               579          4.5              4.5  9.24.12   
3           1724546              3842          4.5              4.0    1.6.2   
4           1126879              3594          4.0              4.5    8.4.1   

  cont_rating        prime_genre  sup_devices.num  ipadSc_urls.num  lang.num  \
0

The code above shows the first five rows of both datasets and their shape (i.e the number of rows and columns)


In [6]:
print(appstore.columns)
print(googlestore.columns)

Index(['id', 'track_name', 'size_bytes', 'currency', 'price',
       'rating_count_tot', 'rating_count_ver', 'user_rating',
       'user_rating_ver', 'ver', 'cont_rating', 'prime_genre',
       'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic'],
      dtype='object')
Index(['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type',
       'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver',
       'Android Ver'],
      dtype='object')


This code displays the column names for both datasets

In [7]:
googlestore.isnull().sum()

App                  0
Category             0
Rating            1474
Reviews              0
Size                 0
Installs             0
Type                 1
Price                0
Content Rating       1
Genres               0
Last Updated         0
Current Ver          8
Android Ver          3
dtype: int64

In [8]:
print(googlestore.iloc[10472])
print(googlestore.iloc[10473])
print(googlestore.iloc[10471])
googlestore = googlestore.drop(10472)
print(googlestore.shape)

App               Life Made WI-Fi Touchscreen Photo Frame
Category                                              1.9
Rating                                               19.0
Reviews                                              3.0M
Size                                               1,000+
Installs                                             Free
Type                                                    0
Price                                            Everyone
Content Rating                                        NaN
Genres                                  February 11, 2018
Last Updated                                       1.0.19
Current Ver                                    4.0 and up
Android Ver                                           NaN
Name: 10472, dtype: object
App               osmino Wi-Fi: free WiFi
Category                            TOOLS
Rating                                4.2
Reviews                            134203
Size                                 4.1M
Installs 

Row 10472 was dropped from googlestore dataset due to inaccurate data leaving the remaning row to be 10840

In [9]:
duplicates = googlestore[googlestore.duplicated()]
print(duplicates)
print(duplicates.shape[0])

                                                   App      Category  Rating  \
229                       Quick PDF Scanner + OCR FREE      BUSINESS     4.2   
236                                                Box      BUSINESS     4.2   
239                                 Google My Business      BUSINESS     4.4   
256                                ZOOM Cloud Meetings      BUSINESS     4.4   
261                          join.me - Simple Meetings      BUSINESS     4.0   
...                                                ...           ...     ...   
8643                    Wunderlist: To-Do List & Tasks  PRODUCTIVITY     4.6   
8654   TickTick: To Do List with Reminder, Day Planner  PRODUCTIVITY     4.6   
8658                           ColorNote Notepad Notes  PRODUCTIVITY     4.6   
10049        Airway Ex - Intubate. Anesthetize. Train.       MEDICAL     4.3   
10768                                             AAFP       MEDICAL     3.8   

       Reviews                Size     

The code shows that there are 483 duplicates data in the googlestore datasets. To enable accurate analysis the duplicated rows will be dropped

In [10]:
googlestore = googlestore.drop_duplicates()
googlestore

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


By dropping the duplicates rows we are left with 10357 rows

In [11]:
duplicate = appstore[appstore.duplicated()]
print(duplicate)
print(duplicate.shape[0])

Empty DataFrame
Columns: [id, track_name, size_bytes, currency, price, rating_count_tot, rating_count_ver, user_rating, user_rating_ver, ver, cont_rating, prime_genre, sup_devices.num, ipadSc_urls.num, lang.num, vpp_lic]
Index: []
0


In [12]:
def is_english(string):
    non_ascii = 0
    
    for character in string:
        if ord(character) > 127:
            non_ascii += 1
    
    if non_ascii > 3:
        return False
    else:
        return True
    
english = googlestore["App"].apply(is_english)
english

0        True
1        True
2        True
3        True
4        True
         ... 
10836    True
10837    True
10838    True
10839    True
10840    True
Name: App, Length: 10357, dtype: bool

In [13]:
english.value_counts()

True     10312
False       45
Name: App, dtype: int64

In [14]:
googlestore = googlestore[english]

In [15]:
googlestore

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [16]:
googlestore.shape[0]

10312

In [17]:
googlestore["App"].value_counts()

ROBLOX                                                9
8 Ball Pool                                           7
Helix Jump                                            6
Zombie Catchers                                       6
Bubble Shooter                                        6
                                                     ..
pixiv                                                 1
Super P Launcher for Android P 9.0 launcher, theme    1
[substratum] Vacuum: P                                1
P-Home for KLWP                                       1
iHoroscope - 2018 Daily Horoscope & Astrology         1
Name: App, Length: 9614, dtype: int64

In [18]:
googlestore["App"].shape[0]

10312

In [19]:
eng = appstore['track_name'].apply(is_english)
eng

0        True
1        True
2        True
3        True
4        True
        ...  
7192     True
7193    False
7194     True
7195     True
7196    False
Name: track_name, Length: 7197, dtype: bool

In [20]:
eng.value_counts()

True     6183
False    1014
Name: track_name, dtype: int64

In [21]:
appstore = appstore[eng]
appstore

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
0,284882215,Facebook,389879808,USD,0.00,2974676,212,3.5,3.5,95.0,4+,Social Networking,37,1,29,1
1,389801252,Instagram,113954816,USD,0.00,2161558,1289,4.5,4.0,10.23,12+,Photo & Video,37,0,29,1
2,529479190,Clash of Clans,116476928,USD,0.00,2130805,579,4.5,4.5,9.24.12,9+,Games,38,5,18,1
3,420009108,Temple Run,65921024,USD,0.00,1724546,3842,4.5,4.0,1.6.2,9+,Games,40,5,1,1
4,284035177,Pandora - Music & Radio,130242560,USD,0.00,1126879,3594,4.0,4.5,8.4.1,12+,Music,37,4,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7182,1070854722,Be-be-bears!,480781312,USD,2.99,0,0,0.0,0.0,1.0.2.5,4+,Games,35,5,13,1
7186,1169971902,Hey Duggee: We Love Animals,136347648,USD,2.99,0,0,0.0,0.0,1.2,4+,Games,40,5,1,1
7192,1170406182,Shark Boom - Challenge Friends with your Pet,245415936,USD,0.00,0,0,0.0,0.0,1.0.9,4+,Games,38,5,1,1
7194,1070052833,Go!Go!Cat!,91468800,USD,0.00,0,0,0.0,0.0,1.1.2,12+,Games,37,2,2,1


After removing non-english rows from both datasets, the googlestore dataset is remaining 10312 rows (though the apps lenght is showing 9614 after using value counts to investigate ) while appstoredataset is remaining 6183 rows

In [22]:
googlestore["Price"].value_counts()

0          9551
$0.99       146
$2.99       125
$1.99        73
$4.99        70
           ... 
$18.99        1
$389.99       1
$19.90        1
$1.75         1
$1.04         1
Name: Price, Length: 92, dtype: int64

In [23]:
price = googlestore["Price"].str.strip().str.replace("$", "")
price

  """Entry point for launching an IPython kernel.


0        0
1        0
2        0
3        0
4        0
        ..
10836    0
10837    0
10838    0
10839    0
10840    0
Name: Price, Length: 10312, dtype: object

In [24]:
googlestore["Price"] = price
googlestore["Price"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


0        0
1        0
2        0
3        0
4        0
        ..
10836    0
10837    0
10838    0
10839    0
10840    0
Name: Price, Length: 10312, dtype: object

In [25]:
googlestore["Price"].value_counts()

0         9551
0.99       146
2.99       125
1.99        73
4.99        70
          ... 
18.99        1
389.99       1
19.90        1
1.75         1
1.04         1
Name: Price, Length: 92, dtype: int64

In [26]:
googlestore["Price"].unique()

array(['0', '4.99', '3.99', '6.99', '1.49', '2.99', '7.99', '5.99',
       '3.49', '1.99', '9.99', '7.49', '0.99', '9.00', '5.49', '10.00',
       '24.99', '11.99', '79.99', '16.99', '14.99', '1.00', '29.99',
       '12.99', '2.49', '10.99', '1.50', '19.99', '15.99', '33.99',
       '74.99', '39.99', '3.95', '4.49', '1.70', '8.99', '2.00', '3.88',
       '25.99', '399.99', '17.99', '400.00', '3.02', '1.76', '4.84',
       '4.77', '1.61', '2.50', '1.59', '6.49', '1.29', '5.00', '13.99',
       '299.99', '379.99', '37.99', '18.99', '389.99', '19.90', '8.49',
       '1.75', '14.00', '4.85', '46.99', '109.99', '154.99', '3.08',
       '2.59', '4.80', '1.96', '19.40', '3.90', '4.59', '15.46', '3.04',
       '4.29', '2.60', '3.28', '4.60', '28.99', '2.95', '2.90', '1.97',
       '200.00', '89.99', '2.56', '30.99', '3.61', '394.99', '1.26',
       '1.20', '1.04'], dtype=object)

In [27]:
googlestore["Price"] = googlestore["Price"].astype(float)
googlestore["Price"]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
10836    0.0
10837    0.0
10838    0.0
10839    0.0
10840    0.0
Name: Price, Length: 10312, dtype: float64

In [28]:
googlestore = googlestore[googlestore["Price"] == 0.0]
googlestore

Unnamed: 0,App,Category,Rating,Reviews,Size,Installs,Type,Price,Content Rating,Genres,Last Updated,Current Ver,Android Ver
0,Photo Editor & Candy Camera & Grid & ScrapBook,ART_AND_DESIGN,4.1,159,19M,"10,000+",Free,0.0,Everyone,Art & Design,"January 7, 2018",1.0.0,4.0.3 and up
1,Coloring book moana,ART_AND_DESIGN,3.9,967,14M,"500,000+",Free,0.0,Everyone,Art & Design;Pretend Play,"January 15, 2018",2.0.0,4.0.3 and up
2,"U Launcher Lite – FREE Live Cool Themes, Hide ...",ART_AND_DESIGN,4.7,87510,8.7M,"5,000,000+",Free,0.0,Everyone,Art & Design,"August 1, 2018",1.2.4,4.0.3 and up
3,Sketch - Draw & Paint,ART_AND_DESIGN,4.5,215644,25M,"50,000,000+",Free,0.0,Teen,Art & Design,"June 8, 2018",Varies with device,4.2 and up
4,Pixel Draw - Number Art Coloring Book,ART_AND_DESIGN,4.3,967,2.8M,"100,000+",Free,0.0,Everyone,Art & Design;Creativity,"June 20, 2018",1.1,4.4 and up
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10836,Sya9a Maroc - FR,FAMILY,4.5,38,53M,"5,000+",Free,0.0,Everyone,Education,"July 25, 2017",1.48,4.1 and up
10837,Fr. Mike Schmitz Audio Teachings,FAMILY,5.0,4,3.6M,100+,Free,0.0,Everyone,Education,"July 6, 2018",1.0,4.1 and up
10838,Parkinson Exercices FR,MEDICAL,,3,9.5M,"1,000+",Free,0.0,Everyone,Medical,"January 20, 2017",1.0,2.2 and up
10839,The SCP Foundation DB fr nn5n,BOOKS_AND_REFERENCE,4.5,114,Varies with device,"1,000+",Free,0.0,Mature 17+,Books & Reference,"January 19, 2015",Varies with device,Varies with device


In [29]:
appstore["price"].value_counts()

0.00      3222
2.99       669
0.99       641
1.99       610
4.99       375
3.99       266
6.99       165
9.99        76
5.99        43
7.99        30
14.99       15
19.99       13
24.99        8
8.99         8
29.99        6
13.99        6
15.99        4
11.99        3
17.99        3
59.99        3
39.99        2
16.99        2
49.99        2
34.99        1
21.99        1
99.99        1
22.99        1
18.99        1
20.99        1
27.99        1
249.99       1
74.99        1
12.99        1
299.99       1
Name: price, dtype: int64

In [30]:
appstore = appstore[appstore["price"] == 0.0]
appstore

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
0,284882215,Facebook,389879808,USD,0.0,2974676,212,3.5,3.5,95.0,4+,Social Networking,37,1,29,1
1,389801252,Instagram,113954816,USD,0.0,2161558,1289,4.5,4.0,10.23,12+,Photo & Video,37,0,29,1
2,529479190,Clash of Clans,116476928,USD,0.0,2130805,579,4.5,4.5,9.24.12,9+,Games,38,5,18,1
3,420009108,Temple Run,65921024,USD,0.0,1724546,3842,4.5,4.0,1.6.2,9+,Games,40,5,1,1
4,284035177,Pandora - Music & Radio,130242560,USD,0.0,1126879,3594,4.0,4.5,8.4.1,12+,Music,37,4,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7164,1065550288,Tchapper Messenger - Keep it private,82187264,USD,0.0,0,0,0.0,0.0,2.5,4+,Lifestyle,37,0,9,1
7177,1168960919,Golden Lotus Slots,111412224,USD,0.0,0,0,0.0,0.0,1.1.0,17+,Games,37,5,3,1
7192,1170406182,Shark Boom - Challenge Friends with your Pet,245415936,USD,0.0,0,0,0.0,0.0,1.0.9,4+,Games,38,5,1,1
7194,1070052833,Go!Go!Cat!,91468800,USD,0.0,0,0,0.0,0.0,1.1.2,12+,Games,37,2,2,1


The final datasets for both googlestore and appstore are dataframes of free apps







As we mentioned in the introduction, our aim is to determine the kinds of apps that are likely to attract more users because our revenue is highly influenced by the number of people using our apps.

To minimize risks and overhead, our validation strategy for an app idea is comprised of three steps:

Build a minimal Android version of the app, and add it to Google Play.
If the app has a good response from users, we develop it further.
If the app is profitable after six months, we build an iOS version of the app and add it to the App Store.
Because our end goal is to add the app on both Google Play and the App Store, we need to find app profiles that are successful on both markets. For instance, a profile that works well for both markets might be a productivity app that makes use of gamification.

In [31]:
prime_genre = dict((appstore["prime_genre"].value_counts() / appstore.shape[0]) * 100)
prime_genre 

{'Games': 58.16263190564867,
 'Entertainment': 7.883302296710118,
 'Photo & Video': 4.9658597144630665,
 'Education': 3.662321539416512,
 'Social Networking': 3.2898820608317814,
 'Shopping': 2.60707635009311,
 'Utilities': 2.5139664804469275,
 'Sports': 2.1415270018621975,
 'Music': 2.0484171322160147,
 'Health & Fitness': 2.0173805090006205,
 'Productivity': 1.7380509000620732,
 'Lifestyle': 1.5828677839851024,
 'News': 1.3345747982619491,
 'Travel': 1.2414649286157666,
 'Finance': 1.1173184357541899,
 'Weather': 0.8690254500310366,
 'Food & Drink': 0.8069522036002483,
 'Reference': 0.5586592178770949,
 'Business': 0.5276225946617008,
 'Book': 0.4345127250155183,
 'Navigation': 0.186219739292365,
 'Medical': 0.186219739292365,
 'Catalogs': 0.12414649286157665}

The runner-up is the Game Apps

From the prime genre above it is observed that Games Apps has the highest percentage(58.16%) which is followed by Entertainment Apps(7.88%), followed by Photo and video Apps(4.96), and then Education Apps(3.66) and so on

The general impression here is that most of the apps are designed for entertainment (games, photo and video, social networking, sports, music

In [32]:
Genres = dict((googlestore["Genres"].value_counts() / googlestore.shape[0]) * 100)
Genres

{'Tools': 7.98869228353052,
 'Entertainment': 5.915610930792587,
 'Education': 5.14082295047639,
 'Business': 4.33462464663386,
 'Productivity': 3.957700764317872,
 'Lifestyle': 3.6540676368966603,
 'Finance': 3.5912469898439956,
 'Sports': 3.559836666317663,
 'Communication': 3.5388964506334415,
 'Action': 3.476075803580777,
 'Medical': 3.3923149408438906,
 'Personalization': 3.1933828918437857,
 'Photography': 3.151502460475343,
 'Health & Fitness': 3.0468013820542352,
 'Social': 2.9002198722646844,
 'News & Magazines': 2.7012878232645794,
 'Travel & Local': 2.3453041566328134,
 'Shopping': 2.313893833106481,
 'Arcade': 2.0730813527379333,
 'Books & Reference': 2.0730813527379333,
 'Dating': 1.978850382158936,
 'Simulation': 1.9579101664747147,
 'Casual': 1.9055596272641608,
 'Video Players & Editors': 1.7589781174746097,
 'Maps & Navigation': 1.3611140194744007,
 'Food & Drink': 1.2773531567375145,
 'Puzzle': 1.2250026175269606,
 'Racing': 0.9946602450005235,
 'Strategy': 0.95277981

In [33]:
Category = dict((googlestore["Category"].value_counts() / googlestore.shape[0]) * 100)
Category

{'FAMILY': 18.23892786095697,
 'GAME': 10.847031724426762,
 'TOOLS': 7.999162391372631,
 'BUSINESS': 4.33462464663386,
 'PRODUCTIVITY': 3.957700764317872,
 'LIFESTYLE': 3.664537744738771,
 'FINANCE': 3.5912469898439956,
 'COMMUNICATION': 3.5388964506334415,
 'SPORTS': 3.423725264370223,
 'MEDICAL': 3.3923149408438906,
 'PERSONALIZATION': 3.1933828918437857,
 'PHOTOGRAPHY': 3.151502460475343,
 'HEALTH_AND_FITNESS': 3.0468013820542352,
 'SOCIAL': 2.9002198722646844,
 'NEWS_AND_MAGAZINES': 2.7012878232645794,
 'TRAVEL_AND_LOCAL': 2.355774264474924,
 'SHOPPING': 2.313893833106481,
 'BOOKS_AND_REFERENCE': 2.0730813527379333,
 'DATING': 1.978850382158936,
 'VIDEO_PLAYERS': 1.7799183331588315,
 'MAPS_AND_NAVIGATION': 1.3611140194744007,
 'EDUCATION': 1.3087634802638466,
 'FOOD_AND_DRINK': 1.2773531567375145,
 'ENTERTAINMENT': 1.1412417547900744,
 'LIBRARIES_AND_DEMO': 0.8794890587373049,
 'AUTO_AND_VEHICLES': 0.8585488430530833,
 'HOUSE_AND_HOME': 0.8376086273688619,
 'WEATHER': 0.77478798031

The most common Apps in both Genres and Category in googlestore dataset are  'Tools': 7.98, 'Entertainment': 5.91,
 'Education': 5.14,
 'Business': 4.33,
 'Productivity': 3.95,
 'Lifestyle': 3.65,
 'Finance': 3.59,
 'FAMILY': 18.23,
 'GAME': 10.84,
 'TOOLS': 7.99,
 'BUSINESS': 4.33,
 'PRODUCTIVITY': 3.95,
 'LIFESTYLE': 3.66
 
 
 
 My observation from the above analysis is that the percentage gap between Game Apps in appstore is very wide from other Apps
 while the percentage between Apps in googlestore is not too wide 
 
 With this observation I can recommend Apps with high percentages, because they are the apps that have most users

In [34]:
app_avgrating = appstore["user_rating"].mean()
app_avgrating

3.867163252638113

In [35]:
google_avgrating = googlestore["Rating"].mean()
google_avgrating

4.1818325051508936

In [36]:
app_avgrating = dict(appstore.pivot_table(index="prime_genre", values="rating_count_tot"))
app_avgrating

{'rating_count_tot': prime_genre
 Book                 39758.500000
 Business              7491.117647
 Catalogs              4004.000000
 Education             7003.983051
 Entertainment        14029.830709
 Finance              31467.944444
 Food & Drink         33333.923077
 Games                22788.669691
 Health & Fitness     23298.015385
 Lifestyle            16485.764706
 Medical                612.000000
 Music                57326.530303
 Navigation           86090.333333
 News                 21248.023256
 Photo & Video        28441.543750
 Productivity         21028.410714
 Reference            74942.111111
 Shopping             26919.690476
 Social Networking    71548.349057
 Sports               23008.898551
 Travel               28243.800000
 Utilities            18684.456790
 Weather              52279.892857
 Name: rating_count_tot, dtype: float64}

In [37]:
navigate = appstore[appstore["prime_genre"] == "Navigation"]
navigate

Unnamed: 0,id,track_name,size_bytes,currency,price,rating_count_tot,rating_count_ver,user_rating,user_rating_ver,ver,cont_rating,prime_genre,sup_devices.num,ipadSc_urls.num,lang.num,vpp_lic
49,323229106,"Waze - GPS Navigation, Maps & Real-time Traffic",94139392,USD,0.0,345046,3040,4.5,4.5,4.24,4+,Navigation,37,5,36,1
130,585027354,Google Maps - Navigation & Transit,120232960,USD,0.0,154911,1253,4.5,4.0,4.31.1,12+,Navigation,37,5,34,1
881,329541503,Geocaching®,108166144,USD,0.0,12811,134,3.5,1.5,5.3,4+,Navigation,37,0,22,1
1633,504677517,CoPilot GPS – Car Navigation & Offline Maps,82534400,USD,0.0,3582,70,4.0,3.5,10.0.0.984,4+,Navigation,38,5,25,1
3987,344176018,ImmobilienScout24: Real Estate Search in Germany,126867456,USD,0.0,187,0,3.5,0.0,9.5,4+,Navigation,37,5,3,1
6033,463431091,Railway Route Search,46950400,USD,0.0,5,0,3.0,0.0,3.17.1,4+,Navigation,37,0,1,1


In [38]:
navi_avgrating = dict(navigate.pivot_table(index="track_name", values="rating_count_tot"))
navi_avgrating

{'rating_count_tot': track_name
 CoPilot GPS – Car Navigation & Offline Maps           3582
 Geocaching®                                          12811
 Google Maps - Navigation & Transit                  154911
 ImmobilienScout24: Real Estate Search in Germany       187
 Railway Route Search                                     5
 Waze - GPS Navigation, Maps & Real-time Traffic     345046
 Name: rating_count_tot, dtype: int64}

In [39]:
google_avgrating = dict((googlestore["Installs"].value_counts() / googlestore.shape[0]) * 100)
google_avgrating

{'1,000,000+': 15.328237880850173,
 '10,000,000+': 11.820751753743064,
 '100,000+': 10.857501832268872,
 '10,000+': 9.48591770495236,
 '1,000+': 7.800230342372526,
 '5,000,000+': 7.130143440477436,
 '100+': 6.418176107213905,
 '500,000+': 5.287404460265941,
 '50,000+': 4.4707360485813,
 '5,000+': 4.19851324468642,
 '100,000,000+': 3.8634697937388753,
 '10+': 3.287613862422783,
 '500+': 3.015391058527903,
 '50,000,000+': 2.8478693330541307,
 '50+': 1.7799183331588315,
 '5+': 0.7329075489477542,
 '500,000,000+': 0.6386765783687572,
 '1,000,000,000+': 0.5130352842634279,
 '1+': 0.47115485289498477,
 '0+': 0.0418804313684431,
 '0': 0.010470107842110775}

For the Googlestore, most values are open-ended (100+, 1,000+, 5,000+, etc.): the install numbers don't seem to give exact number to enable us conclude.

For instance, we don't know whether an app with 100,000+ installs has 100,000 installs, 200,000, or 350,000. However, we don't need very precise data for our purposes — we only want to find out which app genres attract the most users, and we don't need perfect precision with respect to the number of users.

We're going to leave the numbers as they are, which means that we'll consider that an app with 100,000+ installs has 100,000 installs, and an app with 1,000,000+ installs has 1,000,000 installs, and so on. To perform computations, however, we'll need to convert each install number from string to float. This means we need to remove the commas and the plus characters, otherwise the conversion will fail and raise an error.

In [40]:
inst = dict(googlestore["Installs"].value_counts())
inst

{'1,000,000+': 1464,
 '10,000,000+': 1129,
 '100,000+': 1037,
 '10,000+': 906,
 '1,000+': 745,
 '5,000,000+': 681,
 '100+': 613,
 '500,000+': 505,
 '50,000+': 427,
 '5,000+': 401,
 '100,000,000+': 369,
 '10+': 314,
 '500+': 288,
 '50,000,000+': 272,
 '50+': 170,
 '5+': 70,
 '500,000,000+': 61,
 '1,000,000,000+': 49,
 '1+': 45,
 '0+': 4,
 '0': 1}

In [41]:
googlestore["Installs"] = googlestore["Installs"].str.replace(",", "").str.replace("+", "").astype(float)
googlestore["Installs"].copy()

  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


0           10000.0
1          500000.0
2         5000000.0
3        50000000.0
4          100000.0
            ...    
10836        5000.0
10837         100.0
10838        1000.0
10839        1000.0
10840    10000000.0
Name: Installs, Length: 9551, dtype: float64

In [42]:
cate = dict(googlestore.pivot_table(index="Category", values="Installs"))
cate


{'Installs': Category
 ART_AND_DESIGN         2.038051e+06
 AUTO_AND_VEHICLES      6.473178e+05
 BEAUTY                 5.131519e+05
 BOOKS_AND_REFERENCE    9.678708e+06
 BUSINESS               2.085512e+06
 COMICS                 9.504432e+05
 COMMUNICATION          7.145239e+07
 DATING                 1.092671e+06
 EDUCATION              4.258800e+06
 ENTERTAINMENT          2.252716e+07
 EVENTS                 2.535422e+05
 FAMILY                 5.742822e+06
 FINANCE                2.245374e+06
 FOOD_AND_DRINK         2.113432e+06
 GAME                   3.042748e+07
 HEALTH_AND_FITNESS     4.675424e+06
 HOUSE_AND_HOME         1.565156e+06
 LIBRARIES_AND_DEMO     7.499501e+05
 LIFESTYLE              1.509956e+06
 MAPS_AND_NAVIGATION    5.569698e+06
 MEDICAL                1.282202e+05
 NEWS_AND_MAGAZINES     2.089999e+07
 PARENTING              5.426036e+05
 PERSONALIZATION        6.787659e+06
 PHOTOGRAPHY            3.228993e+07
 PRODUCTIVITY           3.296727e+07
 SHOPPING       