# Profitable App Profiles for the App Store and Google Play Markets

# Opening datasets

Let's open the two datasets and explore the data inside each one.

In [1]:
def open_data(file_csv):
    """Read a CSV file and return its records as a list of lists of strings.

    The first element of the result is the file's first record; the cells
    that call this function treat it as the header row.

    Args:
        file_csv: Path of the CSV file to read.

    Returns:
        A list containing one list of string fields per CSV record.

    Raises:
        OSError: If the file cannot be opened.
    """
    from csv import reader

    # The context manager closes the file even when parsing fails; the
    # previous version left the handle open.  ``newline=""`` is what the csv
    # module requires so that quoted fields containing line breaks are parsed
    # correctly.  The explicit encoding removes the dependency on the
    # platform default -- assumes the datasets are UTF-8 encoded; confirm.
    with open(file_csv, encoding="utf-8", newline="") as open_file:
        return list(reader(open_file))

In [2]:
# Load the App Store CSV, then separate the header row from the app rows.
file_name = open_data('AppleStore.csv')
header_app_store, app_store = file_name[0], file_name[1:]

In [3]:
# Load the Google Play CSV, then separate the header row from the app rows.
file_name = open_data('googleplaystore.csv')
header_google_play, google_play = file_name[0], file_name[1:]

# Exploring data

In [4]:
def explore_data(dataset,start,end,rows_and_columns=False):
    """Print a slice of ``dataset`` and, optionally, its dimensions.

    Args:
        dataset: List of rows (each row a list of fields), without header.
        start: Index of the first row to print.
        end: Index one past the last row to print.
        rows_and_columns: When true, also print the number of columns
            (taken from the first row) and the number of rows.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # leaves blank lines between printed rows
    if rows_and_columns:
        # An empty dataset has no first row to measure; report 0 columns
        # instead of raising IndexError.
        n_columns = len(dataset[0]) if len(dataset) > 0 else 0
        print("Number of columns:", n_columns)
        print("Number of rows:", len(dataset))

In [5]:
# Show the App Store header, then the first app row and the dataset size.
print(header_app_store)
print('\n')
explore_data(app_store, 0, 1, rows_and_columns=True)

['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


Number of columns: 16
Number of rows: 7197


In [6]:
# Show the Google Play header, then the first app row and the dataset size.
print(header_google_play)
print('\n')
explore_data(google_play, 0, 1, rows_and_columns=True)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


Number of columns: 13
Number of rows: 10841


# Quality Data Report:

In [7]:
import pandas as pd
# Load both CSV files again as pandas DataFrames; these copies are used only
# by the quality report below and are independent of the list-based datasets.
APS = pd.read_csv('AppleStore.csv')
GPS = pd.read_csv('googleplaystore.csv')

In [8]:
def check_data_quality(df,column):
    """Summarise null, duplicate and unique counts for a DataFrame.

    Args:
        df: The pandas DataFrame to inspect.
        column: Name of the column whose duplicated and unique values are
            counted.

    Returns:
        A dict with the keys 'null_values' (null count per column, for
        every column of ``df``), 'duplicate_values' and 'unique_values'
        (both for ``column`` only) and 'total_rows'.
    """
    key_values = df[column]
    nulls_per_column = df.isnull().sum().to_dict()
    n_duplicates = key_values.duplicated().sum()
    n_unique = len(key_values.unique())
    return {
        'null_values': nulls_per_column,
        'duplicate_values': n_duplicates,
        'unique_values': n_unique,
        'total_rows': len(df),
    }

In [9]:
# Quality report for the App Store DataFrame, using 'id' as the column
# checked for duplicated / unique values.
quality_report = check_data_quality(df=APS,column='id')
display(quality_report)

{'null_values': {'id': 0,
  'track_name': 0,
  'size_bytes': 0,
  'currency': 0,
  'price': 0,
  'rating_count_tot': 0,
  'rating_count_ver': 0,
  'user_rating': 0,
  'user_rating_ver': 0,
  'ver': 0,
  'cont_rating': 0,
  'prime_genre': 0,
  'sup_devices.num': 0,
  'ipadSc_urls.num': 0,
  'lang.num': 0,
  'vpp_lic': 0},
 'duplicate_values': 0,
 'unique_values': 7197,
 'total_rows': 7197}

The App Store report shows no null values and no duplicate ids, so this dataset does not need cleaning.

In [10]:
# Quality report for the Google Play DataFrame, using 'App' (the app name)
# as the column checked for duplicated / unique values.
quality_report = check_data_quality(df=GPS, column='App')
display(quality_report)

{'null_values': {'App': 0,
  'Category': 0,
  'Rating': 1474,
  'Reviews': 0,
  'Size': 0,
  'Installs': 0,
  'Type': 1,
  'Price': 0,
  'Content Rating': 1,
  'Genres': 0,
  'Last Updated': 0,
  'Current Ver': 8,
  'Android Ver': 3},
 'duplicate_values': 1181,
 'unique_values': 9660,
 'total_rows': 10841}

# Data Cleaning

#      Detect inaccurate data in each row and remove

In [11]:
# Report every App Store row whose number of fields differs from the header.
# enumerate() yields the row's actual position; list.index() would return the
# position of the first *equal* row (wrong for repeated rows) and rescans the
# list on every hit.
header_length = len(header_app_store)
for row_index, row in enumerate(app_store):
    if len(row) != header_length:
        print(row)
        print(row_index)

**As the check below shows, row 10472 of the Google Play dataset has problematic data: it is one field short, so the values "1.9" and "19" end up in the "Category" and "Rating" columns. So I am going to delete it.**

In [12]:
# Report every Google Play row whose number of fields differs from the
# header, then drop those rows.
header_length = len(header_google_play)
for row_index, row in enumerate(google_play):
    if len(row) != header_length:
        print(header_google_play)
        print(row)
        print(row_index)
# Remove malformed rows by condition rather than by a hard-coded position, so
# running this cell more than once can never delete a valid row.
google_play = [row for row in google_play if len(row) == header_length]

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']
['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']
10472


In [13]:
# Report every App Store row whose number of fields differs from the header.
# enumerate() gives the row's actual position; list.index() would return the
# position of the first equal row and rescans the list on every hit.
header_length = len(header_app_store)
for row_index, row in enumerate(app_store):
    if len(row) != header_length:
        print(header_app_store)
        print(row)
        print(row_index)

#    Detect duplicate values

In [14]:
def check_duplicate(dataset,index):
    """Split the values of one column into repeated and first occurrences.

    Args:
        dataset: List of rows (each row a list of fields), without header.
        index: Position of the column to inspect in every row.  The values
            in that column must be hashable (the CSV fields used in this
            notebook are strings).

    Returns:
        A ``(duplicate_app, unique_app)`` tuple of lists.  ``unique_app``
        holds each distinct value once, in order of first appearance;
        ``duplicate_app`` holds one entry for every later repetition.
    """
    duplicate_app = []
    unique_app = []
    # A set gives O(1) membership tests; scanning the growing lists made the
    # whole pass quadratic in the number of rows.
    seen = set()
    for app in dataset:
        name = app[index]
        if name in seen:
            duplicate_app.append(name)
        else:
            seen.add(name)
            unique_app.append(name)
    return duplicate_app, unique_app

In [15]:
# Count repeated and distinct App Store ids (column 0) and compare them with
# the total number of rows.
duplicate_app, unique_app = check_duplicate(app_store, 0)
for label, count in (
    ("Number of duplicate app:", len(duplicate_app)),
    ("Number of unique app:", len(unique_app)),
    ("Actual length of app store:", len(app_store)),
):
    print(label, count)

Number of duplicate app: 0
Number of unique app: 7197
Actual length of app store: 7197


In [16]:
# Count repeated and distinct Google Play app names (column 0) and compare
# them with the total number of rows.
duplicate_app, unique_app = check_duplicate(google_play, 0)
for label, count in (
    ("Number of duplicate app:", len(duplicate_app)),
    ("Number of unique app:", len(unique_app)),
    ("Actual length of google play:", len(google_play)),
):
    print(label, count)

Number of duplicate app: 1181
Number of unique app: 9659
Actual length of google play: 10840


**Removal criterion: for each app, keep the entry with the highest number of reviews and remove the other entries.**

In [17]:
# Map every Google Play app name (column 0) to the largest review count
# (column 3) found among its rows.
reviews_max = {}
for row in google_play:
    name = row[0]
    n_reviews = float(row[3])
    if name not in reviews_max or reviews_max[name] < n_reviews:
        reviews_max[name] = n_reviews

In [18]:
# Build the de-duplicated Google Play dataset: for each app name keep the
# first row whose review count equals the maximum recorded in reviews_max.
Cl_google_play = []
added_app = []
# Set twin of added_app for O(1) membership tests; checking the growing list
# made this loop quadratic in the number of rows.
seen_names = set()
for row in google_play:
    name = row[0]
    n_reviews = float(row[3])
    # The name check matters when several rows of one app tie on the maximum.
    if n_reviews == reviews_max[name] and name not in seen_names:
        Cl_google_play.append(row)
        added_app.append(name)
        seen_names.add(name)

In [19]:
# First two rows and the size of the de-duplicated Google Play dataset.
explore_data(Cl_google_play, 0, 2, rows_and_columns=True)

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


Number of columns: 13
Number of rows: 9659


#     Remove Non-ASCII apps: 

In [20]:
def check_ASCII(string, max_non_ASCII=3):
    """Classify an app name as "English" or "Non English".

    Characters whose code point is above 127 are counted; the name is
    reported as "Non English" only when that count exceeds
    ``max_non_ASCII``, so a few symbols such as emoji or a trademark sign
    do not disqualify a name.

    Args:
        string: The text to classify.
        max_non_ASCII: Largest number of non-ASCII characters still
            accepted as "English".  Defaults to 3.

    Returns:
        The string "English" or "Non English".

    Examples:
        >>> check_ASCII('Instagram')
        'English'
        >>> check_ASCII('爱奇艺PPS -《欢乐颂2》电视剧热播')
        'Non English'
    """
    non_ASCII = sum(1 for character in string if ord(character) > 127)
    if non_ASCII > max_non_ASCII:
        return "Non English"
    return "English"

In [21]:
# Keep only the apps whose name check_ASCII classifies as "English".
# The name is column 0 in the Google Play rows and column 1 in the App Store rows.
EngCl_googleplay = [
    app for app in Cl_google_play if check_ASCII(app[0]) == "English"
]
EngCl_appstore = [
    app for app in app_store if check_ASCII(app[1]) == "English"
]

In [22]:
# First two rows and the size of the English-only App Store dataset.
explore_data(EngCl_appstore, 0, 2, rows_and_columns=True)

['284882215', 'Facebook', '389879808', 'USD', '0.0', '2974676', '212', '3.5', '3.5', '95.0', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0.0', '2161558', '1289', '4.5', '4.0', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


Number of columns: 16
Number of rows: 6183


In [23]:
# First two rows and the size of the English-only Google Play dataset.
explore_data(EngCl_googleplay, 0, 2, rows_and_columns=True)

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


Number of columns: 13
Number of rows: 9614


We can see that we're left with 9614 Google Play apps and 6183 App Store apps.

 #     Isolating the Free Apps

As I mentioned in the introduction, the company only builds apps that are free, so I am going to remove the non-free apps based on the "Price" column.
First, I am going to create a function to see how many apps have a price of zero; after that, I will remove the others.

In [24]:
def price_list(dataset,index):
    """Count how many rows carry each distinct value of one column.

    Args:
        dataset: List of rows (each row a list of fields), without header.
        index: Position of the column to tally in every row.

    Returns:
        A dict mapping each distinct value to its number of occurrences,
        with keys in order of first appearance.
    """
    price_dict = {}
    for row in dataset:
        price = row[index]
        price_dict[price] = price_dict.get(price, 0) + 1
    return price_dict

def sort_value(item):
    """Sort key: return the second element of ``item``.

    Used with ``dict.items()`` pairs so that entries are ordered by value.
    """
    value = item[1]
    return value

In [25]:
# Tally the App Store prices (column 4) and list them, most frequent first.
pricelist_appstore = price_list(EngCl_appstore, 4)
sorted_value = list(pricelist_appstore.items())
sorted_value.sort(key=sort_value, reverse=True)
print(sorted_value)

[('0.0', 3222), ('2.99', 669), ('0.99', 641), ('1.99', 610), ('4.99', 375), ('3.99', 266), ('6.99', 165), ('9.99', 76), ('5.99', 43), ('7.99', 30), ('14.99', 15), ('19.99', 13), ('8.99', 8), ('24.99', 8), ('13.99', 6), ('29.99', 6), ('15.99', 4), ('17.99', 3), ('11.99', 3), ('59.99', 3), ('39.99', 2), ('16.99', 2), ('49.99', 2), ('20.99', 1), ('12.99', 1), ('74.99', 1), ('249.99', 1), ('27.99', 1), ('22.99', 1), ('18.99', 1), ('99.99', 1), ('21.99', 1), ('34.99', 1), ('299.99', 1)]


In [26]:
# Tally the Google Play prices (column 7) and list them, most frequent first.
pricelist_googleplay = price_list(EngCl_googleplay, 7)
sorted_value = list(pricelist_googleplay.items())
sorted_value.sort(key=sort_value, reverse=True)
print(sorted_value)

[('0', 8864), ('$0.99', 145), ('$2.99', 124), ('$1.99', 73), ('$4.99', 70), ('$3.99', 56), ('$1.49', 45), ('$5.99', 26), ('$2.49', 25), ('$9.99', 19), ('$399.99', 11), ('$6.99', 10), ('$14.99', 9), ('$4.49', 9), ('$7.99', 7), ('$3.49', 7), ('$5.49', 5), ('$29.99', 5), ('$19.99', 5), ('$8.99', 5), ('$6.49', 5), ('$11.99', 3), ('$1.00', 3), ('$24.99', 3), ('$2.00', 3), ('$12.99', 3), ('$7.49', 2), ('$10.00', 2), ('$16.99', 2), ('$39.99', 2), ('$1.70', 2), ('$17.99', 2), ('$13.99', 2), ('$8.49', 2), ('$9.00', 1), ('$79.99', 1), ('$10.99', 1), ('$1.50', 1), ('$15.99', 1), ('$33.99', 1), ('$74.99', 1), ('$3.95', 1), ('$3.88', 1), ('$25.99', 1), ('$400.00', 1), ('$3.02', 1), ('$1.76', 1), ('$4.84', 1), ('$4.77', 1), ('$1.61', 1), ('$2.50', 1), ('$1.59', 1), ('$1.29', 1), ('$5.00', 1), ('$299.99', 1), ('$379.99', 1), ('$37.99', 1), ('$18.99', 1), ('$389.99', 1), ('$19.90', 1), ('$1.75', 1), ('$14.00', 1), ('$4.85', 1), ('$46.99', 1), ('$109.99', 1), ('$154.99', 1), ('$3.08', 1), ('$2.59', 1),

In [27]:
# Split the English App Store apps into free and non-free lists.  An app is
# free when its price field (column 4) is the text '0.0' or '0'.
NFrEngCl_applestore = []
FrEngCl_applestore = []
for row in EngCl_appstore:
    if row[4] in ('0.0', '0'):
        FrEngCl_applestore.append(row)
    else:
        NFrEngCl_applestore.append(row)
print("Free Apps of AppStore:", len(FrEngCl_applestore))
print("Non Free Apps of AppStore:", len(NFrEngCl_applestore))

Free Apps of AppStore: 3222
Non Free Apps of AppStore: 2961


In [28]:
# Split the English Google Play apps into free and non-free lists.  An app is
# free when its price field (column 7) is the text '0.0' or '0'.
NFrEngCl_googleplay = []
FrEngCl_googleplay = []
for row in EngCl_googleplay:
    if row[7] in ('0.0', '0'):
        FrEngCl_googleplay.append(row)
    else:
        NFrEngCl_googleplay.append(row)
print("Free Apps of GooglePlay:", len(FrEngCl_googleplay))
print("Non Free Apps of GooglePlay:", len(NFrEngCl_googleplay))

Free Apps of GooglePlay: 8864
Non Free Apps of GooglePlay: 750


# Most Common Apps by Genre

**Build frequency tables for the prime_genre column of the App Store data set
and for the Genres and Category columns of the Google Play data set.**

In [29]:
def freq_table(dataset,index):
    """Return the percentage of rows holding each distinct value of a column.

    Args:
        dataset: List of rows (each row a list of fields), without header.
        index: Position of the column to tally in every row.

    Returns:
        A dict mapping each distinct value to its share of the rows as a
        percentage (0-100), with keys in order of first appearance.
    """
    total_number_of_apps = len(dataset)

    # First pass: absolute count of every value.
    counts = {}
    for row in dataset:
        value = row[index]
        counts[value] = counts.get(value, 0) + 1

    # Second pass: turn the counts into percentages of the whole dataset.
    return {
        value: (count * 100) / total_number_of_apps
        for value, count in counts.items()
    }

In [30]:
def display_table(dataset,index):
    """Print the frequency table of one column, highest percentage first.

    Args:
        dataset: List of rows (each row a list of fields), without header.
        index: Position of the column passed on to ``freq_table``.
    """
    percentages = freq_table(dataset, index)
    ranked = list(percentages.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    for value, percentage in ranked:
        print(value, ":", percentage)

In [31]:
# Share of free English App Store apps per prime_genre (column 11).
display_table(FrEngCl_applestore,11)

Games : 58.16263190564867
Entertainment : 7.883302296710118
Photo & Video : 4.9658597144630665
Education : 3.6623215394165114
Social Networking : 3.2898820608317814
Shopping : 2.60707635009311
Utilities : 2.5139664804469275
Sports : 2.1415270018621975
Music : 2.0484171322160147
Health & Fitness : 2.017380509000621
Productivity : 1.7380509000620732
Lifestyle : 1.5828677839851024
News : 1.3345747982619491
Travel : 1.2414649286157666
Finance : 1.1173184357541899
Weather : 0.8690254500310366
Food & Drink : 0.8069522036002483
Reference : 0.5586592178770949
Business : 0.5276225946617008
Book : 0.4345127250155183
Navigation : 0.186219739292365
Medical : 0.186219739292365
Catalogs : 0.12414649286157665


In [32]:
# Share of free English Google Play apps per Category (column 1).
display_table(FrEngCl_googleplay,1)

FAMILY : 18.907942238267147
GAME : 9.724729241877256
TOOLS : 8.461191335740072
BUSINESS : 4.591606498194946
LIFESTYLE : 3.9034296028880866
PRODUCTIVITY : 3.892148014440433
FINANCE : 3.700361010830325
MEDICAL : 3.5311371841155235
SPORTS : 3.395758122743682
PERSONALIZATION : 3.3167870036101084
COMMUNICATION : 3.237815884476534
HEALTH_AND_FITNESS : 3.079873646209386
PHOTOGRAPHY : 2.9444945848375452
NEWS_AND_MAGAZINES : 2.7978339350180503
SOCIAL : 2.6624548736462095
TRAVEL_AND_LOCAL : 2.33528880866426
SHOPPING : 2.2450361010830324
BOOKS_AND_REFERENCE : 2.1435018050541514
DATING : 1.861462093862816
VIDEO_PLAYERS : 1.7937725631768953
MAPS_AND_NAVIGATION : 1.3989169675090252
FOOD_AND_DRINK : 1.2409747292418774
EDUCATION : 1.1620036101083033
ENTERTAINMENT : 0.9589350180505415
LIBRARIES_AND_DEMO : 0.9363718411552346
AUTO_AND_VEHICLES : 0.9250902527075813
HOUSE_AND_HOME : 0.8235559566787004
WEATHER : 0.8009927797833934
EVENTS : 0.7107400722021661
PARENTING : 0.6543321299638989
ART_AND_DESIGN : 0

A good number of apps are designed to support everyday life (Family, Tools, Business, Lifestyle, Productivity, etc.).

# Most Popular Apps by Genre on the AppStore

In [33]:
# For every App Store genre (column 11), average the total rating count
# (column 5) over the free English apps of that genre.
genre_appstore = freq_table(FrEngCl_applestore, 11)
genre_averages = []

for unique_genre in genre_appstore:
    # Rating counts of the apps in this genre, in dataset order.
    genre_ratings = [
        float(app[5]) for app in FrEngCl_applestore if app[11] == unique_genre
    ]

    total_rating = 0
    for n_ratings in genre_ratings:
        total_rating += n_ratings

    average_rating = total_rating / len(genre_ratings)
    genre_averages.append((unique_genre, average_rating))

    print(unique_genre, ":", average_rating)

Social Networking : 71548.34905660378
Photo & Video : 28441.54375
Games : 22788.6696905016
Music : 57326.530303030304
Reference : 74942.11111111111
Health & Fitness : 23298.015384615384
Weather : 52279.892857142855
Utilities : 18684.456790123455
Travel : 28243.8
Shopping : 26919.690476190477
News : 21248.023255813954
Navigation : 86090.33333333333
Lifestyle : 16485.764705882353
Entertainment : 14029.830708661417
Food & Drink : 33333.92307692308
Sports : 23008.898550724636
Book : 39758.5
Finance : 31467.944444444445
Education : 7003.983050847458
Productivity : 21028.410714285714
Business : 7491.117647058823
Catalogs : 4004.0
Medical : 612.0


In [34]:
# Rank the (genre, average) pairs by average, highest first, and keep five.
ranked_genres = sorted(genre_averages, key=lambda pair: pair[1], reverse=True)
top_5_genres = ranked_genres[:5]
print("Top 5 Genres by Average Installs on AppleStore:")
for genre, average in top_5_genres:
    print(genre, ":", average)

Top 5 Genres by Average Installs on AppleStore:
Navigation : 86090.33333333333
Reference : 74942.11111111111
Social Networking : 71548.34905660378
Music : 57326.530303030304
Weather : 52279.892857142855


In [35]:
# List every free English 'Navigation' app with its total rating count.
for app in FrEngCl_applestore:
    if app[11] != 'Navigation':
        continue
    print(app[1], ":", app[5])  # name : rating count

Waze - GPS Navigation, Maps & Real-time Traffic : 345046
Google Maps - Navigation & Transit : 154911
Geocaching® : 12811
CoPilot GPS – Car Navigation & Offline Maps : 3582
ImmobilienScout24: Real Estate Search in Germany : 187
Railway Route Search : 5


In [36]:
# List every free English 'Social Networking' app with its total rating count.
for app in FrEngCl_applestore:
    if app[11] != 'Social Networking':
        continue
    print(app[1], ':', app[5])  # name : rating count

Facebook : 2974676
Pinterest : 1061624
Skype for iPhone : 373519
Messenger : 351466
Tumblr : 334293
WhatsApp Messenger : 287589
Kik : 260965
ooVoo – Free Video Call, Text and Voice : 177501
TextNow - Unlimited Text + Calls : 164963
Viber Messenger – Text & Call : 164249
Followers - Social Analytics For Instagram : 112778
MeetMe - Chat and Meet New People : 97072
We Heart It - Fashion, wallpapers, quotes, tattoos : 90414
InsTrack for Instagram - Analytics Plus More : 85535
Tango - Free Video Call, Voice and Chat : 75412
LinkedIn : 71856
Match™ - #1 Dating App. : 60659
Skype for iPad : 60163
POF - Best Dating App for Conversations : 52642
Timehop : 49510
Find My Family, Friends & iPhone - Life360 Locator : 43877
Whisper - Share, Express, Meet : 39819
Hangouts : 36404
LINE PLAY - Your Avatar World : 34677
WeChat : 34584
Badoo - Meet New People, Chat, Socialize. : 34428
Followers + for Instagram - Follower Analytics : 28633
GroupMe : 28260
Marco Polo Video Walkie Talkie : 27662
Miitomo : 2

In [37]:
# List every free English 'Music' app with its total rating count.
for app in FrEngCl_applestore:
    if app[11] != 'Music':
        continue
    print(app[1], ':', app[5])  # name : rating count

Pandora - Music & Radio : 1126879
Spotify Music : 878563
Shazam - Discover music, artists, videos & lyrics : 402925
iHeartRadio – Free Music & Radio Stations : 293228
SoundCloud - Music & Audio : 135744
Magic Piano by Smule : 131695
Smule Sing! : 119316
TuneIn Radio - MLB NBA Audiobooks Podcasts Music : 110420
Amazon Music : 106235
SoundHound Song Search & Music Player : 82602
Sonos Controller : 48905
Bandsintown Concerts : 30845
Karaoke - Sing Karaoke, Unlimited Songs! : 28606
My Mixtapez Music : 26286
Sing Karaoke Songs Unlimited with StarMaker : 26227
Ringtones for iPhone & Ringtone Maker : 25403
Musi - Unlimited Music For YouTube : 25193
AutoRap by Smule : 18202
Spinrilla - Mixtapes For Free : 15053
Napster - Top Music & Radio : 14268
edjing Mix:DJ turntable to remix and scratch music : 13580
Free Music - MP3 Streamer & Playlist Manager Pro : 13443
Free Piano app by Yokee : 13016
Google Play Music : 10118
Certified Mixtapes - Hip Hop Albums & Mixtapes : 9975
TIDAL : 7398
YouTube Mu

In [38]:
# List every free English 'Reference' app with its total rating count.
for app in FrEngCl_applestore:
    if app[11] != 'Reference':
        continue
    print(app[1], ':', app[5])  # name : rating count

Bible : 985920
Dictionary.com Dictionary & Thesaurus : 200047
Dictionary.com Dictionary & Thesaurus for iPad : 54175
Google Translate : 26786
Muslim Pro: Ramadan 2017 Prayer Times, Azan, Quran : 18418
New Furniture Mods - Pocket Wiki & Game Tools for Minecraft PC Edition : 17588
Merriam-Webster Dictionary : 16849
Night Sky : 12122
City Maps for Minecraft PE - The Best Maps for Minecraft Pocket Edition (MCPE) : 8535
LUCKY BLOCK MOD ™ for Minecraft PC Edition - The Best Pocket Wiki & Mods Installer Tools : 4693
GUNS MODS for Minecraft PC Edition - Mods Tools : 1497
Guides for Pokémon GO - Pokemon GO News and Cheats : 826
WWDC : 762
Horror Maps for Minecraft PE - Download The Scariest Maps for Minecraft Pocket Edition (MCPE) Free : 718
VPN Express : 14
Real Bike Traffic Rider Virtual Reality Glasses : 8
教えて!goo : 0
Jishokun-Japanese English Dictionary & Translator : 0


# Most Popular Apps by Genre on Google Play

In [39]:
# Share of free English Google Play apps per Installs bucket (column 5).
display_table(FrEngCl_googleplay,5)

1,000,000+ : 15.72653429602888
100,000+ : 11.552346570397113
10,000,000+ : 10.548285198555957
10,000+ : 10.1985559566787
1,000+ : 8.393501805054152
100+ : 6.915613718411552
5,000,000+ : 6.825361010830325
500,000+ : 5.561823104693141
50,000+ : 4.772111913357401
5,000+ : 4.512635379061372
10+ : 3.542418772563177
500+ : 3.2490974729241877
50,000,000+ : 2.3014440433212995
100,000,000+ : 2.1322202166064983
50+ : 1.917870036101083
5+ : 0.7897111913357401
1+ : 0.5076714801444043
500,000,000+ : 0.27075812274368233
1,000,000,000+ : 0.22563176895306858
0+ : 0.04512635379061372
0 : 0.01128158844765343


In [40]:
# For every Google Play category (column 1), average the install figure
# (column 5) over the free English apps of that category.  The install field
# is text such as '1,000,000+', so commas and plus signs are stripped before
# converting it to a number.
category_googleplay = freq_table(FrEngCl_googleplay, 1)
category_averages = []

for unique_category in category_googleplay:
    # Install figures of the apps in this category, in dataset order.
    category_installs = [
        float(app[5].replace(',', '').replace('+', ''))
        for app in FrEngCl_googleplay
        if app[1] == unique_category
    ]

    total_installs = 0
    for n_installs in category_installs:
        total_installs += n_installs

    average_install = total_installs / len(category_installs)
    category_averages.append((unique_category, average_install))

    print(unique_category, ":", average_install)

ART_AND_DESIGN : 1986335.0877192982
AUTO_AND_VEHICLES : 647317.8170731707
BEAUTY : 513151.88679245283
BOOKS_AND_REFERENCE : 8767811.894736841
BUSINESS : 1712290.1474201474
COMICS : 817657.2727272727
COMMUNICATION : 38456119.167247385
DATING : 854028.8303030303
EDUCATION : 1833495.145631068
ENTERTAINMENT : 11640705.88235294
EVENTS : 253542.22222222222
FINANCE : 1387692.475609756
FOOD_AND_DRINK : 1924897.7363636363
HEALTH_AND_FITNESS : 4188821.9853479853
HOUSE_AND_HOME : 1331540.5616438356
LIBRARIES_AND_DEMO : 638503.734939759
LIFESTYLE : 1437816.2687861272
GAME : 15588015.603248259
FAMILY : 3695641.8198090694
MEDICAL : 120550.61980830671
SOCIAL : 23253652.127118643
SHOPPING : 7036877.311557789
PHOTOGRAPHY : 17840110.40229885
SPORTS : 3638640.1428571427
TRAVEL_AND_LOCAL : 13984077.710144928
TOOLS : 10801391.298666667
PERSONALIZATION : 5201482.6122448975
PRODUCTIVITY : 16787331.344927534
PARENTING : 542603.6206896552
WEATHER : 5074486.197183099
VIDEO_PLAYERS : 24727872.452830188
NEWS_AND_

In [41]:
# Rank the (category, average) pairs by average, highest first, and keep five.
ranked_categories = sorted(category_averages, key=lambda pair: pair[1], reverse=True)
top_5_category = ranked_categories[:5]
print("Top 5 categories by Average Installs:")
for category, average in top_5_category:
    print(category, ":", average)

Top 5 categories by Average Installs:
COMMUNICATION : 38456119.167247385
VIDEO_PLAYERS : 24727872.452830188
SOCIAL : 23253652.127118643
PHOTOGRAPHY : 17840110.40229885
PRODUCTIVITY : 16787331.344927534


To recommend a potential app profile for Google Play, I'm going to look only at the apps with fewer than 100 million installs.

In [69]:
# List the free English 'SOCIAL' apps whose install figure is below 100 million.
for app in FrEngCl_googleplay:
    if app[1] != 'SOCIAL':
        continue
    # '10,000,000+' -> '10000000'
    n_installs = app[5].replace(',', '').replace('+', '')
    if float(n_installs) < 100_000_000:
        print(app[0], ':', app[5])

Social network all in one 2018 : 100,000+
TextNow - free text + calls : 10,000,000+
The Messenger App : 1,000,000+
Messenger Pro : 1,000,000+
Free Messages, Video, Chat,Text for Messenger Plus : 1,000,000+
Telegram X : 5,000,000+
The Video Messenger App : 100,000+
Jodel - The Hyperlocal App : 1,000,000+
Hide Something - Photo, Video : 5,000,000+
Love Sticker : 1,000,000+
Web Browser & Fast Explorer : 5,000,000+
LiveMe - Video chat, new friends, and make money : 10,000,000+
VidStatus app - Status Videos & Status Downloader : 5,000,000+
Love Images : 1,000,000+
Web Browser ( Fast & Secure Web Explorer) : 500,000+
SPARK - Live random video chat & meet new people : 5,000,000+
Golden telegram : 50,000+
Facebook Local : 1,000,000+
Meet – Talk to Strangers Using Random Video Chat : 5,000,000+
MobilePatrol Public Safety App : 1,000,000+
💘 WhatsLov: Smileys of love, stickers and GIF : 1,000,000+
HTC Social Plugin - Facebook : 10,000,000+
Quora : 10,000,000+
Kate Mobile for VK : 10,000,000+
Fami

# Conclusions