# Apps Analyst

## General Purpose

Analyze app-store data to help developers understand which types of apps are likely to attract more users.


In [3]:
from csv import reader

In [1]:
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print a slice of a dataset and, optionally, its overall dimensions.

    Args:
        dataset: A sequence of rows (each row itself a sequence), e.g. the
            list produced by ``list(csv.reader(...))``.
        start: Index of the first row to print (inclusive).
        end: Index at which to stop printing (exclusive).
        rows_and_columns: When True, also print the total number of rows and
            the number of columns, taken from the first row.

    Returns:
        None. Everything is written to standard output.
    """
    for row in dataset[start:end]:
        print(row)
        print('\n')  # '\n' plus print's own newline leaves two blank lines after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        # An empty dataset has no first row to measure, so report 0 columns
        # instead of failing with IndexError on dataset[0].
        print('Number of columns:', len(dataset[0]) if len(dataset) > 0 else 0)

# In the next two cells we open and read both datasets, then explore their first rows

In [7]:
# Load each CSV into a list of rows. The `with` blocks close the file handles
# as soon as the rows have been read (the original left both files open), and
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly.
with open('data/AppleStore.csv', encoding='utf8', newline='') as apple_file:
    dataAppleStore = list(reader(apple_file))

with open('data/googleplaystore.csv', encoding='utf8', newline='') as google_file:
    dataGoogleStore = list(reader(google_file))

In [9]:
# Preview the first ten rows of each dataset, Google Play first,
# with a dashed rule separating the two listings.
preview_start, preview_end = 0, 10
explore_data(dataGoogleStore, preview_start, preview_end)
print('--------------------------------------------------')
explore_data(dataAppleStore, preview_start, preview_end)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', 'January 15, 2018', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', 'August 1, 2018', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', 'June 8, 2018', 'Varies with device', '4.2 and up']


['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Eve

## Clean the GoogleStore dataset

- Remove incomplete rows
- Remove duplicate entries
- Remove applications that are not designed for an English-speaking audience
- Remove non-free apps

### Remove incomplete data


In [24]:
# Drop every row whose field count differs from the header's.
# The filter runs in a single pass and is written back with slice assignment,
# so the same list object is updated in place. The header always matches its
# own length and is therefore kept. This replaces a `del` driven by
# list.index(), which re-scanned the list by value for each malformed row.
if dataGoogleStore:
    expected_columns = len(dataGoogleStore[0])
    dataGoogleStore[:] = [row for row in dataGoogleStore if len(row) == expected_columns]

In [36]:
# Count how many app names occur once versus repeatedly in the Google dataset.
# unique_apps keeps each name in first-seen order; duplicated_apps gets one
# single-element list per repeated occurrence (same shapes as before).
unique_apps = []
duplicated_apps = []
seen_names = set()  # O(1) membership test; scanning unique_apps made the loop quadratic
for app in dataGoogleStore[1:]:  # [1:] skips the header row
    name = app[0]
    if name in seen_names:
        duplicated_apps.append([name])
    else:
        seen_names.add(name)
        unique_apps.append(name)

print(len(unique_apps))
print(len(duplicated_apps))

9659
1181


In [39]:
# For every app name, remember the highest review count (column 3, 'Reviews')
# seen across all of its rows; this later decides which duplicate row to keep.
reviews_max = {}
for row in dataGoogleStore[1:]:  # [1:] skips the header row
    app_name = row[0]
    review_count = float(row[3])
    if app_name not in reviews_max or review_count > reviews_max[app_name]:
        reviews_max[app_name] = review_count
print(len(reviews_max))

9659


### Remove duplicated data

In [48]:
# Keep exactly one row per app: the first row whose review count equals the
# maximum recorded for that name in reviews_max.
android_clean = []
already_added = []       # names kept so far, in insertion order
added_names = set()      # same names, for O(1) lookups instead of a list scan per row
for app in dataGoogleStore[1:]:  # [1:] skips the header row
    name = app[0]
    # The name check is still needed: several rows of one app can share the
    # maximum review count, and only the first of them should be kept.
    if float(app[3]) == reviews_max[name] and name not in added_names:
        android_clean.append(app)
        already_added.append(name)
        added_names.add(name)
print(len(android_clean))

9659


### Remove applications that are not designed for an English-speaking audience

In [56]:
def isEnglishName(name):
    """Return True unless ``name`` contains more than three non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127. Up to
    three such characters are tolerated; the fourth one makes the function
    return False immediately.
    """
    remaining_allowance = 3
    for char in name:
        if ord(char) > 127:
            if remaining_allowance == 0:
                return False
            remaining_allowance -= 1
    return True

In [62]:
# Keep only apps whose name passes isEnglishName.
# Android rows carry the name in column 0 ('App'); Apple rows are checked on
# column 2 (presumably the app name -- confirm against the AppleStore header).
android_apps_english = [row for row in android_clean if isEnglishName(row[0])]
ios_apps_english = [row for row in dataAppleStore[1:] if isEnglishName(row[2])]

print(len(android_apps_english))
print(len(ios_apps_english))


9614
6183


### Remove applications that are not free

In [63]:
# Keep only apps whose price field is exactly the string "0".
# Android rows hold the price in column 7 ('Price'); Apple rows are checked on
# column 5 (presumably the price -- confirm against the AppleStore header).
android_apps_english_free = [row for row in android_apps_english if row[7] == "0"]
ios_apps_english_free = [row for row in ios_apps_english if row[5] == "0"]

print(len(ios_apps_english_free))
print(len(android_apps_english_free))


3222
8864


### Most Common Apps by Genre

In [70]:
def freq_table(dataset, index):
    """Build a percentage frequency table for one column of a dataset.

    Args:
        dataset: A sequence of rows.
        index: Position of the column to tabulate within each row.

    Returns:
        A dict mapping each distinct value found at ``row[index]`` to the
        percentage of rows carrying it, keyed in first-seen order.
    """
    occurrences = {}
    for row in dataset:
        value = row[index]
        occurrences[value] = occurrences.get(value, 0) + 1

    # Turn the raw counts into percentages of the whole dataset.
    return {
        value: (count / len(dataset)) * 100
        for value, count in occurrences.items()
    }



In [65]:
def display_table(dataset, index):
    """Print the frequency table of one column, most frequent value first.

    Args:
        dataset: A sequence of rows.
        index: Position of the column to tabulate within each row.

    Each output line has the form ``value : percentage``. Entries are sorted
    as (percentage, value) tuples in descending order, so ties on percentage
    are broken by the value itself.
    """
    percentages = freq_table(dataset, index)
    ranked = sorted(
        ((share, value) for value, share in percentages.items()),
        reverse=True,
    )
    for share, value in ranked:
        print(value, ':', share)

In [71]:
# Index -4 is the 'Genres' column of the 13-column Google Play rows.
display_table(dataset=android_apps_english_free, index=-4)

Tools : 8.449909747292418
Entertainment : 6.069494584837545
Education : 5.347472924187725
Business : 4.591606498194946
Productivity : 3.892148014440433
Lifestyle : 3.892148014440433
Finance : 3.7003610108303246
Medical : 3.531137184115524
Sports : 3.463447653429603
Personalization : 3.3167870036101084
Communication : 3.2378158844765346
Action : 3.1024368231046933
Health & Fitness : 3.0798736462093865
Photography : 2.944494584837545
News & Magazines : 2.7978339350180503
Social : 2.6624548736462095
Travel & Local : 2.3240072202166067
Shopping : 2.2450361010830324
Books & Reference : 2.1435018050541514
Simulation : 2.0419675090252705
Dating : 1.861462093862816
Arcade : 1.8501805054151623
Video Players & Editors : 1.7712093862815883
Casual : 1.7599277978339352
Maps & Navigation : 1.3989169675090252
Food & Drink : 1.2409747292418771
Puzzle : 1.128158844765343
Racing : 0.9927797833935018
Role Playing : 0.9363718411552346
Libraries & Demo : 0.9363718411552346
Auto & Vehicles : 0.9250902527075

In [72]:
# Index 1 is the 'Category' column of the Google Play rows.
display_table(dataset=android_apps_english_free, index=1)

FAMILY : 18.907942238267147
GAME : 9.724729241877256
TOOLS : 8.461191335740072
BUSINESS : 4.591606498194946
LIFESTYLE : 3.9034296028880866
PRODUCTIVITY : 3.892148014440433
FINANCE : 3.7003610108303246
MEDICAL : 3.531137184115524
SPORTS : 3.395758122743682
PERSONALIZATION : 3.3167870036101084
COMMUNICATION : 3.2378158844765346
HEALTH_AND_FITNESS : 3.0798736462093865
PHOTOGRAPHY : 2.944494584837545
NEWS_AND_MAGAZINES : 2.7978339350180503
SOCIAL : 2.6624548736462095
TRAVEL_AND_LOCAL : 2.33528880866426
SHOPPING : 2.2450361010830324
BOOKS_AND_REFERENCE : 2.1435018050541514
DATING : 1.861462093862816
VIDEO_PLAYERS : 1.7937725631768955
MAPS_AND_NAVIGATION : 1.3989169675090252
FOOD_AND_DRINK : 1.2409747292418771
EDUCATION : 1.1620036101083033
ENTERTAINMENT : 0.9589350180505415
LIBRARIES_AND_DEMO : 0.9363718411552346
AUTO_AND_VEHICLES : 0.9250902527075812
HOUSE_AND_HOME : 0.8235559566787004
WEATHER : 0.8009927797833934
EVENTS : 0.7107400722021661
PARENTING : 0.6543321299638989
ART_AND_DESIGN : 

In [73]:
# Index 12 holds the genre of each App Store row (values such as 'Games', 'Entertainment').
display_table(dataset=ios_apps_english_free, index=12)

Games : 58.16263190564867
Entertainment : 7.883302296710118
Photo & Video : 4.9658597144630665
Education : 3.662321539416512
Social Networking : 3.2898820608317814
Shopping : 2.60707635009311
Utilities : 2.5139664804469275
Sports : 2.1415270018621975
Music : 2.0484171322160147
Health & Fitness : 2.0173805090006205
Productivity : 1.7380509000620732
Lifestyle : 1.5828677839851024
News : 1.3345747982619491
Travel : 1.2414649286157666
Finance : 1.1173184357541899
Weather : 0.8690254500310366
Food & Drink : 0.8069522036002483
Reference : 0.5586592178770949
Business : 0.5276225946617008
Book : 0.4345127250155183
Navigation : 0.186219739292365
Medical : 0.186219739292365
Catalogs : 0.12414649286157665


In [77]:
# For each App Store genre, print the mean of column 6 over the apps in that genre.
# NOTE(review): column 6 is presumably the total rating count (the printed
# averages are in the thousands) -- confirm against the AppleStore header.
freq_table_ios = freq_table(ios_apps_english_free, 12)

for genre in freq_table_ios:
    column_total = 0
    apps_in_genre = 0
    for row in ios_apps_english_free:
        if row[12] != genre:
            continue
        column_total += float(row[6])
        apps_in_genre += 1
    # Every genre comes from the frequency table, so it has at least one app.
    print("{} : {}".format(genre, column_total / apps_in_genre))

Productivity : 21028.410714285714
Weather : 52279.892857142855
Shopping : 26919.690476190477
Reference : 74942.11111111111
Finance : 31467.944444444445
Music : 57326.530303030304
Utilities : 18684.456790123455
Travel : 28243.8
Social Networking : 71548.34905660378
Sports : 23008.898550724636
Health & Fitness : 23298.015384615384
Games : 22788.6696905016
Food & Drink : 33333.92307692308
News : 21248.023255813954
Book : 39758.5
Photo & Video : 28441.54375
Entertainment : 14029.830708661417
Business : 7491.117647058823
Lifestyle : 16485.764705882353
Education : 7003.983050847458
Navigation : 86090.33333333333
Medical : 612.0
Catalogs : 4004.0


In [78]:
# For each Google Play category, print the average install count of its apps.
freq_table_android_category = freq_table(android_apps_english_free, 1)

for category in freq_table_android_category:
    installs_total = 0
    apps_in_category = 0
    for row in android_apps_english_free:
        if row[1] != category:
            continue
        # 'Installs' looks like '10,000+': strip the plus sign and the
        # thousands separators before converting to a number.
        install_count = float(row[5].replace('+', '').replace(',', ''))
        installs_total += install_count
        apps_in_category += 1
    # Every category comes from the frequency table, so it has at least one app.
    print("{} : {}".format(category, installs_total / apps_in_category))

ART_AND_DESIGN : 1986335.0877192982
AUTO_AND_VEHICLES : 647317.8170731707
BEAUTY : 513151.88679245283
BOOKS_AND_REFERENCE : 8767811.894736841
BUSINESS : 1712290.1474201474
COMICS : 817657.2727272727
COMMUNICATION : 38456119.167247385
DATING : 854028.8303030303
EDUCATION : 1833495.145631068
ENTERTAINMENT : 11640705.88235294
EVENTS : 253542.22222222222
FINANCE : 1387692.475609756
FOOD_AND_DRINK : 1924897.7363636363
HEALTH_AND_FITNESS : 4188821.9853479853
HOUSE_AND_HOME : 1331540.5616438356
LIBRARIES_AND_DEMO : 638503.734939759
LIFESTYLE : 1437816.2687861272
GAME : 15588015.603248259
FAMILY : 3695641.8198090694
MEDICAL : 120550.61980830671
SOCIAL : 23253652.127118643
SHOPPING : 7036877.311557789
PHOTOGRAPHY : 17840110.40229885
SPORTS : 3638640.1428571427
TRAVEL_AND_LOCAL : 13984077.710144928
TOOLS : 10801391.298666667
PERSONALIZATION : 5201482.6122448975
PRODUCTIVITY : 16787331.344927534
PARENTING : 542603.6206896552
WEATHER : 5074486.197183099
VIDEO_PLAYERS : 24727872.452830188
NEWS_AND_