# Análise Lojas de Aplicativos

Análise de dados sobre aplicativos da Applestore (2017) e da Playstore (2018)

In [2]:
#Abrir arquivo com dados sobre a Applestore
from csv import reader

opened_file = open('AppleStore.csv')
read_file = reader(opened_file)
apps_ios = list(read_file)

In [53]:
#Abrir arquivo com dados sobre a Playstore
from csv import reader

opened_file = open('googleplaystore.csv')
read_file = reader(opened_file)
apps_android = list(read_file)

In [50]:
# função para explorar os dados 

def explore_data(dataset, inicio,fim, linhas_e_colunas = False):
    dataset_slice = dataset[inicio:fim]
    for row in dataset_slice:
        print(row)
        print('\n')
        
    if linhas_e_colunas:
        print('Número de linhas:', len(dataset))
        print('Número de colunas:', len(dataset[0]))

In [60]:
# Apps duplicados
# Verificar se há duplicidade nos dados

duplicateapps = []
uniqueapps = []

for row in apps_android:
    
    app = row[1]
    
    if app in unique_apps:
        duplicateapps.append([app])
    else:
        uniqueapps.append([app])
        
print(len(uniqueapps))
print(len(duplicateapps))
        

10841
0


In [64]:
reviews_max = {}

for row in apps_android[1:]:
    name = row[0]
    n_reviews = float(row[3])
    
    if (name in reviews_max) and (reviews_max[name] < n_reviews):
        reviews_max[name] = n_reviews
    elif not (name in reviews_max):
        reviews_max[name] = n_reviews
        
    

In [87]:
#função para determinar se há caracteres não pertecentes ao padrão no inglês

def is_ascii(text):
    test = 0
    for letter in text:
        if ord(letter) > 127:
            test += 1
    if test > 3:
        return False
    else:
        return True
            

In [88]:
is_ascii('Instagram')

True

In [89]:
is_ascii('爱奇艺PPS -《欢乐颂2》电视剧热播')

False

In [90]:
is_ascii('Docs To Go™ Free Office Suite')

True

In [91]:
is_ascii('Instachat 😜')

True

In [97]:
# Encontrar apps que os nomes estão fora do padrão ou em outro idioma

nomes_corretos = []
nomes_incorretos = []

for row in apps_android:
    name = row[0]
    if is_ascii(name):
        nomes_corretos.append(row)
    else:
        nomes_incorretos.append(row)
        
        


In [122]:
# Selecionando apenas o aplicativos grátis

def select_free_apps(dataset,index):
    apps_gra = []
    apps_pagos = []
    for row in dataset:
        try:
            if (row[index] == 'Free'):
                apps_pagos.append(row)
        except:
            if (float(row[index]) > 0):
                apps_pagos.append(row)
        else:
            apps_gra.append(row)
    return apps_gra,apps_pagos
        

In [133]:
# Encontrando os generos mais comuns nos datasets

def generos(dataset, coluna):
    genre = {}
    for row in dataset:
        if row[coluna] in genre:
            genre[row[coluna]] += 1
        else:
            genre[row[coluna]] = 1
    return genre

In [135]:
generos(apps_android[1:], 1)

{'ART_AND_DESIGN': 65,
 'AUTO_AND_VEHICLES': 85,
 'BEAUTY': 53,
 'BOOKS_AND_REFERENCE': 231,
 'BUSINESS': 460,
 'COMICS': 60,
 'COMMUNICATION': 387,
 'DATING': 234,
 'EDUCATION': 156,
 'ENTERTAINMENT': 149,
 'EVENTS': 64,
 'FAMILY': 1972,
 'FINANCE': 366,
 'FOOD_AND_DRINK': 127,
 'GAME': 1144,
 'HEALTH_AND_FITNESS': 341,
 'HOUSE_AND_HOME': 88,
 'LIBRARIES_AND_DEMO': 85,
 'LIFESTYLE': 382,
 'MAPS_AND_NAVIGATION': 137,
 'MEDICAL': 463,
 'NEWS_AND_MAGAZINES': 283,
 'PARENTING': 60,
 'PERSONALIZATION': 392,
 'PHOTOGRAPHY': 335,
 'PRODUCTIVITY': 424,
 'SHOPPING': 260,
 'SOCIAL': 295,
 'SPORTS': 384,
 'TOOLS': 843,
 'TRAVEL_AND_LOCAL': 258,
 'VIDEO_PLAYERS': 175,
 'WEATHER': 82}

In [137]:
generos(apps_ios[1:],11)

{'Book': 112,
 'Business': 57,
 'Catalogs': 10,
 'Education': 453,
 'Entertainment': 535,
 'Finance': 104,
 'Food & Drink': 63,
 'Games': 3862,
 'Health & Fitness': 180,
 'Lifestyle': 144,
 'Medical': 23,
 'Music': 138,
 'Navigation': 46,
 'News': 75,
 'Photo & Video': 349,
 'Productivity': 178,
 'Reference': 64,
 'Shopping': 122,
 'Social Networking': 167,
 'Sports': 114,
 'Travel': 81,
 'Utilities': 248,
 'Weather': 72}

In [150]:
#Função para criação de uma tabela de frequência percentual para qualquer coluna

def tab_freq(dataset,col):
    tabela_freq = {}
    total = 0
    for row in dataset:
        if row[col] in tabela_freq:
            tabela_freq[row[col]] += 1
        else:
            tabela_freq[row[col]] = 1
    for value in tabela_freq:
        total += tabela_freq[value]
    for value in tabela_freq:
        tabela_freq[value] = round(((tabela_freq[value] / total) * 100),2)
        
    return tabela_freq

In [151]:
tab_freq(apps_android[1:],8)

{'Adults only 18+': 0.03,
 'Everyone': 80.39,
 'Everyone 10+': 3.82,
 'Mature 17+': 4.6,
 'Teen': 11.14,
 'Unrated': 0.02}

In [155]:
#função para mostrar coluna

def display_table(dataset, index):
    table = tab_freq(dataset, index)
    table_display = []
    for key in table:
        key_val_as_tuple = (table[key], key)
        table_display.append(key_val_as_tuple)

    table_sorted = sorted(table_display, reverse = True)
    for entry in table_sorted:
        print(entry[1], ':', entry[0])

In [156]:
display_table(apps_android[1:],8)

Everyone : 80.39
Teen : 11.14
Mature 17+ : 4.6
Everyone 10+ : 3.82
Adults only 18+ : 0.03
Unrated : 0.02


In [159]:
display_table(apps_ios[1:],11)

Games : 53.66
Entertainment : 7.43
Education : 6.29
Photo & Video : 4.85
Utilities : 3.45
Health & Fitness : 2.5
Productivity : 2.47
Social Networking : 2.32
Lifestyle : 2.0
Music : 1.92
Shopping : 1.7
Sports : 1.58
Book : 1.56
Finance : 1.45
Travel : 1.13
News : 1.04
Weather : 1.0
Reference : 0.89
Food & Drink : 0.88
Business : 0.79
Navigation : 0.64
Medical : 0.32
Catalogs : 0.14


In [160]:
display_table(apps_android[1:], 1)

FAMILY : 18.19
GAME : 10.55
TOOLS : 7.78
MEDICAL : 4.27
BUSINESS : 4.24
PRODUCTIVITY : 3.91
PERSONALIZATION : 3.62
COMMUNICATION : 3.57
SPORTS : 3.54
LIFESTYLE : 3.52
FINANCE : 3.38
HEALTH_AND_FITNESS : 3.15
PHOTOGRAPHY : 3.09
SOCIAL : 2.72
NEWS_AND_MAGAZINES : 2.61
SHOPPING : 2.4
TRAVEL_AND_LOCAL : 2.38
DATING : 2.16
BOOKS_AND_REFERENCE : 2.13
VIDEO_PLAYERS : 1.61
EDUCATION : 1.44
ENTERTAINMENT : 1.37
MAPS_AND_NAVIGATION : 1.26
FOOD_AND_DRINK : 1.17
HOUSE_AND_HOME : 0.81
LIBRARIES_AND_DEMO : 0.78
AUTO_AND_VEHICLES : 0.78
WEATHER : 0.76
ART_AND_DESIGN : 0.6
EVENTS : 0.59
PARENTING : 0.55
COMICS : 0.55
BEAUTY : 0.49


In [161]:
display_table(apps_android[1:], 9)

Tools : 7.77
Entertainment : 5.75
Education : 5.06
Medical : 4.27
Business : 4.24
Productivity : 3.91
Sports : 3.67
Personalization : 3.62
Communication : 3.57
Lifestyle : 3.51
Finance : 3.38
Action : 3.37
Health & Fitness : 3.15
Photography : 3.09
Social : 2.72
News & Magazines : 2.61
Shopping : 2.4
Travel & Local : 2.37
Dating : 2.16
Books & Reference : 2.13
Arcade : 2.03
Simulation : 1.85
Casual : 1.78
Video Players & Editors : 1.6
Puzzle : 1.29
Maps & Navigation : 1.26
Food & Drink : 1.17
Role Playing : 1.01
Strategy : 0.99
Racing : 0.9
House & Home : 0.81
Libraries & Demo : 0.78
Auto & Vehicles : 0.78
Weather : 0.76
Adventure : 0.69
Events : 0.59
Comics : 0.54
Art & Design : 0.54
Beauty : 0.49
Education;Education : 0.46
Card : 0.44
Parenting : 0.42
Board : 0.41
Educational;Education : 0.38
Casino : 0.36
Trivia : 0.35
Educational : 0.34
Casual;Pretend Play : 0.29
Word : 0.27
Entertainment;Music & Video : 0.25
Education;Pretend Play : 0.21
Music : 0.2
Casual;Action & Adventure : 0.1