# Profitable App Profiles for the App Store and Google Play Markets!

The goal of this project is to analyze data to help our developers understand what types of apps are likely to attract more users.

Our company only builds apps that are free to download and install, and that are directed toward an English-speaking audience.

In [1]:
from csv import reader

### The Google Play data set ###
# A context manager closes the file as soon as the rows have been read.
# newline='' is what the csv module asks for so quoted fields containing line
# breaks are parsed correctly; the explicit encoding avoids depending on the
# platform default (assumes the file is UTF-8 -- TODO confirm).
with open('googleplaystore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    android = list(read_file)
android_header = android[0]  # first row holds the column names
android = android[1:]        # remaining rows are the apps

### The App Store data set ###
# Same loading steps as above, for the App Store file.
with open('AppleStore.csv', encoding='utf8', newline='') as opened_file:
    read_file = reader(opened_file)
    ios = list(read_file)
ios_header = ios[0]  # first row holds the column names
ios = ios[1:]        # remaining rows are the apps

In [2]:
def explore_data(dataset, start, end, rows_and_columns=False):
    """Print the rows ``dataset[start:end]`` and, optionally, the data set size.

    Each row is printed followed by ``print('\\n')`` so rows are visually
    separated. When ``rows_and_columns`` is true, the total number of rows
    and the length of the first row are printed afterwards.
    """
    for record in dataset[start:end]:
        print(record)
        print('\n')  # spacer between consecutive rows

    if not rows_and_columns:
        return

    # The column count is taken from the first row of the data set.
    print('Number of rows:', len(dataset))
    print('Number of columns:', len(dataset[0]))

In [3]:
# Show the column names of both data sets; later cells index rows by position.
for label, header in (("Android: ", android_header), ("Ios: ", ios_header)):
    print(label, header)

Android:  ['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']
Ios:  ['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']


# Cleaning the data

Removing duplicate apps, keeping those with the most reviews

First Android:

In [5]:
# Split the Android app names (column 0, 'App') into first occurrences and
# repeats: a name goes to android_unique_apps the first time it is seen and
# to android_duplicate_apps every time after that.
android_duplicate_apps = []
android_unique_apps = []
# Set mirror of android_unique_apps: membership tests on a list are a linear
# scan, which makes the loop quadratic in the number of rows.
_android_seen_names = set()

for app in android:
    name = app[0]
    if name in _android_seen_names:
        android_duplicate_apps.append(name)
    else:
        _android_seen_names.add(name)
        android_unique_apps.append(name)

In [6]:
# Map each Android app name to the highest review count found among its rows.
android_reviews_max = {}

for app in android:
    name = app[0]       # 'App'
    n_reviews = app[3]  # 'Reviews', still a string as read from the CSV

    # Compare the counts as numbers: compared as text, '9' ranks above
    # '10000'. The conversion only runs when the name was seen before, and
    # the raw string is what gets stored, so code that compares the stored
    # value against row[3] with == keeps working.
    # NOTE(review): float() raises ValueError if a repeated app has a
    # non-numeric value in this column -- confirm the data is clean.
    if (name not in android_reviews_max
            or float(android_reviews_max[name]) < float(n_reviews)):
        android_reviews_max[name] = n_reviews

In [7]:
# Build the de-duplicated Android data set: for every app name keep the first
# row whose review count equals the maximum recorded in android_reviews_max.
android_clean = []
android_already_added = []
# Set mirror of android_already_added so the membership test is O(1) instead
# of a linear scan of the list on every row.
_android_added_lookup = set()

for app in android:
    name = app[0]
    n_reviews = app[3]

    # The second condition guards against rows that tie on the maximum
    # review count: only the first such row is kept.
    if android_reviews_max[name] == n_reviews and name not in _android_added_lookup:
        android_clean.append(app)
        android_already_added.append(name)
        _android_added_lookup.add(name)

Now iOS:

In [9]:
# Split the iOS apps into first occurrences and repeats, keyed on column 0.
# NOTE(review): per ios_header, column 0 is 'id' and the app name
# ('track_name') is column 1 -- confirm that de-duplicating on id is intended.
ios_duplicate_apps = []
ios_unique_apps = []
# Set mirror of ios_unique_apps: membership tests on a list are a linear scan,
# which makes the loop quadratic in the number of rows.
_ios_seen_keys = set()

for app in ios:
    name = app[0]
    if name in _ios_seen_keys:
        ios_duplicate_apps.append(name)
    else:
        _ios_seen_keys.add(name)
        ios_unique_apps.append(name)

In [10]:
# Map each iOS app (keyed on column 0) to its highest review count, then keep
# one row per app -- the first row that carries that maximum.
# NOTE(review): per ios_header, column 0 is 'id', not the app name
# ('track_name' is column 1) -- confirm that keying on id is intended.
ios_reviews_max = {}

for app in ios:
    name = app[0]
    # Column 5 is 'rating_count_tot' per ios_header; column 3 is 'currency',
    # which is not a review count.
    n_reviews = app[5]

    # Compare the counts as numbers: compared as text, '9' ranks above
    # '10000'. The conversion only runs for keys seen before, and the raw
    # string is stored so the equality check below matches the row value.
    if (name not in ios_reviews_max
            or float(ios_reviews_max[name]) < float(n_reviews)):
        ios_reviews_max[name] = n_reviews

ios_clean = []
ios_already_added = []
# Set mirror of ios_already_added so the membership test is O(1) instead of a
# linear scan of the list on every row.
_ios_added_lookup = set()

for app in ios:
    name = app[0]
    n_reviews = app[5]  # must read the same column as the loop above

    # The second condition guards against rows that tie on the maximum
    # review count: only the first such row is kept.
    if ios_reviews_max[name] == n_reviews and name not in _ios_added_lookup:
        ios_clean.append(app)
        ios_already_added.append(name)
        _ios_added_lookup.add(name)

Removing non-English apps

In [16]:
def is_english(string):
    """Return True when ``string`` has at most three non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127. Up to
    three such characters are tolerated; a fourth makes the function return
    False.
    """
    outside_ascii = sum(1 for symbol in string if ord(symbol) > 127)
    return outside_ascii <= 3

In [17]:
# Keep only the apps whose name passes is_english (three or fewer non-ASCII
# characters). Both platforms are filtered from their de-duplicated lists.

# Android: the app name is column 0 ('App').
android_english = [app for app in android_clean if is_english(app[0])]

# iOS: the app name is column 1 ('track_name'). Filter ios_clean rather than
# the raw ios rows so the de-duplication step is not bypassed, matching the
# Android path.
ios_english = [app for app in ios_clean if is_english(app[1])]

Isolate the free apps

In [19]:
# Free apps only. The price is compared as the exact text stored in each data
# set: '0' at index 7 for Android rows, '0.0' at index 4 for iOS rows.
android_final = [app for app in android_english if app[7] == '0']
ios_final = [app for app in ios_english if app[4] == '0.0']

# Building Frequency Tables

Frequency Table Functions

In [20]:
def freq_table(dataset, index):
    """Return the percentage of rows holding each distinct value of a column.

    ``dataset`` is an iterable of rows and ``index`` selects the column.
    The result maps each value found at ``row[index]`` to its share of all
    rows, expressed as a percentage. Keys appear in first-seen order. With
    no rows the result is an empty dict.
    """
    counts = {}
    n_rows = 0

    for row in dataset:
        n_rows += 1
        key = row[index]
        counts[key] = counts.get(key, 0) + 1

    # Turn the absolute counts into percentages of the total row count.
    return {key: (count / n_rows) * 100 for key, count in counts.items()}

def display_table(dataset, index):
    """Print the frequency table of a column, highest percentage first.

    The percentages come from ``freq_table(dataset, index)``. Each line is
    printed as ``value : percentage``. Entries are ordered by descending
    (percentage, value) pairs, so ties on percentage are broken by the value
    itself.
    """
    percentages = freq_table(dataset, index)

    # Put the percentage first in each tuple so it drives the sort order.
    ranked = [(share, label) for label, share in percentages.items()]
    ranked.sort(reverse=True)

    for share, label in ranked:
        print(label, ':', share)