#### Introduction
We only build apps that are free to download and install, and our main source of revenue consists of in-app ads. This means our revenue for any given app is mostly influenced by the number of users who use our app — the more users that see and engage with the ads, the better. 

#### Anticipated Outcome
Our goal is to analyze data to help our developers understand what type of apps are likely to attract more users.

In [4]:
# Open csv files and save each as a list of lists

def open_dataset(file_name):
    
    opened_file = open(file_name)    
    from csv import reader
    read_file = reader(opened_file)
    data = list(read_file)
    return data

apple_data=open_dataset('resources/AppleStore.csv')
google_data=open_dataset('resources/googleplaystore.csv')

For additional documention information, use the following links:
1. Google dataset: [link](https://www.kaggle.com/lava18/google-play-store-apps)
2. Apple dataset: [link](https://www.kaggle.com/ramamet4/app-store-apple-data-set-10k-apps)

In [5]:
# function slices dataset at designated indices to allow for exploration

def explore_data(dataset, start, end, rows_and_columns=False):
    dataset_slice = dataset[start:end]    
    for row in dataset_slice:
        print(row)
        print('\n') # adds a new (empty) line after each row

    if rows_and_columns:
        print('Number of rows:', len(dataset))
        print('Number of columns:', len(dataset[0]))

In [10]:
# Print header and first few rows for Apple dataset, including number of rows and columns
apple_header=apple_data[0]
explore_apple=explore_data(apple_data,1, 6, True)       

print("Header")
print(apple_header)
print(explore_apple)

['284882215', 'Facebook', '389879808', 'USD', '0', '2974676', '212', '3.5', '3.5', '95', '4+', 'Social Networking', '37', '1', '29', '1']


['389801252', 'Instagram', '113954816', 'USD', '0', '2161558', '1289', '4.5', '4', '10.23', '12+', 'Photo & Video', '37', '0', '29', '1']


['529479190', 'Clash of Clans', '116476928', 'USD', '0', '2130805', '579', '4.5', '4.5', '9.24.12', '9+', 'Games', '38', '5', '18', '1']


['420009108', 'Temple Run', '65921024', 'USD', '0', '1724546', '3842', '4.5', '4', '1.6.2', '9+', 'Games', '40', '5', '1', '1']


['284035177', 'Pandora - Music & Radio', '130242560', 'USD', '0', '1126879', '3594', '4', '4.5', '8.4.1', '12+', 'Music', '37', '4', '1', '1']


Number of rows: 7198
Number of columns: 16
Header
['id', 'track_name', 'size_bytes', 'currency', 'price', 'rating_count_tot', 'rating_count_ver', 'user_rating', 'user_rating_ver', 'ver', 'cont_rating', 'prime_genre', 'sup_devices.num', 'ipadSc_urls.num', 'lang.num', 'vpp_lic']
None


In [28]:
# Print header and first few rows for Google dataset, including number of rows and columns

google_header=google_data[0]
explore_google=explore_data(google_data,1, 6, True)       

print("Header")
print(google_header)
print(explore_google)

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', '7-Jan-18', '1.0.0', '4.0.3 and up']


['Coloring book moana', 'ART_AND_DESIGN', '3.9', '967', '14M', '500,000+', 'Free', '0', 'Everyone', 'Art & Design;Pretend Play', '15-Jan-18', '2.0.0', '4.0.3 and up']


['U Launcher Lite – FREE Live Cool Themes, Hide Apps', 'ART_AND_DESIGN', '4.7', '87510', '8.7M', '5,000,000+', 'Free', '0', 'Everyone', 'Art & Design', '1-Aug-18', '1.2.4', '4.0.3 and up']


['Sketch - Draw & Paint', 'ART_AND_DESIGN', '4.5', '215644', '25M', '50,000,000+', 'Free', '0', 'Teen', 'Art & Design', '8-Jun-18', 'Varies with device', '4.2 and up']


['Pixel Draw - Number Art Coloring Book', 'ART_AND_DESIGN', '4.3', '967', '2.8M', '100,000+', 'Free', '0', 'Everyone', 'Art & Design;Creativity', '20-Jun-18', '1.1', '4.4 and up']


Number of rows: 10841
Number of columns: 13
Header
['App', 'Category', 'Rating', 'Reviews', 'Size', 'Instal

In [16]:
# Check for missing rating from google_data
print(google_data[10473])

# delete row with missing data
# del google_data[10473]

['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', '11-Feb-18', '1.0.19', '4.0 and up', '']


In [18]:
# Check that row was deleted
len(google_data)

10841

In [21]:
# Check for duplicate apps in Apple data
ios_unique_apps = [] 
ios_duplicate_apps = [] 

for app in apple_data: 
    app_name = app[2] 

    if app_name not in ios_unique_apps:
        ios_unique_apps.append(app_name)
    else:
        ios_duplicate_apps.append(app_name)
        
print('unique apps:', len(ios_unique_apps))
print('duplicate apps: ', len(ios_duplicate_apps))

unique apps: 7108
duplicate apps:  90


In [32]:
# Check for duplicate apps in Google data

google_unique_apps = [] 
google_duplicate_apps = [] 

for app in google_data: 
    app_name = app[0] 

    if app_name not in google_unique_apps:
        google_unique_apps.append(app_name)
    else:
        google_duplicate_apps.append(app_name)
        
print('unique apps:',len(google_unique_apps))
print('duplicate apps: ', len(google_duplicate_apps))

unique apps: 9660
duplicate apps:  1181
