# Profitable app profiles

# Scenario

Our aim in this project is to find mobile app profiles that are profitable. We assume that we're working as data analysts for a company that builds mobile apps, and our job is to enable our team of developers to make data-driven decisions with respect to the kinds of apps they build.

At our company, we only build apps that are free to download and install, and our main source of revenue consists of in-app ads. This means that our revenue for any given app is mostly influenced by the number of users that use our app. Our goal for this project is to analyze data to help our developers understand what kinds of apps are likely to attract more users.



In [1]:
def read_file(file):
    """Read a CSV file and return its records as a list of rows.

    Args:
        file: Path of the CSV file to read; it is decoded as UTF-8.

    Returns:
        A list with one entry per CSV record (the header row included, if
        the file has one); each entry is a list of field strings.
    """
    from csv import reader

    # The context manager closes the handle even if parsing fails, and
    # newline="" is what the csv module asks for so that line breaks inside
    # quoted fields are handled by the parser rather than by the file layer.
    with open(file, encoding="utf-8", newline="") as csv_file:
        return list(reader(csv_file))

In [2]:
# Load the whole Google Play data set (header row included) as a list of rows.
android = read_file("googleplaystore.csv")

In [3]:
# The first record holds the column names.
header = android[0]

In [4]:
# Display the column names for reference when indexing into rows below.
header

['App',
 'Category',
 'Rating',
 'Reviews',
 'Size',
 'Installs',
 'Type',
 'Price',
 'Content Rating',
 'Genres',
 'Last Updated',
 'Current Ver',
 'Android Ver']

In [5]:
# Drop the header record so `android` contains data rows only.
android = android[1:]

In [6]:
# Number of data rows once the header has been removed.
len(android)

10841

In [7]:
# Scan for rows whose Category field (column 1) is empty; every hit echoes
# that empty field, so no output means no such rows exist.
for row in android:
    category = row[1]
    if category == '':
        print(category)

In [8]:
# Distinct Category values in the order they first appear in the data;
# dict keys are unique and keep insertion order.
lst = list(dict.fromkeys(row[1] for row in android))
print(lst)

['ART_AND_DESIGN', 'AUTO_AND_VEHICLES', 'BEAUTY', 'BOOKS_AND_REFERENCE', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'EDUCATION', 'ENTERTAINMENT', 'EVENTS', 'FINANCE', 'FOOD_AND_DRINK', 'HEALTH_AND_FITNESS', 'HOUSE_AND_HOME', 'LIBRARIES_AND_DEMO', 'LIFESTYLE', 'GAME', 'FAMILY', 'MEDICAL', 'SOCIAL', 'SHOPPING', 'PHOTOGRAPHY', 'SPORTS', 'TRAVEL_AND_LOCAL', 'TOOLS', 'PERSONALIZATION', 'PRODUCTIVITY', 'PARENTING', 'WEATHER', 'VIDEO_PLAYERS', 'NEWS_AND_MAGAZINES', 'MAPS_AND_NAVIGATION', '1.9']


In [9]:
# Print every row whose Category field holds the stray value '1.9'.
for row in android:
    if row[1] != '1.9':
        continue
    print(row)

['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


In [10]:
# Find the position of the malformed row (first exact match) so it can be
# repaired in place by index.
android.index(['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'])

10472

In [11]:
# A complete row has 13 fields (indices 0-12). This lookup raises IndexError
# because the malformed row has only 12: its Category value is missing.
android[10472][12]

IndexError: list index out of range

In [12]:
# Row 10472 lacks its Category field, so every later value sits one column to
# the left. Insert the category at column 1 to realign the row with `header`.
# The length guard keeps the cell idempotent: re-running it must not insert a
# second time and push the row out of alignment again.
if len(android[10472]) < len(header):
    android[10472].insert(1, "LIFESTYLE")

In [13]:
# Inspect the row after the category has been inserted.
android[10472]

['Life Made WI-Fi Touchscreen Photo Frame',
 'LIFESTYLE',
 '1.9',
 '19',
 '3.0M',
 '1,000+',
 'Free',
 '0',
 'Everyone',
 '',
 'February 11, 2018',
 '1.0.19',
 '4.0 and up']

In [14]:
# In the realigned 13-field row, index -4 is column 9 ('Genres'), which is
# still an empty string; fill it in.
android[10472][-4] = 'Lifestyle'

In [15]:
# Inspect the fully repaired row.
android[10472]

['Life Made WI-Fi Touchscreen Photo Frame',
 'LIFESTYLE',
 '1.9',
 '19',
 '3.0M',
 '1,000+',
 'Free',
 '0',
 'Everyone',
 'Lifestyle',
 'February 11, 2018',
 '1.0.19',
 '4.0 and up']

# Removing Duplicates

In [16]:
# Split app names into first occurrences (`unique`) and repeat occurrences
# (`duplicate`). Both stay lists so they keep file order.
unique = []
duplicate = []
# Membership is tested against a set: looking names up in the growing
# `unique` list would make this loop quadratic in the number of rows.
seen_names = set()
for row in android:
    name = row[0]
    if name in seen_names:
        duplicate.append(name)
    else:
        seen_names.add(name)
        unique.append(name)

In [17]:
# NOTE: the bare `unique` expression has no visible effect here; only the
# final expression of the cell (the count) is echoed.
unique
# Number of distinct app names.
len(unique)

9660

In [18]:
# NOTE: the bare `duplicate` expression has no visible effect here; only the
# final expression of the cell (the count) is echoed.
duplicate
# Number of rows whose app name had already appeared earlier in the data.
len(duplicate)


1181

In [19]:
# Map each app name to the highest review count found among its rows.
reviews = {}
for row in android:
    app_name = row[0]
    # Review counts are whole numbers; parse them as int so they have the
    # same type as the int(...) values the de-duplication step compares with.
    nreviews = int(row[3])
    # Store the first count seen for a name, then only ever raise it.
    if app_name not in reviews or nreviews > reviews[app_name]:
        reviews[app_name] = nreviews

In [20]:
# Keep exactly one row per app: the first row whose review count equals the
# maximum recorded for that app in `reviews`.
android_clean = []
already_add = []
# Set twin of `already_add` for O(1) membership checks; testing against the
# list would make the loop quadratic. The list is kept for any later use.
added_names = set()

for row in android:
    app_name = row[0]
    n_reviews = int(row[3])
    # The second condition handles ties: several rows of one app can share
    # the maximum review count, and only the first of them is kept.
    if n_reviews == reviews[app_name] and app_name not in added_names:
        android_clean.append(row)
        already_add.append(app_name)
        added_names.add(app_name)

In [21]:
# def is_English(app_name):
#     lst = []
#     for i in app_name:
#         if ord(i) <= 127:
#             lst.append(True)
#         else:
#             lst.append(False)
#     return True if lst.count(False) <= 3 else False
# is_English = lambda x:[ True if ord(i)<=127 else False for i in x ]

In [22]:
def is_English(x):
    """Return True when *x* contains at most three non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127. Up to
    three such characters are tolerated so that names containing a few
    emoji or symbols are still accepted.

    Args:
        x: The string (app name) to check.

    Returns:
        True if no more than three characters of *x* fall outside the ASCII
        range, False otherwise.
    """
    non_ascii_count = sum(1 for char in x if ord(char) > 127)
    return non_ascii_count <= 3


# Sanity check: exactly three emoji is still within the tolerance.
is_English("Instagram😋😋😋")

True

In [23]:
# Keep only the de-duplicated rows whose app name passes the is_English check.
American_app = [row for row in android_clean if is_English(row[0])]

In [24]:
# Rows remaining after the English-name filter.
len(American_app)

9615

In [25]:
# NOTE: this first expression has no visible effect; only the last
# expression of the cell is echoed.
len(American_app)
# In a 13-field row, index -7 is column 6 ('Type'); peek at the first row.
American_app[0][-7]

'Free'

In [26]:
# Free apps only: keep rows whose Price field (column 7) is exactly '0'.
android_final = [row for row in American_app if row[7] == '0']

In [27]:
# Rows remaining after keeping only the apps priced at '0'.
len(android_final)

8865

In [28]:
# Category value (column 1) of every remaining row, duplicates included.
categorie = [row[1] for row in android_final]

In [29]:
# Collapse the category column to its distinct values, stored as a list.
uni_cat = list(set(categorie))

In [30]:
# Number of distinct categories among the remaining apps.
len(uni_cat)

33

In [31]:
# Show the distinct categories; the list was built from a set, so the order
# carries no meaning.
print(uni_cat)

['WEATHER', 'TOOLS', 'DATING', 'BUSINESS', 'PERSONALIZATION', 'SPORTS', 'BEAUTY', 'HOUSE_AND_HOME', 'LIFESTYLE', 'TRAVEL_AND_LOCAL', 'PHOTOGRAPHY', 'MAPS_AND_NAVIGATION', 'GAME', 'AUTO_AND_VEHICLES', 'PARENTING', 'PRODUCTIVITY', 'ART_AND_DESIGN', 'FOOD_AND_DRINK', 'ENTERTAINMENT', 'HEALTH_AND_FITNESS', 'FAMILY', 'MEDICAL', 'NEWS_AND_MAGAZINES', 'EDUCATION', 'LIBRARIES_AND_DEMO', 'COMICS', 'SOCIAL', 'SHOPPING', 'BOOKS_AND_REFERENCE', 'COMMUNICATION', 'EVENTS', 'FINANCE', 'VIDEO_PLAYERS']


In [32]:
# Empty list, presumably filled with per-category install figures by the
# loop that follows (confirm against that cell).
# NOTE(review): the name looks like a typo of `avg_installation`.
avg_intallation = []

In [33]:
for cat in uni_cat:
    total_int = 0
    no_ins = 0
    for i in android_final:
        categories = i[1]
        if cat == categories:
            n_install = i[5]
            n_install = int(n_install.replace(",","").strip("+")) 
            #print(cat,n_install)