# Profitable app profiles

# Scenario

Our aim in this project is to find mobile app profiles that are profitable. In this scenario, we're working as data analysts for a company that builds mobile apps, and our job is to enable our team of developers to make data-driven decisions with respect to the kind of apps they build.

At our company, we only build apps that are free to download and install, and our main source of revenue consists of in-app ads. This means that our revenue for any given app is mostly influenced by the number of users that use our app. Our goal for this project is to analyze data to help our developers understand what kinds of apps are likely to attract more users.



In [1]:
def read_file(filename):
    """Read a UTF-8 encoded CSV file and return all of its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV row, each row being a list of strings.
        The first row of the file (usually the header) is included.
    """
    from csv import reader

    # The context manager closes the file handle instead of leaking it, and
    # newline="" lets the csv module do its own line-ending handling (needed
    # for newlines embedded inside quoted fields).
    with open(filename, encoding="utf-8", newline="") as csv_file:
        return list(reader(csv_file))

In [2]:
# Load the Google Play Store dataset; at this point the row count still
# includes the header row.
android = read_file("datasets/googleplaystore.csv")
len(android)

10842

In [3]:
# Peel the column-name row off the front; `android` keeps only the data rows.
header, android = android[0], android[1:]

In [4]:
# Sanity check: report every row whose Category field (column 1) is blank.
# The whole row is printed -- printing the blank field itself would only
# emit empty lines and tell us nothing about which app is affected.
for row in android:
    if row[1] == "":
        print(row)

In [5]:
# Distinct values found in column 1 (Category), kept in first-seen order.
# dict.fromkeys drops repeats while preserving insertion order.
lst = list(dict.fromkeys(row[1] for row in android))

print(lst)

['ART_AND_DESIGN', 'AUTO_AND_VEHICLES', 'BEAUTY', 'BOOKS_AND_REFERENCE', 'BUSINESS', 'COMICS', 'COMMUNICATION', 'DATING', 'EDUCATION', 'ENTERTAINMENT', 'EVENTS', 'FINANCE', 'FOOD_AND_DRINK', 'HEALTH_AND_FITNESS', 'HOUSE_AND_HOME', 'LIBRARIES_AND_DEMO', 'LIFESTYLE', 'GAME', 'FAMILY', 'MEDICAL', 'SOCIAL', 'SHOPPING', 'PHOTOGRAPHY', 'SPORTS', 'TRAVEL_AND_LOCAL', 'TOOLS', 'PERSONALIZATION', 'PRODUCTIVITY', 'PARENTING', 'WEATHER', 'VIDEO_PLAYERS', 'NEWS_AND_MAGAZINES', 'MAPS_AND_NAVIGATION', '1.9']


In [6]:
# Show the row(s) whose Category column holds the stray value '1.9'.
for record in android:
    if record[1] != '1.9':
        continue
    print(record)

['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


In [7]:
# Print the first data row to see the field layout of a regular row.
print(android[0])

['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up']


In [8]:
# Print the column names.
print(header)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


In [9]:
# Field count of the 'Life Made WI-Fi Touchscreen Photo Frame' row (row pasted as a literal).
len(['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'])

12

In [10]:
# Field count of the first data row (row pasted as a literal), for comparison.
len(['Photo Editor & Candy Camera & Grid & ScrapBook', 'ART_AND_DESIGN', '4.1', '159', '19M', '10,000+', 'Free', '0', 'Everyone', 'Art & Design', 'January 7, 2018', '1.0.0', '4.0.3 and up'])

13

In [11]:
# Locate the position of the 12-field row inside `android` by matching the full row literal.
android.index(['Life Made WI-Fi Touchscreen Photo Frame', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'])

10472

In [12]:
# Display the row stored at index 10472.
android[10472]

['Life Made WI-Fi Touchscreen Photo Frame',
 '1.9',
 '19',
 '3.0M',
 '1,000+',
 'Free',
 '0',
 'Everyone',
 '',
 'February 11, 2018',
 '1.0.19',
 '4.0 and up']

In [13]:
"LIFESTYLE" in lst

True

In [14]:
# Repair row 10472: it has one field fewer than the header, with the Category
# value missing, so insert "LIFESTYLE" at index 1 to realign the columns.
# The length guard keeps this cell idempotent -- re-running it must not insert
# a second category and push every field one column too far.
if len(android[10472]) < len(header):
    android[10472].insert(1, "LIFESTYLE")
print(android[10472])

['Life Made WI-Fi Touchscreen Photo Frame', 'LIFESTYLE', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


In [15]:
# Distinct values of the fourth-from-last field, in first-seen order.
# NOTE: with the 13-column header shown in this notebook that field is
# 'Genres', so `categories` actually holds genre labels.
categories = list(dict.fromkeys(row[-4] for row in android))

categories

['Art & Design',
 'Art & Design;Pretend Play',
 'Art & Design;Creativity',
 'Art & Design;Action & Adventure',
 'Auto & Vehicles',
 'Beauty',
 'Books & Reference',
 'Business',
 'Comics',
 'Comics;Creativity',
 'Communication',
 'Dating',
 'Education;Education',
 'Education',
 'Education;Creativity',
 'Education;Music & Video',
 'Education;Action & Adventure',
 'Education;Pretend Play',
 'Education;Brain Games',
 'Entertainment',
 'Entertainment;Music & Video',
 'Entertainment;Brain Games',
 'Entertainment;Creativity',
 'Events',
 'Finance',
 'Food & Drink',
 'Health & Fitness',
 'House & Home',
 'Libraries & Demo',
 'Lifestyle',
 'Lifestyle;Pretend Play',
 'Adventure;Action & Adventure',
 'Arcade',
 'Casual',
 'Card',
 'Casual;Pretend Play',
 'Action',
 'Strategy',
 'Puzzle',
 'Sports',
 'Music',
 'Word',
 'Racing',
 'Casual;Creativity',
 'Casual;Action & Adventure',
 'Simulation',
 'Adventure',
 'Board',
 'Trivia',
 'Role Playing',
 'Simulation;Education',
 'Action;Action & Adventure

In [16]:
# List the rows whose fourth-from-last field (Genres) is an empty string.
for record in android:
    if record[-4] != '':
        continue
    print(record)

['Life Made WI-Fi Touchscreen Photo Frame', 'LIFESTYLE', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up']


In [17]:
# Confirm the repaired row (now carrying 'LIFESTYLE') is still found by a full-row match.
android.index(['Life Made WI-Fi Touchscreen Photo Frame', 'LIFESTYLE', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', '', 'February 11, 2018', '1.0.19', '4.0 and up'])

10472

In [18]:
# Print every collected label that contains the substring "Lifestyle".
for label in filter(lambda text: "Lifestyle" in text, categories):
    print(label)

Lifestyle
Lifestyle;Pretend Play
Lifestyle;Education


In [19]:
# Print the rows whose fourth-from-last field (Genres) is exactly "Lifestyle".
for record in android:
    genre = record[-4]
    if genre == "Lifestyle":
        print(record)

['Dollhouse Decorating Games', 'LIFESTYLE', '4.1', '18968', '32M', '5,000,000+', 'Free', '0', 'Teen', 'Lifestyle', 'April 26, 2018', '5.1', '4.1 and up']
['metroZONE', 'LIFESTYLE', '4.1', '47497', '34M', '10,000,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'June 8, 2018', '5.3.0.54.7', '5.0 and up']
['Easy Hair Style Design', 'LIFESTYLE', '4.3', '601', '5.1M', '100,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'December 20, 2017', '1.0', '2.3 and up']
['Black Wallpaper, AMOLED, Dark Background: Darkify', 'LIFESTYLE', '4.6', '51357', '80M', '5,000,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'July 31, 2018', '8.0', '4.0 and up']
['Girly Wallpapers Backgrounds', 'LIFESTYLE', '4.4', '13565', '3.3M', '1,000,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'August 5, 2018', '2.5', '4.0 and up']
['Chart - Myanmar Keyboard', 'LIFESTYLE', '4.4', '39364', '28M', '5,000,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'July 17, 2018', '10.19', '4.0.3 and up']
['Easy Makeup Tutorials', 'LIFESTYLE', '4.3',

In [20]:
# ord() gives the Unicode code point of a single character.
ord("😋")

128523

In [21]:
# chr() is the inverse of ord(): code point back to character.
chr(128523)

'😋'

In [22]:
# Fill in the blank fourth-from-last field (Genres) of row 10472.
android[10472][-4] = "Lifestyle"
print(android[10472])

['Life Made WI-Fi Touchscreen Photo Frame', 'LIFESTYLE', '1.9', '19', '3.0M', '1,000+', 'Free', '0', 'Everyone', 'Lifestyle', 'February 11, 2018', '1.0.19', '4.0 and up']


### Removing Duplicate Apps

In [23]:
# Print every row whose app name (column 0) is exactly "Instagram".
for entry in android:
    if entry[0] != "Instagram":
        continue
    print(entry)

['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']


In [24]:
# Split the app names (column 0) into first occurrences and repeats:
#   unique_app    -- each distinct name once, in first-seen order
#   duplicate_app -- one entry for every repeated occurrence of a name
# Membership is tested against a set so the pass stays linear; testing
# `app in unique_app` against the list rescans it for every row.
duplicate_app = []
unique_app = []
seen_names = set()

for row in android:
    app = row[0]
    if app in seen_names:
        duplicate_app.append(app)
    else:
        seen_names.add(app)
        unique_app.append(app)

In [25]:
# Number of repeated occurrences collected in duplicate_app.
len(duplicate_app)

1181

In [26]:
# Number of distinct app names.
len(unique_app)

9660

* SET

In [27]:
# Set literal written with a repeated element ("Daniyal" appears twice).
st = {"Anas","Mubashir","Daniyal","Daniyal"}
type(st)

set

In [28]:
# Display the set: the repeated "Daniyal" is stored only once.
st

{'Anas', 'Daniyal', 'Mubashir'}

In [29]:
# Gather the app name (column 0) of every row, repeats included.
app_name = [row[0] for row in android]

len(app_name)

10841

In [30]:
# Converting the name list to a set collapses repeated names; its size is the
# number of distinct app names.
unique = set(app_name)
len(unique)

9660

In [31]:
# Number of duplicate entries = total rows minus distinct app names.
# Computed from the data rather than from pasted-in numbers, so it stays
# correct if the dataset or the cleaning steps change.
len(android) - len(unique)

1181

In [32]:
# Print all rows for the app named "Instagram" (column 0).
for entry in filter(lambda row: row[0] == "Instagram", android):
    print(entry)

['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577446', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66577313', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']
['Instagram', 'SOCIAL', '4.5', '66509917', 'Varies with device', '1,000,000,000+', 'Free', '0', 'Teen', 'Social', 'July 31, 2018', 'Varies with device', 'Varies with device']


In [33]:
# Print the column names again to look up field positions.
print(header)

['App', 'Category', 'Rating', 'Reviews', 'Size', 'Installs', 'Type', 'Price', 'Content Rating', 'Genres', 'Last Updated', 'Current Ver', 'Android Ver']


In [34]:
# review = {}

# for i in android:
#     app_name = i[0]
#     n_reviews = int(i[3])
#     if app_name in review:
#         if n_reviews > review[app_name]:
#             review[app_name] = n_reviews
#     else:
#         review[app_name] = n_review

In [35]:
# Number of data rows (header already removed).
len(android)

10841

In [36]:
# Display the first data row.
android[0]

['Photo Editor & Candy Camera & Grid & ScrapBook',
 'ART_AND_DESIGN',
 '4.1',
 '159',
 '19M',
 '10,000+',
 'Free',
 '0',
 'Everyone',
 'Art & Design',
 'January 7, 2018',
 '1.0.0',
 '4.0.3 and up']

In [37]:
# high_rev = {} # {"Instagram": 12}

# for i in android:
    
#     app_name = i[0] # Instagram
#     review = int(i[3]) # 12
    
#     if app_name not in high_rev:
#         high_rev[app_name] = reviews
        
#     else:
#         if high_rev[app_name] < reviews:
#             high_rev[app_name] = reviews # 12

In [38]:
# len(high_rev)

In [39]:
# Map each app name (column 0) to the largest review count (column 3, parsed
# as int) found on any of its rows.
reviews = {}

for row in android:
    app_name = row[0]
    n_review = int(row[3])
    # Skip rows that do not beat the count already recorded for this app.
    if app_name in reviews and reviews[app_name] >= n_review:
        continue
    reviews[app_name] = n_review

In [40]:
# Display the app-name -> highest-review-count mapping.
(reviews)

{'Photo Editor & Candy Camera & Grid & ScrapBook': 159,
 'Coloring book moana': 974,
 'U Launcher Lite – FREE Live Cool Themes, Hide Apps': 87510,
 'Sketch - Draw & Paint': 215644,
 'Pixel Draw - Number Art Coloring Book': 967,
 'Paper flowers instructions': 167,
 'Smoke Effect Photo Maker - Smoke Editor': 178,
 'Infinite Painter': 36815,
 'Garden Coloring Book': 13791,
 'Kids Paint Free - Drawing Fun': 121,
 'Text on Photo - Fonteee': 13880,
 'Name Art Photo Editor - Focus n Filters': 8788,
 'Tattoo Name On My Photo Editor': 44829,
 'Mandala Coloring Book': 4326,
 '3D Color Pixel by Number - Sandbox Art Coloring': 1518,
 'Learn To Draw Kawaii Characters': 55,
 'Photo Designer - Write your name with shapes': 3632,
 '350 Diy Room Decor Ideas': 27,
 'FlipaClip - Cartoon animation': 194216,
 'ibis Paint X': 224399,
 'Logo Maker - Small Business': 450,
 "Boys Photo Editor - Six Pack & Men's Suit": 654,
 'Superheroes Wallpapers | 4K Backgrounds': 7699,
 'Mcqueen Coloring pages': 65,
 'HD Mi

In [41]:
# Total number of data rows, duplicates included.
len(android)

10841

In [42]:
# Number of distinct app names (one dictionary key per app).
len(reviews)

9660

In [43]:
# First attempt at de-duplication: keep each row whose review count equals
# the maximum recorded for its app in `reviews`.
# NOTE: rows that tie on that maximum are all kept, so identical duplicate
# rows survive this filter.
android_clean = []

for row in android:
    app_name = row[0]
    n_review = int(row[3])
    if n_review != reviews[app_name]:
        continue
    android_clean.append(row)

In [44]:
# Row count after the first de-duplication attempt.
len(android_clean)

10055

In [45]:
# Print the rows kept for "ZOOM Cloud Meetings" to check for leftover duplicates.
for entry in android_clean:
    if entry[0] != "ZOOM Cloud Meetings":
        continue
    print(entry)

['ZOOM Cloud Meetings', 'BUSINESS', '4.4', '31614', '37M', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 20, 2018', '4.1.28165.0716', '4.0 and up']
['ZOOM Cloud Meetings', 'BUSINESS', '4.4', '31614', '37M', '10,000,000+', 'Free', '0', 'Everyone', 'Business', 'July 20, 2018', '4.1.28165.0716', '4.0 and up']


In [46]:
# Display the names that occurred more than once.
duplicate_app

['Quick PDF Scanner + OCR FREE',
 'Box',
 'Google My Business',
 'ZOOM Cloud Meetings',
 'join.me - Simple Meetings',
 'Box',
 'Zenefits',
 'Google Ads',
 'Google My Business',
 'Slack',
 'FreshBooks Classic',
 'Insightly CRM',
 'QuickBooks Accounting: Invoicing & Expenses',
 'HipChat - Chat Built for Teams',
 'Xero Accounting Software',
 'MailChimp - Email, Marketing Automation',
 'Crew - Free Messaging and Scheduling',
 'Asana: organize team projects',
 'Google Analytics',
 'AdWords Express',
 'Accounting App - Zoho Books',
 'Invoice & Time Tracking - Zoho',
 'join.me - Simple Meetings',
 'Invoice 2go — Professional Invoices and Estimates',
 'SignEasy | Sign and Fill PDF and other Documents',
 'Quick PDF Scanner + OCR FREE',
 'Genius Scan - PDF Scanner',
 'Tiny Scanner - PDF Scanner App',
 'Fast Scanner : Free PDF Scan',
 'Mobile Doc Scanner (MDScan) Lite',
 'TurboScan: scan documents and receipts in PDF',
 'Tiny Scanner Pro: PDF Doc Scan',
 'Docs To Go™ Free Office Suite',
 'OfficeS

In [47]:
# De-duplicate: keep exactly one row per app -- the first row that carries the
# app's highest review count (looked up in `reviews`).
# `already_added` is a set so the "kept already?" test is O(1); with a list the
# membership check rescans every kept name for each row.
android_clean = []
already_added = set()

for row in android:
    app_name = row[0]
    n_review = int(row[3])
    if n_review == reviews[app_name] and app_name not in already_added:
        android_clean.append(row)
        already_added.add(app_name)

In [48]:
# Row count after de-duplication: one row per distinct app name.
len(android_clean)

9660

In [49]:
def is_English(app_name, max_non_ascii=3):
    """Return True if *app_name* contains at most *max_non_ascii* non-ASCII characters.

    A character counts as non-ASCII when its code point is above 127. A small
    allowance is tolerated so that names carrying a few emoji or symbols are
    not rejected outright.

    Args:
        app_name: The name to check.
        max_non_ascii: Largest number of non-ASCII characters still accepted.
            Defaults to 3, the threshold this function has always used.

    Returns:
        True when the number of non-ASCII characters is within the limit,
        otherwise False. An empty string yields True.
    """
    # Count directly instead of building a throwaway list of booleans.
    non_ascii_count = sum(1 for char in app_name if ord(char) > 127)
    return non_ascii_count <= max_non_ascii

In [50]:
# An ASCII name followed by three emoji (three non-ASCII characters).
is_English("Instagram😋😋😋")

True

In [51]:
# list.count(False) returns how many elements of the list equal False.
c=[True,True,True,True,True,True,True,False]
c.count(False)

1

In [52]:
# Keep only the de-duplicated rows whose app name (column 0) passes is_English.
American_app = [row for row in android_clean if is_English(row[0])]


In [53]:
# Number of rows that passed the is_English filter.
len(American_app)

9615

In [54]:
# Code point of the emoji; it is far above the ASCII limit of 127.
ord('😋')

128523

In [55]:
# Character for code point 128529.
chr(128529)

'😑'