In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import json

# Setting up Database Connection

In [2]:
import pymysql

host = os.getenv('MYSQL_HOST')
port = os.getenv('MYSQL_PORT')
user = os.getenv('MYSQL_USER')
password = os.getenv('MYSQL_PASSWORD')
database = 'quicklearn_tzn'

conn = pymysql.connect(
    host=host,
    port=int(port),
    user=user,
    passwd=password,
    db=database,
    charset='utf8mb4'
)

# Participants

In [4]:
# query = 'select * from participants;'
query = 'SELECT COUNT(logs.id) as logcount, ' \
            'participants.id,' \
            'participants.uid,' \
            'participants.device,' \
            'participants.locale,' \
            'participants.timezone,' \
            'participants.version,' \
            'participants.simcard_info as has_simcard,' \
            'participants.installed_apps,' \
            'participants.email,' \
            'participants.gender,' \
            'participants.phone_usage as who_uses_this_phone,' \
            'participants.age,' \
            'participants.src_language,' \
            'participants.target_language,' \
            'participants.proficiency,' \
            'participants.created_at,' \
            'participants.updated_at' \
        ' FROM ' \
            'participants' \
        ' INNER JOIN ' \
            'logs' \
        ' ON ' \
            'participants.uid = logs.uid' \
        ' GROUP BY ' \
            'participants.id;'

# print(query)

df_participants = pd.read_sql(query, con=conn)   
df_participants = df_participants.set_index("uid", drop=False)
# df_participants.to_csv ('./exports/participants.csv', index = None, header=True)

*Table export*: [participants.csv](./exports/participants.csv)

## Filter Participants

In [8]:
# according to timestamps and signed up timezones
valid_participants = [
    'e9b942',
    'e0ff8b',
    '1e215f',
    'a4af1f',
    '58d55d',
    '7463cd',
    '84de2d',
    '296e70',
    '6db38a',
    'c5a974',
    'e0f119',
    'f9b787',
    'f3c01f',
    '3602ff',
    '4ffec4',
    '8e9937',
    '3d3229',
    '53239d',
    'a56192',
    '519d4b',
    '8aeae1'
]

one_day_usage = [
    'f3c01f',
    '8e9937',
    '53239d'
]

maybes = [
    'b8d78b'
]

df_valid_participants = df_participants[df_participants['uid'].isin(valid_participants)]
df_valid_participants
# df_valid_participants.to_csv ('./exports/valid_participants.csv', index = None, header=True)

Unnamed: 0_level_0,logcount,id,uid,device,locale,timezone,version,has_simcard,installed_apps,email,gender,who_uses_this_phone,age,src_language,target_language,proficiency,created_at,updated_at
uid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
e9b942,76851,107,e9b942,samsung-SM-G355M,es_US,-6,3,,,ggarros@gmail.com,female,,43,spanish,english,Professional working proficiency,2017-03-12 14:16:59,2017-03-12 14:17:00
e0ff8b,43012,109,e0ff8b,OnePlus-ONEPLUS A3003,de_DE,1,3,,,,male,,20,german,english,Professional working proficiency,2017-03-12 16:40:39,2017-03-12 16:40:40
1e215f,199269,110,1e215f,Sony-E6653,de_DE,1,3,,,,female,,18,german,english,Full proficiency,2017-03-12 16:58:42,2017-03-12 16:58:42
a4af1f,57023,112,a4af1f,LGE-Nexus 5X,en_US,9,9,ready,"{""SensId"":""InstalledApps"",""SensVal"":{""com.skyp...",,unknow,unknown,0,xhosa,spanish,Beginner,2017-03-13 16:13:17,2017-05-29 17:15:01
58d55d,48539,115,58d55d,samsung-GT-I9300,en_GB,2,5,,"{""SensVal"":{""com.google.android.location"":true...",NdunduFabrice@gmail.com,male,,26,french,english,,2017-03-19 13:10:20,2017-03-19 13:26:56
7463cd,56883,116,7463cd,LGE-Nexus 5X,en_US,2,10,ready,"{""SensId"":""InstalledApps"",""SensVal"":{""com.conf...",,female,only,39,french,english,Conversant,2017-03-23 09:27:33,2017-06-13 07:56:57
84de2d,15333,117,84de2d,Vodafone-VF685,en_GB,2,5,,"{""SensVal"":{""com.android.defcontainer"":true,""c...",,male,,62,english,german,Conversant,2017-03-25 10:01:19,2017-04-21 20:47:40
296e70,1435,120,296e70,LGE-LG-H635,en_ZA,2,10,ready,"{""SensId"":""InstalledApps"",""SensVal"":{""com.lge....",,male,only,37,english,xhosa,,2017-05-25 15:28:20,2017-06-10 09:07:58
6db38a,7101,121,6db38a,CUBOT-CUBOT_NOTE_S,de_AT,1,10,ready,"{""SensId"":""InstalledApps"",""SensVal"":{""com.andr...",crikeyvirtualclassroom@gmail.com,male,family,51,english,german,Beginner,2017-05-25 17:24:12,2017-05-30 07:54:00
c5a974,59435,122,c5a974,LG Electronics-LG-X210,fr_FR,1,3,,,,male,,47,french,english,Limited proficiency,2017-05-26 23:29:05,2017-05-26 23:29:07


*Table export*: [valid_participants.csv](./exports/valid_participants.csv)

## Participant Descriptive Stats

In [9]:
rounded_decimals = 2

print('Total: %d participants' % (len(df_valid_participants)))
print('Number of participants with a single day of usage: %d' % len(one_day_usage))

# logs
print('Mean number of logs per participant: %f (SD=%f)' % (round(df_valid_participants['logcount'].mean(), rounded_decimals), round(df_valid_participants['logcount'].std(), rounded_decimals)))

# age
df_valid_age_participants = df_valid_participants[df_valid_participants['age']>0]
print('Mean age: %f (SD=%f), min: %d, max: %d' % (round(df_valid_age_participants['age'].mean(), rounded_decimals), round(df_valid_age_participants['age'].std(), rounded_decimals), df_valid_age_participants['age'].min(), df_valid_age_participants['age'].max()))

# gender
count_f = len(df_valid_participants[df_valid_participants['gender']=='female'])
count_m = len(df_valid_participants[df_valid_participants['gender']=='male'])
count_o = len(df_valid_participants[(df_valid_participants['gender']!='male') & (df_valid_participants['gender']!='female')])
print('%d female / %d male / %d other' % (count_f, count_m, count_o))

Total: 21 participants
Number of participants with a single day of usage: 3
Mean number of logs per participant: 29077.520000 (SD=46222.380000)
Mean age: 35.350000 (SD=12.520000), min: 17, max: 62
9 female / 11 male / 1 other


### Device Locales:

In [10]:
# locales
locales = df_valid_participants.groupby('locale')
# print('Device Locales:')
for key, item in locales:
    print('%s: %d' % (key, len(item)))

de_AT: 1
de_DE: 2
en_GB: 4
en_US: 4
en_ZA: 5
es_US: 1
fr_FR: 4


### Who uses this phone?

In [11]:
# phone usage (i.e., who_uses_this_phone?)
usage = df_valid_participants.groupby('who_uses_this_phone')
print('')
# print('Who uses this phone?')
filled_in = 0
for key, item in usage:
    print('%s: %d' % (key, len(item)))
    filled_in += len(item)
print('n/a: %d' % (len(df_valid_participants)-filled_in))


family: 1
only: 13
unknown: 1
n/a: 6


### Language Learning

In [12]:
# src language
l1 = df_valid_participants.groupby('src_language')
print('')
print('Native language (L1):')
for key, item in l1:
    print('%s: %d' % (key, len(item)))
    
# target language (group by proficiency)
l2 = df_valid_participants.groupby(['target_language', 'proficiency'])
print('')
print('Study language (L2) and proficiencies:')
languages = []
for key, item in l2:
#     print('%s total: %d' % (key, len(item)))
    
    if key[0] not in languages:
        print('-%s' % key[0])
        languages.append(key[0])

    print('\t %s: %d' % (key[1], len(item)))


Native language (L1):
english: 6
french: 11
german: 2
spanish: 1
xhosa: 1

Study language (L2) and proficiencies:
-afrikaans
	 None: 1
-english
	 Beginner: 8
	 Conversant: 1
	 Full proficiency: 1
	 Limited proficiency: 1
	 None: 1
	 Professional working proficiency: 2
-french
	 Beginner: 1
-german
	 Beginner: 1
	 Conversant: 1
-spanish
	 Beginner: 1
-xhosa
	 Beginner: 1
	 None: 1


### App Installation

In [18]:
# installation duration in days, i.e. installation date until last log entry
usage_days_per_participant = {} # contains uid and installed period
usage_periods = [] # contains only installed periods
for uid, row in df_valid_participants.iterrows():

#   remove participants with only one day of log dataa
    if uid not in one_day_usage:
        query = 'SELECT uid, ts FROM logs WHERE uid=\'' + uid + '\' ORDER BY id DESC LIMIT 1;'
        df_last_timestamp = pd.read_sql(query, con=conn)   

        diff = df_last_timestamp['ts'][0] - row['created_at']
        usage_days_per_participant[uid] = diff.days
        usage_periods.append(diff.days)

usage_periods = np.asarray(usage_periods)

print('uid \t # of days app installed (i.e., sensor data collected)')
print('-------------------------')
for uid in usage_days_per_participant:
    print('%s \t %d' % (uid, usage_days_per_participant[uid]))
    
print('')
print('%d participants with more than one day of app installation' % len(usage_periods))
print('Average number of days installed: %f (SD=%f), median: %d, min: %d, max: %d' % (round(usage_periods.mean(), rounded_decimals), round(usage_periods.std(), rounded_decimals), np.median(usage_periods), usage_periods.min(), usage_periods.max()))

uid 	 # of days app installed (i.e., sensor data collected)
-------------------------
e9b942 	 79
e0ff8b 	 13
1e215f 	 100
a4af1f 	 159
58d55d 	 63
7463cd 	 76
84de2d 	 149
296e70 	 17
6db38a 	 16
c5a974 	 84
e0f119 	 86
f9b787 	 77
3602ff 	 82
4ffec4 	 81
3d3229 	 14
a56192 	 12
519d4b 	 60
8aeae1 	 7

18 participants with more than one day of app installation
Average number of days installed: 65.280000 (SD=44.180000), median: 76, min: 7, max: 159


# App Usage

In [19]:
FLASCHCARD_CONDITION = 0
MULTIPLE_CHOICE_CONDITION = 1

NOTIFICATION_MODE = 0 # word reviewed via notification drawer
APP_MODE = 1          # word reviewed in app

## App Launches

In [22]:
'''
number of dedicated app launches, through notification or via app icon
'''
launch_per_participant = {}
total_app_launches = []
notif_app_launches = []
dedicated_app_launches = []
for uid, row in df_valid_participants.iterrows():

#   remove participants with only one day of log data
    if uid not in one_day_usage:
        launches = {
            'total': 0,
            'through_notif': 0,
            'through_app_icon': 0
        }
        query = 'SELECT * FROM logs WHERE uid=\'' + uid + '\' AND sensor_id=\'AppLaunch\';'

        df_app_launches = pd.read_sql(query, con=conn)   
        
        for index, item in df_app_launches.iterrows():
            
            val = json.loads(item['value'])
            
            launches['total'] += 1
            if(val['Mode']==NOTIFICATION_MODE):
                launches['through_notif'] += 1
            else:
                launches['through_app_icon'] += 1
        
        launch_per_participant[uid] = launches
        total_app_launches.append(launches['total'])
        notif_app_launches.append(launches['through_notif'])
        dedicated_app_launches.append(launches['through_app_icon'])

total_app_launches = np.asarray(total_app_launches)
notif_app_launches = np.asarray(notif_app_launches)
dedicated_app_launches = np.asarray(dedicated_app_launches)
launch_per_participant = pd.DataFrame.from_dict(launch_per_participant)

launch_per_participant
# launch_per_participant.to_csv ('./exports/launch_per_participant.csv', index = None, header=True)

Unnamed: 0,e9b942,e0ff8b,1e215f,a4af1f,58d55d,7463cd,84de2d,296e70,6db38a,c5a974,e0f119,f9b787,3602ff,4ffec4,3d3229,a56192,519d4b,8aeae1
through_app_icon,5,2,0,268,24,45,16,21,42,1,31,14,23,31,61,53,12,8
through_notif,14,5,12,22,48,21,264,6,0,3,0,0,0,0,1,0,0,0
total,19,7,12,290,72,66,280,27,42,4,31,14,23,31,62,53,12,8


In [24]:
print('App launches (total): mean: %f (SD=%f), median: %d, min: %d, max: %d' % (round(total_app_launches.mean(), rounded_decimals), round(total_app_launches.std(), rounded_decimals), np.median(total_app_launches), total_app_launches.min(), total_app_launches.max()))
print('App launches through notifications: mean: %f (SD=%f), median: %d, min: %d, max: %d' % (round(notif_app_launches.mean(), rounded_decimals), round(notif_app_launches.std(), rounded_decimals), np.median(notif_app_launches), notif_app_launches.min(), notif_app_launches.max()))
print('Dedicated app launches through app icon: mean: %f (SD=%f), median: %d, min: %d, max: %d' % (round(dedicated_app_launches.mean(), rounded_decimals), round(dedicated_app_launches.std(), rounded_decimals), np.median(dedicated_app_launches), dedicated_app_launches.min(), dedicated_app_launches.max()))

App launches (total): mean: 58.500000 (SD=82.670000), median: 29, min: 4, max: 290
App launches through notifications: mean: 22.000000 (SD=59.920000), median: 2, min: 0, max: 264
Dedicated app launches through app icon: mean: 36.500000 (SD=58.840000), median: 22, min: 0, max: 268


*Table export*: [launch_per_participant.csv](./exports/launch_per_participant.csv)

*TODO: Test for significance:*
- significantly more app launches through app icon than notifications?

## Word Reviews

In [25]:
'''
Day usage across total days of installation, i.e. on how many days the app was actually used
- what constitutes app usage? > at least one WordReviewed per day
'''
usage_days_per_participant = {} # contains uid and word reviews
usage_days = [] # contains only word reviews
for uid, row in df_valid_participants.iterrows():

#   remove participants with only one day of log data
    if uid not in one_day_usage:
        days = []
        query = 'SELECT uid, ts FROM logs WHERE uid=\'' + uid + '\' AND sensor_id=\'WordReviewed\';'
        df_word_reviews = pd.read_sql(query, con=conn)   
#         print(df_word_reviews)
        
#         print(uid, len(df_word_reviews))
        
        for index, item in df_word_reviews.iterrows():
#             print(item)
            d = '%s-%s-%s' % (item['ts'].day, item['ts'].month, item['ts'].year)
            if d not in days:
                days.append(d)
            
        usage_days_per_participant[uid] = days
        usage_days.append(len(days))

usage_days = np.asarray(usage_days)
print('uid \t # of review days')
print('-------------------------')
for uid in usage_days_per_participant:
    print('%s \t %d' % (uid, len(usage_days_per_participant[uid])))

print('')
print('Average number of days on which words were reviewed: %f (SD=%f), median: %d, min: %d, max: %d' % (round(usage_days.mean(), rounded_decimals), round(usage_days.std(), rounded_decimals), np.median(usage_days), usage_days.min(), usage_days.max()))


uid 	 # of review days
-------------------------
e9b942 	 14
e0ff8b 	 8
1e215f 	 53
a4af1f 	 33
58d55d 	 23
7463cd 	 45
84de2d 	 123
296e70 	 6
6db38a 	 15
c5a974 	 25
e0f119 	 7
f9b787 	 2
3602ff 	 4
4ffec4 	 6
3d3229 	 8
a56192 	 9
519d4b 	 4
8aeae1 	 5

Average number of days on which words were reviewed: 21.670000 (SD=28.450000), median: 8, min: 2, max: 123


### Vocabulary

In [29]:
'''
analysis of the words reviewed 
'''
words_per_participant = {} # contains uid and word reviews
total_words_reviewed = [] # contains only word reviews

fc_reviews = []
mc_reviews = []
for uid, row in df_valid_participants.iterrows():

#   remove participants with only one day of log dataa
    if uid not in one_day_usage:
        reviews = {
            'total': 0,
            'fc': 0,
            'mc': 0,
            'in_notif': 0,
            'in_app': 0,
            'correct': 0,
            'false': 0,
            'unique': 0,
            'repeat': 0
        }
        query = 'SELECT * FROM logs WHERE uid=\'' + uid + '\' AND sensor_id=\'WordReviewed\';'
        df_word_reviews = pd.read_sql(query, con=conn)   
        
        for index, item in df_word_reviews.iterrows():
            
            val = json.loads(item['value'])

            reviews['total'] += 1
            
            if(val['Condition']==FLASCHCARD_CONDITION):
                reviews['fc'] += 1
            else: # MULTIPLE_CHOICE_CONDITION
                reviews['mc'] += 1
            
            if(val['Mode']==NOTIFICATION_MODE):
                reviews['in_notif'] += 1
            else: # APP_MODE
                reviews['in_app'] += 1
            
            if(val['GuessedCorrectly']):
                reviews['correct'] += 1
            else:
                reviews['false'] += 1
            
            if(val['SeenBefore']):
                reviews['repeat'] += 1
            else:
                reviews['unique'] += 1
        
        words_per_participant[uid]=reviews
        total_words_reviewed.append(reviews['total'])

total_words_reviewed = np.asarray(total_words_reviewed)
words_per_participant = pd.DataFrame.from_dict(words_per_participant)
# words_per_participant = words_per_participant.set_index("uid", drop=False)
words_per_participant
# words_per_participant.to_csv ('./exports/words_per_participant.csv', index = None, header=True)

# print( \
#     'uid \t total \t' +\
#      'fc \t' + \
#      'mc \t' + \
#      'in_notif \t' + \
#      'in_app \t' + \
#      'correct \t' + \
#      'incorrect \t' + \
#      'unique \t' + \
#      'repetitions \t')
# print('-------------------------')
# for uid in words_per_participant:
#     print('%s \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d \t %d' % (uid, words_per_participant[uid]['total'], words_per_participant[uid]['fc'], words_per_participant[uid]['mc'], words_per_participant[uid]['in_notif'], words_per_participant[uid]['in_app'], words_per_participant[uid]['correct'], words_per_participant[uid]['false'], words_per_participant[uid]['unique'], words_per_participant[uid]['repeat']))

Unnamed: 0,e9b942,e0ff8b,1e215f,a4af1f,58d55d,7463cd,84de2d,296e70,6db38a,c5a974,e0f119,f9b787,3602ff,4ffec4,3d3229,a56192,519d4b,8aeae1
correct,146,84,387,414,232,180,1050,117,47,70,37,0,0,2,82,11,8,1
false,2,7,28,140,147,39,36,24,58,5,24,13,53,86,120,88,19,17
fc,117,18,183,407,0,98,0,13,105,33,61,13,53,88,194,99,27,18
in_app,148,91,415,554,379,219,1086,141,105,75,61,13,53,88,202,99,27,18
in_notif,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
mc,31,73,232,147,379,121,1086,128,0,42,0,0,0,0,8,0,0,0
repeat,22,37,201,178,129,27,445,61,52,4,32,5,29,60,125,69,12,9
total,148,91,415,554,379,219,1086,141,105,75,61,13,53,88,202,99,27,18
unique,126,54,214,376,250,192,641,80,53,71,29,8,24,28,77,30,15,9


In [30]:
print('Average number of words reviewed: %f (SD=%f), median: %d, min: %d, max: %d' % (round(total_words_reviewed.mean(), rounded_decimals), round(total_words_reviewed.std(), rounded_decimals), np.median(total_words_reviewed), total_words_reviewed.min(), total_words_reviewed.max()))

Average number of words reviewed: 209.670000 (SD=257.300000), median: 102, min: 13, max: 1086


*Table export*: [words_per_participant.csv](./exports/words_per_participant.csv)

###### Observation
- the word review function in the notification drawer was not used at all

# Apps on Phone
Upon app install, we took a snapshot of all applications installed on participants' phones.

### App Blacklist
[app_blacklist.txt](./app_blacklist.txt)

In [164]:
app_blacklist = []
with open('./app_blacklist.txt', 'r') as blacklist_file:
    app_blacklist = [line.replace('\n','') for line in blacklist_file.readlines()]
    
'''
if app not in app_blacklist and not app.startswith('com.google') and not app.startswith('com.android') and not app.startswith('com.sec'):
'''
def is_blacklisted(app):
    return (app in app_blacklist or app.startswith('com.google') or app.startswith('com.android') or app.startswith('com.sec'))

In [169]:
'''
filter and count all installed apps
'''
installed_apps = {}
deinstalled_apps = {}
for uid, row in df_valid_participants.iterrows():
    if(row['installed_apps']!=None):
        val = json.loads(row['installed_apps'])
        
#         print(val)
        
        # package=True, app is installed
        # package=False, the app has been deinstalled, but data retained
        
        for app in val['SensVal']:
            installed = val['SensVal'][app] 
            
            # filter by blacklist, also remove google and android system apps (i.e., default installations including YouTube)

            if(not is_blacklisted(app)):
                if installed and app not in installed_apps:
                    installed_apps[app] = [1]
                elif installed and app in installed_apps:
                    installed_apps[app][0] += 1

                if not installed and app not in deinstalled_apps:
                    deinstalled_apps[app] = [1]
                elif not installed and app in deinstalled_apps:
                    deinstalled_apps[app][0] += 1
                
installed_apps = pd.DataFrame.from_dict(installed_apps)
deinstalled_apps = pd.DataFrame.from_dict(deinstalled_apps)
installed_apps
# installed_apps.to_csv ('./exports/installed_apps.csv', index = None, header=True)

Unnamed: 0,com.skype.raider,com.quicinc.cne.CNEService,com.whatsapp,com.lge.HiddenMenu,android.autoinstalls.config.google.nexus,com.duolingo,com.kodarkooperativet.blackplayerfree,com.qti.qualcomm.datastatusnotification,de.hafas.android.db,de.sde.mobile,...,com.sonyericsson.customization.presetcontacts.res.overlay_305,com.sonymobile.mtp.extension.fotaupdate,com.sonymobile.music.googlelyricsplugin,com.sonymobile.holla.rcid,com.sonyericsson.tetherentitlementcheck,com.sonyericsson.advancedwidget.clock,com.sonyericsson.advancedwidget.photo,com.sonymobile.providers.callitemsprovider,com.sony.tvsideview.videoph,com.sonyericsson.android.addoncamera.artfilter
0,3,3,16,2,2,2,1,2,2,1,...,1,1,1,1,1,1,1,1,1,1


*Table export*: [installed_apps.csv](./exports/installed_apps.csv)

In [166]:
TOP_N = 20

most_installed_app = installed_apps.idxmax(axis=1)[0]
most_installed_count = installed_apps[most_installed_app][0]

top_n_apps = {}
while(len(top_n_apps) < TOP_N):    

    for app, item in installed_apps.iteritems():
        app_count = item[0]

        if(len(top_n_apps)<TOP_N):
#             print('comparing', installed_apps[app], most_installed_count, (installed_apps[app]==most_installed_count))
            if(app_count==most_installed_count):
                top_n_apps[app] = app_count
        else:
            break
    most_installed_count -= 1

print('Top %d apps:' % TOP_N)
print('-----------------------')
print('App \t # of installs')
print('-----------------------')
count = 1
for app in top_n_apps:
    print('%d: %s \t %d' % (count, app, top_n_apps[app]))
    count += 1

Top 20 apps:
-----------------------
App 	 # of installs
-----------------------
1: com.whatsapp 	 16
2: com.facebook.katana 	 9
3: com.facebook.orca 	 9
4: com.lenovo.anyshare.gps 	 7
5: org.simalliance.openmobileapi.service 	 7
6: com.twitter.android 	 6
7: com.ubercab 	 5
8: com.fmm.dm 	 5
9: com.monotype.android.font.samsungsans 	 5
10: com.wssyncmldm 	 5
11: com.monotype.android.font.cooljazz 	 5
12: com.wsomacp 	 5
13: com.monotype.android.font.chococooky 	 5
14: com.fmm.ds 	 5
15: com.monotype.android.font.rosemary 	 5
16: com.wssnps 	 5
17: com.osp.app.signin 	 5
18: com.samsung.android.MtpApplication 	 5
19: com.samsung.sec.android.application.csc 	 5
20: com.vodafone.vodafone360updates 	 5


Apps that were deinstalled, but their data was retained:

In [167]:
deinstalled_apps

Unnamed: 0,com.jrdcom.setupwizard,com.lge.exchange,com.lge.gcuv,com.huawei.hicare,za.co.collectiv,za.co.reward,com.vouchercloud.android,com.jrdcom.setupwizard2,com.sonymobile.usm,com.sonyericsson.startupflagservice
0,1,1,1,1,1,1,1,1,1,1


In [46]:
# cleanup
conn.close()