In [2]:
#Needed Libraries
import os

import numpy as np
import pandas as pd
import pymysql
from IPython.display import display

In [41]:
# Display limits used when rendering dataframe outputs in this notebook
display_options = {
    'display.max_columns': 500,
    'max_colwidth': 400,
    'display.max_rows': 600,
    'display.min_rows': 300,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

### Database Connection

In [3]:
# Database Connection
# Credentials are read from environment variables so they never have to be
# typed into (or committed with) the notebook. The redacted 'xx' placeholders
# and the original port/database remain as fallbacks when a variable is unset.
db = pymysql.connect(
    host=os.environ.get('ARIFU_DB_HOST', 'xx'),
    user=os.environ.get('ARIFU_DB_USER', 'xx'),
    password=os.environ.get('ARIFU_DB_PASSWORD', 'xx'),
    port=int(os.environ.get('ARIFU_DB_PORT', 3306)),
    database=os.environ.get('ARIFU_DB_NAME', 'arifu_live'),
)
cursor = db.cursor()

In [4]:
# Test Connection
cursor.execute('SHOW DATABASES')

27

### Pulling and Reading Datasets

In [5]:
# Learner-level analytics table; the cell shows how many distinct phone numbers it holds
analytics_learner_query = '''SELECT * from analytics_learner_lvl_81'''
analytics_learner = pd.read_sql(sql=analytics_learner_query, con=db)
#analytics_learner.to_csv('analytics_learner_81.csv')
analytics_learner['phone_number'].nunique()

9655

In [6]:
# pulling interactions dataset
# Starts from smsinbox and LEFT JOINs the linked smsoutbox row
# (smsoutbox.link_id = smsinbox.id), then learners, objects, packages,
# program_variations, programs, projects and partners.
# Only inbox rows created on/after 2020-07-18 whose project is named "Google" are kept.
query = '''SELECT smsoutbox.id AS outbox_id, smsinbox.id AS inbox_id, smsoutbox.learner_id, learners.phone_number, projects.partner_id, partners.name AS partner_name, projects.id AS project_id, projects.project_name, program_variations.program_id, programs.program_code, programs.program_name, packages.variation_id, program_variations.variation_code, program_variations.variation_name, smsoutbox.package_id, smsoutbox.message_type, smsoutbox.object_id, smsoutbox.message_out, smsinbox.message_in, smsoutbox.sender, smsinbox.created_at FROM smsinbox LEFT JOIN smsoutbox ON smsoutbox.link_id = smsinbox.id LEFT JOIN learners ON smsoutbox.learner_id = learners.learner_id LEFT JOIN objects ON objects.id = smsoutbox.object_id LEFT JOIN packages ON packages.id = smsoutbox.package_id LEFT JOIN program_variations ON packages.variation_id = program_variations.id LEFT JOIN programs ON programs.id = program_variations.program_id LEFT JOIN projects ON programs.project_id = projects.id LEFT JOIN partners ON projects.partner_id = partners.id WHERE smsinbox.created_at >= "2020-07-18 00:00:00" and projects.project_name = "Google"'''
google_int = pd.read_sql(query, db)
# Number of distinct learners in the pulled interactions
google_int.learner_id.nunique()

9957

In [7]:
# Reading AFE Codes: the three variation sheets are loaded from the workbook in one call
afe_code_sheets = pd.read_excel(
    'AFE_codes.xlsx',
    sheet_name=['menu_based', 'LN_sequential', 'LN_grouped'],
)
AFE_codes_mb = afe_code_sheets['menu_based']
AFE_codes_ln_seq = afe_code_sheets['LN_sequential']
AFE_codes_ln_nonseq = afe_code_sheets['LN_grouped']
AFE_codes_mb.head(2)

Unnamed: 0,program_code,start,end
0,GAFE,,
1,GSAVE,(1/8),(8/8)


In [8]:
# Reading profile data
# Later cells read the phone_number, message_out, learner_id and user_response
# columns of this sheet.
profile_int = pd.read_excel('Data/Profile Data 2020 11 11.xlsx')

In [9]:
# Knowledge scores at user level, restricted to partner 81
knowledge_query = '''select * from analytics_knowledge_score_user_level where partner_id = 81'''
knowledge_score = pd.read_sql(sql=knowledge_query, con=db)

In [10]:
# Certificate records for the partner "GOOGLE" (used for certification rates)
certificate_query = '''select id,learner_id,learner,program,partner from certificate_data  where partner = "GOOGLE"'''
certificate_data = pd.read_sql(sql=certificate_query, con=db)

In [11]:
# Cleaned profile data (sheet 'age_gender'); phone numbers are cast to str
# so they can be used as a string merge key
profile_cleaned = (
    pd.read_excel('profiles.xlsx', sheet_name='age_gender')
    .astype({'phone_number': str})
)

In [12]:
# Number of interactions (non-null message_out rows) per phone number
df_num_int = (
    google_int
    .groupby('phone_number')['message_out']
    .count()
    .reset_index(name='num_interactions')
)

In [13]:
# Platform: one row per phone number, keeping the first (phone_number, sender) group
platform = (
    google_int
    .groupby(['phone_number', 'sender'])['learner_id']
    .nunique()
    .reset_index()
    .drop_duplicates(subset='phone_number')
)

### Preparation of Data

>### Adding the num_interactions, age and gender columns

In [14]:
# Attach the per-phone interaction count to every interaction row
google_int = google_int.merge(df_num_int, on='phone_number', how='left')

In [15]:
# Attach the cleaned profile columns to every interaction row
google_int = google_int.merge(profile_cleaned, on='phone_number', how='left')

In [16]:
# Attach the cleaned profile columns to the knowledge score dataframe
knowledge_score = knowledge_score.merge(profile_cleaned, on='phone_number', how='left')

In [17]:
# Attach each phone number's platform (sender) to the knowledge score dataframe
knowledge_score = knowledge_score.merge(platform, on='phone_number', how='left')

>### Dropping Testers from the interactions dataset

In [18]:
# Tester phone numbers, and the same numbers as strings for comparing with phone_number
google_testers = [254722751761,254722403601,254721142364,254703257597,254726572541,254706410315,254703878695,254719666378,254720461425,254723888639,254799260550,254791578958,254727357236,254726572541,254735214551,254726363167,254703878695,254713015501,254732805011,254721626202,254718129918,254703903004,25400088814199,25400082264685,25400082269363,254726406275]
google_testers_str = [str(tester_number) for tester_number in google_testers]

In [19]:
# Interaction rows that belong to testers.
# isin() selects every matching row exactly once; the tester list contains
# repeated numbers, so filtering once per list entry and concatenating
# returned those testers' rows twice.
googleint_testers = google_int[google_int.phone_number.isin(google_testers_str)]

In [20]:
googleint_testers.phone_number.nunique()

18

In [21]:
# Dropping testers from the interactions dataset
# Filtering on phone_number (instead of dropping index labels in place) keeps
# the cell re-runnable: a second run is a no-op rather than a KeyError for
# labels that are already gone.
google_int = google_int[~google_int.phone_number.isin(google_testers_str)].copy()
# counting the new number after dropping
google_int.phone_number.nunique()

9939

>### Extracting Profile Data from the Profile interactions Set

In [22]:
profile_int[['phone_number','message_out']]

Unnamed: 0,phone_number,message_out
0,110039919,Question 1: What is your age bracket? Reply wi...
1,110039919,Question 2: Reply with the name of your county.
2,17039538561,Question 1: What is your age bracket? Reply wi...
3,17039538561,Question 2: Reply with the name of your county.
4,17039538561,Question 3: What is your MAIN source of income...
...,...,...
15341,972509939497,Question 2: Reply with the name of your county.
15342,972509939497,Question 3: What is your MAIN source of income...
15343,972509939497,Question 4: What is your gender? Reply with a ...
15344,972509939497,Your profile is complete! Choose a topic on th...


In [23]:
# One row per (phone_number, profiling message) pair with its distinct learner count
profile_grouped = (
    profile_int
    .groupby(['phone_number', 'message_out'])['learner_id']
    .nunique()
    .reset_index()
)
profile_grouped.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13063 entries, 0 to 13062
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   phone_number  13063 non-null  int64 
 1   message_out   13063 non-null  object
 2   learner_id    13063 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 306.3+ KB


In [24]:
# Pair every (phone_number, profiling message) combination with the response
# recorded on its FIRST row in profile_int.
phone_numbers = list(profile_grouped.phone_number)
profile_message = list(profile_grouped.message_out)
zip_grouped = list(zip(phone_numbers,profile_message))
print(zip_grouped[1])
# Look all pairs up in one pass instead of re-filtering profile_int once per
# pair. drop_duplicates(keep='first') keeps the same row the per-pair filter
# selected with [0], including rows whose response is missing.
first_responses = (
    profile_int
    .dropna(subset=['phone_number', 'message_out'])
    .drop_duplicates(subset=['phone_number', 'message_out'], keep='first')
    .set_index(['phone_number', 'message_out'])['user_response']
)
pair_index = pd.MultiIndex.from_tuples(zip_grouped, names=['phone_number', 'message_out'])
# Reindexing returns the responses in exactly the order of zip_grouped
user_response = first_responses.reindex(pair_index).tolist()

(110039919, 'Question 2: Reply with the name of your county.')


In [25]:
# Combine the phone numbers, profiling messages and recorded user responses
# (three parallel lists) into one dataframe, one row per pair
profile_grouped_full = pd.DataFrame(list(zip(phone_numbers,profile_message,user_response)),columns=['phone_number', 'message_out','user_response'])
profile_grouped_full.head(2)

Unnamed: 0,phone_number,message_out,user_response
0,110039919,Question 1: What is your age bracket? Reply wi...,1.0
1,110039919,Question 2: Reply with the name of your county.,


In [26]:
# Function to extract profile questions
def is_prof(x):
    """Classify a profiling message by the question label it contains.

    Returns 'age', 'gender' or 'county' for the English ("Question N:") and
    Swahili ("Swali N:") labels of questions 1, 4 and 2 respectively, and
    'invalid' when none of those labels occurs in ``x``.
    """
    # Checked in this order; the first label found decides the result
    label_to_field = (
        ('Question 1:', 'age'),
        ('Swali 1:', 'age'),
        ('Question 4:', 'gender'),
        ('Swali 4:', 'gender'),
        ('Swali 2:', 'county'),
        ('Question 2:', 'county'),
    )
    for label, field in label_to_field:
        if label in x:
            return field
    return 'invalid'

profile_grouped_full['profile'] = profile_grouped_full['message_out'].apply(is_prof)

In [27]:
profile_grouped_full

Unnamed: 0,phone_number,message_out,user_response,profile
0,110039919,Question 1: What is your age bracket? Reply wi...,1,age
1,110039919,Question 2: Reply with the name of your county.,,county
2,17039538561,Question 1: What is your age bracket? Reply wi...,4. 36 - 50 yrs,age
3,17039538561,Question 2: Reply with the name of your county.,Pune,county
4,17039538561,Question 3: What is your MAIN source of income...,1. Farming,invalid
...,...,...,...,...
13058,4559932780744436,Question 2: Reply with the name of your county.,Kisii,county
13059,4559932780744436,Question 3: What is your MAIN source of income...,2,invalid
13060,4559932780744436,Question 4: What is your gender? Reply with a ...,1,gender
13061,4559932780744436,Reply with a number to learn:\n1. COVID-19 Hel...,5,invalid


In [28]:
# Keep only the rows classified as a profile question, with the three columns needed
profile_columns = ['phone_number', 'profile', 'user_response']
is_profile_question = profile_grouped_full['profile'] != 'invalid'
df_profile_response = profile_grouped_full.loc[is_profile_question, profile_columns]
df_profile_response.head()

Unnamed: 0,phone_number,profile,user_response
0,110039919,age,1
1,110039919,county,
2,17039538561,age,4. 36 - 50 yrs
3,17039538561,county,Pune
5,17039538561,gender,1. Female


In [29]:
# Keep a single row per (phone_number, profile) pair
#df_profile_response[df_profile_response.duplicated()]
df_profile_response = df_profile_response.drop_duplicates(subset=['phone_number', 'profile'])

In [30]:
#df_profile_response[df_profile_response.duplicated()]

In [31]:
#df_profile_response.set_index(['phone_number','profile'],inplace=True)
df_profilePivot = df_profile_response.pivot(index='phone_number',columns='profile',values='user_response')

In [32]:
#df_profilePivot.to_excel('profiles.xlsx')

In [33]:
# At this point I cleaned the data offline in Excel (it was easier there) and read the cleaned file back into the notebook.

>### Preparing the code dictionaries

In [34]:
# Sheet 'mb': start/end values per program code, plus the list of distinct codes
codes_mb = pd.read_excel('AFE_codes.xlsx', sheet_name='mb')
dict_start_mb = dict(zip(codes_mb['program_code'], codes_mb['start']))
dict_complete_mb = dict(zip(codes_mb['program_code'], codes_mb['end']))
c_mb = list(codes_mb['program_code'].unique())

In [35]:
# Sheet 'lnS': start/end values per program code, plus the list of distinct codes
codes_seq = pd.read_excel('AFE_codes.xlsx', sheet_name='lnS')
dict_start_seq = dict(zip(codes_seq['program_code'], codes_seq['start']))
dict_complete_seq = dict(zip(codes_seq['program_code'], codes_seq['end']))
c_seq = list(codes_seq['program_code'].unique())

In [36]:
# Sheet 'lnG': start/end values per program code, plus the list of distinct codes
codes_nonseq = pd.read_excel('AFE_codes.xlsx', sheet_name='lnG')
dict_start_nonseq = dict(zip(codes_nonseq['program_code'], codes_nonseq['start']))
dict_complete_nonseq = dict(zip(codes_nonseq['program_code'], codes_nonseq['end']))
c_nonseq = list(codes_nonseq['program_code'].unique())

In [37]:
# Sheet 'mb_dup': start/end values per program code, plus the list of distinct codes
codes_mb_dup = pd.read_excel('AFE_codes.xlsx', sheet_name='mb_dup')
dict_start_mb_dup = dict(zip(codes_mb_dup['program_code'], codes_mb_dup['start']))
dict_complete_mb_dup = dict(zip(codes_mb_dup['program_code'], codes_mb_dup['end']))
c_mb_dup = list(codes_mb_dup['program_code'].unique())

>### Preparing the interaction dataset for each variation

In [38]:
# Interactions of the menu based variation: one slice of google_int per
# program code listed in AFE_codes_mb, stacked in code order
mb_code_list = list(AFE_codes_mb.program_code)
mb_dataframes = [
    google_int[google_int.program_code == code]
    for code in mb_code_list
]
mb_int = pd.concat(mb_dataframes)
mb_int.phone_number.nunique()

1231

In [39]:
# Interactions of the long narrative sequential variation: one slice of
# google_int per program code listed in AFE_codes_ln_seq, stacked in code order
lnSeq_code_list = list(AFE_codes_ln_seq.program_code)
lnSeq_dataframes = [
    google_int[google_int.program_code == code]
    for code in lnSeq_code_list
]
seq_int = pd.concat(lnSeq_dataframes)
seq_int.phone_number.nunique()

501

In [40]:
# Interactions of the long narrative grouped (non-sequential) variation: one
# slice of google_int per program code listed in AFE_codes_ln_nonseq, stacked in code order
nonSeq_code_list = list(AFE_codes_ln_nonseq.program_code)
nonSeq_dataframes = [
    google_int[google_int.program_code == code]
    for code in nonSeq_code_list
]
nonseq_int = pd.concat(nonSeq_dataframes)
nonseq_int.phone_number.nunique()

441

In [53]:
nonseq_int.sender.unique()

array(['22744', 'fb', 'telegram', 'whatsapp', 'whatsapp_ni', '33300'],
      dtype=object)

### Analysis

>## Engagement Level

In [54]:
def get_interactions(df):
    """Return the (mean, median) number of non-null ``message_out`` rows per learner.

    Rows are grouped by ``learner_id``; each learner contributes one count.
    """
    messages_per_learner = df.groupby('learner_id')['message_out'].count()
    return messages_per_learner.mean(), messages_per_learner.median()

In [55]:
# Mean and Median Engagement level for menu based
get_interactions(mb_int)

(38.80097481722177, 21.0)

In [181]:
# Mean and Median Engagement level for menu based dropped
# NOTE(review): mb_int_dropped is not defined in any cell visible above this
# one (a later cell's recorded output is a NameError for it) — TODO add or
# restore the cell that builds it so Restart & Run All works.
get_interactions(mb_int_dropped)

(38.082835183603756, 21.0)

In [57]:
# Mean and Median Engagement level for sequential Message
get_interactions(seq_int)

(42.007984031936125, 20.0)

In [182]:
# Mean and Median Engagement level for sequential Message Dropped
get_interactions(seq_int_dropped)

(40.630390143737166, 20.0)

In [59]:
# Mean and Median Engagement level for Grouped Message
get_interactions(nonseq_int)

(32.673469387755105, 14.0)

In [183]:
# Mean and Median Engagement level for Grouped Message Dropped
get_interactions(nonseq_int_dropped)

(31.191601049868765, 13.0)

>## Knowledge Scores

In [61]:
knowledge_score.head()

Unnamed: 0.2,index,partner_id,user_id,phone_number,trainings_attempted_blooms_content_question,possible_blooms_content_questions,blooms_content_questions_attempted,blooms_content_questions_scored_correct,quizzes_attempted,possible_quiz_questions,...,blooms_quiz_questions_scored_correct,trainings_score,trainings_possible_score,percent,Unnamed: 0,Unnamed: 0.1,age,gender,sender,learner_id
0,1,81.0,130.0,254702796022,1.0,2.0,2.0,2.0,0.0,0.0,...,,7.0,7.0,100.0,,,,,22744,1.0
1,2,81.0,427.0,254727450780,4.0,9.0,9.0,5.0,0.0,0.0,...,,15.5,22.5,69.0,,,,,22744,1.0
2,3,81.0,440.0,254710458341,1.0,3.0,3.0,3.0,0.0,0.0,...,,6.5,6.5,100.0,,,,,22744,1.0
3,4,81.0,612.0,254723595423,0.0,0.0,0.0,,1.0,10.0,...,3.0,9.0,23.0,39.0,,,,,22744,1.0
4,5,81.0,929.0,254712484825,2.0,4.0,3.0,2.0,0.0,0.0,...,,4.5,11.0,41.0,,,,,22744,1.0


In [62]:
def get_KS(df):
    """Print the knowledge-score summary of ``df``; returns nothing.

    Three lines are printed: the mean of ``trainings_score``, the mean of
    ``trainings_possible_score``, and the first as a percentage of the second.
    """
    mean_scored = df['trainings_score'].mean()
    mean_possible = df['trainings_possible_score'].mean()
    print(mean_scored)
    print(mean_possible)
    percent_of_possible = mean_scored/mean_possible * 100
    print(percent_of_possible)

In [63]:
# Knowledge score rows for the learners (phone numbers) of the menu based variation
mb_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in mb_int.phone_number.unique()
]
mb_knowledgescore = pd.concat(mb_scores)
mb_knowledgescore.phone_number.nunique()

852

In [175]:
# Knowledge score rows for the learners (phone numbers) of the menu based variation, dropped set
mb_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in mb_int_dropped.phone_number.unique()
]
mb_knowledgescore_dropped = pd.concat(mb_scores)
mb_knowledgescore_dropped.phone_number.nunique()

797

In [75]:
# Knowledge score rows for the learners (phone numbers) of the sequential variation
seq_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in seq_int.phone_number.unique()
]
seq_knowledgescore = pd.concat(seq_scores)
seq_knowledgescore.phone_number.nunique()

338

In [176]:
# Knowledge score rows for the learners (phone numbers) of the sequential variation, dropped set
seq_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in seq_int_dropped.phone_number.unique()
]
seq_knowledgescore_dropped = pd.concat(seq_scores)
seq_knowledgescore_dropped.phone_number.nunique()

324

In [77]:
# Knowledge score rows for the learners (phone numbers) of the non-sequential variation
nonseq_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in nonseq_int.phone_number.unique()
]
nonseq_knowledgescore = pd.concat(nonseq_scores)
nonseq_knowledgescore.phone_number.nunique()

296

In [177]:
# Knowledge score rows for the learners (phone numbers) of the non-sequential variation, dropped set
nonseq_scores = [
    knowledge_score[knowledge_score.phone_number == phone]
    for phone in nonseq_int_dropped.phone_number.unique()
]
nonseq_knowledgescore_dropped = pd.concat(nonseq_scores)
nonseq_knowledgescore_dropped.phone_number.nunique()

241

In [79]:
# Menu Based Knowledge Score
get_KS(mb_knowledgescore)

48.7887323943662
63.93779342723005
76.30656264341441


In [178]:
# Menu Based Knowledge Score for Dropped
get_KS(mb_knowledgescore_dropped)

44.91091593475533
58.89272271016311
76.2588548601864


In [81]:
# Sequential Knowledge Score
get_KS(seq_knowledgescore)

51.773668639053255
67.56952662721893
76.62280797775686


In [179]:
# Sequential Knowledge Score for Dropped
get_KS(seq_knowledgescore_dropped)

45.23456790123457
58.824074074074076
76.89805341308568


In [83]:
# Non Sequential Knowledge Score
get_KS(nonseq_knowledgescore)

57.638513513513516
74.71621621621621
77.14324470971243


In [180]:
# Non Sequential Knowledge Score for dropped
get_KS(nonseq_knowledgescore_dropped)

46.83402489626556
60.49170124481328
77.42223136811057


>## Completion Rates

In [85]:
# defined function to acquire the learners
def _count_learners_for_marker(per_message, marker):
    """Sum the learner counts of the messages whose text contains ``marker``.

    A missing marker (None/NaN) counts as 0: turning it into the text
    'None'/'nan' would match unrelated messages (e.g. 'nan' inside 'finance').
    """
    if pd.isna(marker):
        return 0
    needle = str(marker)
    hits = per_message['message_out'].map(
        lambda text: isinstance(text, str) and needle in text
    ).astype(bool)
    return per_message.loc[hits, 'learner_id'].sum()


def get_complete_figs(b,dict_start,dict_complete,c):
    """Display how many learners started and completed each program code.

    Parameters
    ----------
    b : DataFrame with ``program_code``, ``message_out`` and ``learner_id`` columns.
    dict_start : mapping of program code -> text that marks the first message.
    dict_complete : mapping of program code -> text that marks the last message.
    c : iterable of program codes to report, in display order.

    For every code, the distinct learners per message are counted and summed
    over the messages containing the start (resp. end) marker. The result is
    shown with ``display`` as a frame with columns code/starting/completing;
    nothing is returned.

    The counts are collected locally, so the function no longer depends on
    module-level ``starting``/``completing`` lists being reset before each call.
    """
    rows = []
    for code in c:
        per_message = (
            b[b['program_code'] == code]
            .groupby(['message_out'])
            .learner_id.nunique()
            .reset_index()
        )
        begun = _count_learners_for_marker(per_message, dict_start.get(code))
        complete = _count_learners_for_marker(per_message, dict_complete.get(code))
        rows.append((code, begun, complete))

    test_df = pd.DataFrame(rows, columns=['code', 'starting','completing'])
    display(test_df)

In [86]:
# Defining code to get completion of all training
def get_compTraining(b,dict_complete,c):
    """Return, per phone number, which program codes were completed.

    Parameters
    ----------
    b : DataFrame with ``program_code``, ``message_out``, ``phone_number``
        and ``learner_id`` columns. It is not modified.
    dict_complete : mapping of program code -> text that marks the last message.
    c : iterable of program codes to check.

    Returns
    -------
    DataFrame with a ``phone_number`` column and one column per program code
    that has at least one completing row; each cell holds the number of
    distinct learner ids (NaN where that phone never completed the code).
    Only a ``phone_number`` column is returned when nothing completes.
    """
    completed_parts = []
    for code in c:
        marker = dict_complete.get(code)
        # A missing marker would otherwise be searched for as 'None'/'nan'
        if pd.isna(marker):
            continue
        needle = str(marker)
        rows = b[b['program_code'] == code]
        # Boolean mask instead of adding a column to a slice of ``b``;
        # rows without a text message never count as completing
        reached_end = rows['message_out'].map(
            lambda text: isinstance(text, str) and needle in text
        ).astype(bool)
        completed_parts.append(rows[reached_end])

    # pd.concat raises on an empty list, and there is nothing to pivot
    if not completed_parts or all(part.empty for part in completed_parts):
        return pd.DataFrame(columns=['phone_number'])

    full_Train = pd.concat(completed_parts)
    full_Train = full_Train.groupby(['phone_number','program_code']).learner_id.nunique().unstack().reset_index()
    return full_Train

#### Menu Based

In [184]:
starting = []
completing = []
get_complete_figs(mb_int,dict_start_mb,dict_complete_mb,c_mb)

Unnamed: 0,code,starting,completing
0,GSAVE,161,112
1,GQUIZ,182,139
2,GSECURE,241,171
3,GSPELLS,182,146
4,GSAVING,99,82
5,GGROW,220,160
6,GTLOAN,357,253
7,GLOAN,361,270
8,GMONEY,139,106
9,GSAVEM,91,0


In [88]:
df_complete_mb = get_compTraining(mb_int,dict_complete_mb,c_mb)

In [89]:
df_complete_mb.to_excel('mb_complete.xlsx')

In [188]:
df_complete_mb_dropped = get_compTraining(mb_int_dropped,dict_complete_mb,c_mb)

In [189]:
df_complete_mb_dropped.to_excel('mb_complete_dropped.xlsx')

In [92]:
# Dropped

In [93]:
starting = []
completing = []
# NOTE(review): the recorded output of this cell is
# "NameError: name 'mb_int_dropped' is not defined"; no visible cell creates
# mb_int_dropped — TODO define it before this call.
get_complete_figs(mb_int_dropped,dict_start_mb,dict_complete_mb,c_mb)

NameError: name 'mb_int_dropped' is not defined

#### LN - Sequential

In [185]:
starting = []
completing = []
get_complete_figs(seq_int,dict_start_seq,dict_complete_seq,c_seq)

Unnamed: 0,code,starting,completing
0,GFUTURE,496,273
1,GDSPELLS,214,131
2,GSAVENOW,0,0
3,GLGROW,88,66
4,GLLOANS,58,47
5,GLGAMBLE,40,34
6,GMMOBILE,31,30
7,GSAVES,29,27
8,GLBORROW,25,23
9,GAVOIDCONS,20,20


In [186]:
#Dropped
starting = []
completing = []
get_complete_figs(seq_int_dropped,dict_start_seq,dict_complete_seq,c_seq)

Unnamed: 0,code,starting,completing
0,GFUTURE,482,265
1,GDSPELLS,206,125
2,GSAVENOW,0,0
3,GLGROW,84,64
4,GLLOANS,56,45
5,GLGAMBLE,38,32
6,GMMOBILE,29,28
7,GSAVES,28,26
8,GLBORROW,24,22
9,GAVOIDCONS,19,19


#### Non Seq

In [96]:
starting = []
completing = []
get_complete_figs(nonseq_int,dict_start_nonseq,dict_complete_nonseq,c_nonseq)

Unnamed: 0,code,starting,completing
0,GSECFUT,438,232
1,GPREPDRY,174,125
2,GLOGROW,88,75
3,GWHKNOW,68,61
4,GGAMBLING,56,52
5,GMOMO,48,45
6,GMOSAVE,39,34
7,GBOROW,32,31
8,GMOCON,28,27
9,GCERTQ,23,23


In [187]:
starting = []
completing = []
get_complete_figs(nonseq_int_dropped,dict_start_nonseq,dict_complete_nonseq,c_nonseq)

Unnamed: 0,code,starting,completing
0,GSECFUT,378,196
1,GPREPDRY,149,103
2,GLOGROW,74,61
3,GWHKNOW,54,49
4,GGAMBLING,45,41
5,GMOMO,38,35
6,GMOSAVE,31,28
7,GBOROW,26,26
8,GMOCON,23,22
9,GCERTQ,18,18


>### Stats By Category

>## Gender

In [98]:
def stats_category(df,category,list_category):
    """Print learner count and engagement level for each value of a category.

    For every entry of ``list_category`` the rows of ``df`` whose ``category``
    column equals it are selected; the number of distinct phone numbers and
    the result of ``get_interactions`` for those rows are printed.
    """
    for label in list_category:
        members = df[df[category] == label]
        print('-------------------------')
        print('Number of Learners in {}'.format(label))
        print(members.phone_number.nunique())
        print('Engagement Level in {}'.format(label))
        print(get_interactions(members))

In [99]:
def ks_category(df,category,list_category):
    """Print learner count and knowledge score for each value of a category.

    For every entry of ``list_category`` the rows of ``df`` whose ``category``
    column equals it are selected; the number of distinct phone numbers is
    printed, followed by the figures ``get_KS`` prints for those rows.
    """
    for each in list_category:
        subset = df[df[category] == each]
        print('-----------------------------------------')
        print('The Number of learners in {}'.format(each))
        print(subset.phone_number.nunique())
        print('The Knowledge Score for {}'.format(each))
        # get_KS prints its figures itself and returns None, so its result is
        # not printed (that produced a stray "None" line after every group)
        get_KS(subset)

>### Engagement

In [100]:
gender_list = ['Female','Male']

In [101]:
#mb_int

In [102]:
stats_category(mb_int,'gender',['Female','Male'])

-------------------------
Number of Learners in Female
56
Engagement Level in Female
(53.69642857142857, 35.0)
-------------------------
Number of Learners in Male
167
Engagement Level in Male
(47.82035928143713, 26.0)


In [103]:
#seq_int

In [104]:
stats_category(seq_int,'gender',['Female','Male'])

-------------------------
Number of Learners in Female
17
Engagement Level in Female
(46.23529411764706, 22.0)
-------------------------
Number of Learners in Male
36
Engagement Level in Male
(90.58333333333333, 22.0)


In [105]:
#nonseq_int

In [106]:
stats_category(nonseq_int,'gender',['Female','Male'])

-------------------------
Number of Learners in Female
15
Engagement Level in Female
(52.733333333333334, 18.0)
-------------------------
Number of Learners in Male
37
Engagement Level in Male
(51.54054054054054, 23.0)


>### Knowledge Score

In [107]:
#menu based
ks_category(mb_knowledgescore,'gender',['Female','Male'])

-----------------------------------------
The Number of learners in Female
50
The Knowledge Score for Female
74.0
96.81
76.43838446441484
None
-----------------------------------------
The Number of learners in Male
138
The Knowledge Score for Male
79.77898550724638
101.42753623188406
78.65614060155747
None


In [108]:
#sequential
ks_category(seq_knowledgescore,'gender',['Female','Male'])

-----------------------------------------
The Number of learners in Female
17
The Knowledge Score for Female
72.97058823529412
92.73529411764706
78.68696479543293
None
-----------------------------------------
The Number of learners in Male
33
The Knowledge Score for Male
104.03030303030303
134.8181818181818
77.16340750730501
None


In [109]:
#nonseq_knowledgescore
ks_category(nonseq_knowledgescore,'gender',['Female','Male'])

-----------------------------------------
The Number of learners in Female
12
The Knowledge Score for Female
102.29166666666667
130.25
78.53486884197058
None
-----------------------------------------
The Number of learners in Male
35
The Knowledge Score for Male
100.25714285714285
127.95714285714286
78.35212682817908
None


>## platform

In [110]:
platform_list = ['22744', 'telegram', 'whatsapp', 'fb']

>#### Engagement Level

In [111]:
# menu based 
stats_category(mb_int,'sender',platform_list)

-------------------------
Number of Learners in 22744
1178
Engagement Level in 22744
(38.16298811544991, 21.0)
-------------------------
Number of Learners in telegram
6
Engagement Level in telegram
(69.0, 53.5)
-------------------------
Number of Learners in whatsapp
36
Engagement Level in whatsapp
(34.166666666666664, 8.5)
-------------------------
Number of Learners in fb
13
Engagement Level in fb
(59.46153846153846, 23.0)


In [112]:
#sequential
stats_category(seq_int,'sender',platform_list)

-------------------------
Number of Learners in 22744
479
Engagement Level in 22744
(40.1419624217119, 20.0)
-------------------------
Number of Learners in telegram
1
Engagement Level in telegram
(55.0, 55.0)
-------------------------
Number of Learners in whatsapp
19
Engagement Level in whatsapp
(61.68421052631579, 27.0)
-------------------------
Number of Learners in fb
4
Engagement Level in fb
(142.5, 141.5)


In [113]:
#non_seq
stats_category(nonseq_int,'sender',platform_list)

-------------------------
Number of Learners in 22744
417
Engagement Level in 22744
(32.45323741007194, 14.0)
-------------------------
Number of Learners in telegram
2
Engagement Level in telegram
(21.0, 21.0)
-------------------------
Number of Learners in whatsapp
18
Engagement Level in whatsapp
(21.444444444444443, 11.5)
-------------------------
Number of Learners in fb
3
Engagement Level in fb
(131.33333333333334, 184.0)


>#### Knowledge Score

In [114]:
#menu based
ks_category(mb_knowledgescore,'sender',platform_list)

-----------------------------------------
The Number of learners in 22744
823
The Knowledge Score for 22744
47.78007290400972
62.59720534629405
76.32940263017422
None
-----------------------------------------
The Number of learners in telegram
0
The Knowledge Score for telegram
nan
nan
nan
None
-----------------------------------------
The Number of learners in whatsapp
10
The Knowledge Score for whatsapp
31.0
43.6
71.10091743119266
None
-----------------------------------------
The Number of learners in fb
10
The Knowledge Score for fb
167.75
215.55
77.82417072604963
None


In [115]:
#Sequential
ks_category(seq_knowledgescore,'sender',platform_list)

-----------------------------------------
The Number of learners in 22744
323
The Knowledge Score for 22744
49.688854489164086
64.42105263157895
77.13139177239523
None
-----------------------------------------
The Number of learners in telegram
0
The Knowledge Score for telegram
nan
nan
nan
None
-----------------------------------------
The Number of learners in whatsapp
9
The Knowledge Score for whatsapp
44.611111111111114
57.5
77.58454106280193
None
-----------------------------------------
The Number of learners in fb
4
The Knowledge Score for fb
258.125
373.0
69.2024128686327
None


In [116]:
# non sequential
ks_category(nonseq_knowledgescore,'sender',platform_list)

-----------------------------------------
The Number of learners in 22744
283
The Knowledge Score for 22744
54.99823321554771
70.59540636042402
77.90624921790926
None
-----------------------------------------
The Number of learners in telegram
0
The Knowledge Score for telegram
nan
nan
nan
None
-----------------------------------------
The Number of learners in whatsapp
7
The Knowledge Score for whatsapp
38.357142857142854
45.214285714285715
84.8341232227488
None
-----------------------------------------
The Number of learners in fb
3
The Knowledge Score for fb
401.5
596.5
67.30930427493713
None


>## Age

In [117]:
age_list = ['Below 18 yrs', '18 - 25 yrs', '26 - 35 yrs','36 - 50 yrs', '51 - 65 yrs','Above 65 yrs']

>#### Engagement Level

In [118]:
stats_category(mb_int,'age',age_list)

-------------------------
Number of Learners in Below 18 yrs
15
Engagement Level in Below 18 yrs
(77.86666666666666, 32.0)
-------------------------
Number of Learners in 18 - 25 yrs
105
Engagement Level in 18 - 25 yrs
(49.86666666666667, 26.0)
-------------------------
Number of Learners in 26 - 35 yrs
90
Engagement Level in 26 - 35 yrs
(48.25555555555555, 30.5)
-------------------------
Number of Learners in 36 - 50 yrs
18
Engagement Level in 36 - 50 yrs
(67.77777777777777, 35.0)
-------------------------
Number of Learners in 51 - 65 yrs
4
Engagement Level in 51 - 65 yrs
(18.25, 15.5)
-------------------------
Number of Learners in Above 65 yrs
2
Engagement Level in Above 65 yrs
(44.0, 44.0)


In [119]:
stats_category(seq_int,'age',age_list)

-------------------------
Number of Learners in Below 18 yrs
3
Engagement Level in Below 18 yrs
(45.666666666666664, 60.0)
-------------------------
Number of Learners in 18 - 25 yrs
24
Engagement Level in 18 - 25 yrs
(40.666666666666664, 20.5)
-------------------------
Number of Learners in 26 - 35 yrs
17
Engagement Level in 26 - 35 yrs
(152.11764705882354, 71.0)
-------------------------
Number of Learners in 36 - 50 yrs
9
Engagement Level in 36 - 50 yrs
(55.77777777777778, 47.0)
-------------------------
Number of Learners in 51 - 65 yrs
2
Engagement Level in 51 - 65 yrs
(21.0, 21.0)
-------------------------
Number of Learners in Above 65 yrs
0
Engagement Level in Above 65 yrs
(nan, nan)


In [120]:
stats_category(nonseq_int,'age',age_list)

-------------------------
Number of Learners in Below 18 yrs
4
Engagement Level in Below 18 yrs
(8.5, 4.5)
-------------------------
Number of Learners in 18 - 25 yrs
23
Engagement Level in 18 - 25 yrs
(64.3913043478261, 22.0)
-------------------------
Number of Learners in 26 - 35 yrs
22
Engagement Level in 26 - 35 yrs
(48.54545454545455, 27.5)
-------------------------
Number of Learners in 36 - 50 yrs
4
Engagement Level in 36 - 50 yrs
(112.75, 98.0)
-------------------------
Number of Learners in 51 - 65 yrs
1
Engagement Level in 51 - 65 yrs
(8.0, 8.0)
-------------------------
Number of Learners in Above 65 yrs
1
Engagement Level in Above 65 yrs
(3.0, 3.0)


>#### Knowledge Score

In [121]:
ks_category(mb_knowledgescore,'age',age_list)

-----------------------------------------
The Number of learners in Below 18 yrs
10
The Knowledge Score for Below 18 yrs
63.7
90.0
70.77777777777779
None
-----------------------------------------
The Number of learners in 18 - 25 yrs
86
The Knowledge Score for 18 - 25 yrs
70.5
92.29651162790698
76.38425196850393
None
-----------------------------------------
The Number of learners in 26 - 35 yrs
80
The Knowledge Score for 26 - 35 yrs
80.75625
103.0
78.40412621359224
None
-----------------------------------------
The Number of learners in 36 - 50 yrs
16
The Knowledge Score for 36 - 50 yrs
108.3125
124.8125
86.78017025538307
None
-----------------------------------------
The Number of learners in 51 - 65 yrs
3
The Knowledge Score for 51 - 65 yrs
16.5
24.166666666666668
68.27586206896551
None
-----------------------------------------
The Number of learners in Above 65 yrs
2
The Knowledge Score for Above 65 yrs
54.25
80.25
67.601246105919
None


In [122]:
ks_category(seq_knowledgescore,'age',age_list)

-----------------------------------------
The Number of learners in Below 18 yrs
2
The Knowledge Score for Below 18 yrs
92.25
111.75
82.5503355704698
None
-----------------------------------------
The Number of learners in 18 - 25 yrs
24
The Knowledge Score for 18 - 25 yrs
83.1875
102.77083333333333
80.94465842286641
None
-----------------------------------------
The Number of learners in 26 - 35 yrs
16
The Knowledge Score for 26 - 35 yrs
103.96875
142.3125
73.0566534914361
None
-----------------------------------------
The Number of learners in 36 - 50 yrs
8
The Knowledge Score for 36 - 50 yrs
86.125
106.25
81.05882352941177
None
-----------------------------------------
The Number of learners in 51 - 65 yrs
2
The Knowledge Score for 51 - 65 yrs
107.0
164.25
65.14459665144597
None
-----------------------------------------
The Number of learners in Above 65 yrs
0
The Knowledge Score for Above 65 yrs
nan
nan
nan
None


In [123]:
ks_category(nonseq_knowledgescore,'age',age_list)

-----------------------------------------
The Number of learners in Below 18 yrs
4
The Knowledge Score for Below 18 yrs
57.0
74.75
76.2541806020067
None
-----------------------------------------
The Number of learners in 18 - 25 yrs
20
The Knowledge Score for 18 - 25 yrs
92.975
117.575
79.0771847756751
None
-----------------------------------------
The Number of learners in 26 - 35 yrs
21
The Knowledge Score for 26 - 35 yrs
108.95238095238095
144.07142857142858
75.62386382416129
None
-----------------------------------------
The Number of learners in 36 - 50 yrs
4
The Knowledge Score for 36 - 50 yrs
127.125
155.25
81.88405797101449
None
-----------------------------------------
The Number of learners in 51 - 65 yrs
0
The Knowledge Score for 51 - 65 yrs
nan
nan
nan
None
-----------------------------------------
The Number of learners in Above 65 yrs
0
The Knowledge Score for Above 65 yrs
nan
nan
nan
None


>## Completion Rates

In [124]:
mb_completionCodes = ['GCONS','GCONSK']
seq_completionCodes = ['GAVOIDCONS','GAVOIDCONSK']
nonseq_completionCodes = ['GMOCON','GMOCONK']

In [125]:
def complete_category(b,list_category,category,codes,dict_complete):
    """Print, for each category value, how many learners completed a program.

    Parameters
    ----------
    b : pandas.DataFrame
        Interactions with the columns named by `category`, plus
        `program_code`, `message_out` and `learner_id`.
    list_category : iterable
        Category values to report on, in print order.
    category : str
        Column of `b` used to split the learners.
    codes : sequence
        Program codes whose messages are considered.
    dict_complete : dict
        Maps a program code to the text that marks a completing message.
        Only the entry for `codes[0]` is looked up.

    Returns
    -------
    None
        The counts are printed, not returned.
    """
    marker = dict_complete.get(codes[0])
    if marker is None:
        # No completion marker is known for this program, so no message can
        # count as completing (previously the text 'None' was searched for).
        completing_rows = b.iloc[0:0]
    else:
        in_program = b.program_code.isin(list(codes))
        # Literal substring match; non-text messages (NaN) never match.
        is_completing = b.message_out.str.contains(str(marker), regex=False, na=False).astype(bool)
        completing_rows = b[in_program & is_completing]

    for each in list_category:
        # Count distinct learners directly: summing per-message unique counts
        # would count a learner once for every different completing text.
        complete = completing_rows.loc[completing_rows[category] == each, 'learner_id'].nunique()
        print('---------------------------------')
        print('Completion for {}'.format(each))
        print(complete)

>## Gender

In [126]:
complete_category(mb_int,gender_list,'gender',mb_completionCodes,dict_complete_mb)

---------------------------------
Completion for Female
5
---------------------------------
Completion for Male
15


In [127]:
complete_category(seq_int,gender_list,'gender',seq_completionCodes,dict_complete_seq)

---------------------------------
Completion for Female
0
---------------------------------
Completion for Male
4


In [128]:
complete_category(nonseq_int,gender_list,'gender',nonseq_completionCodes,dict_complete_nonseq)

---------------------------------
Completion for Female
2
---------------------------------
Completion for Male
4


>## Platform

In [129]:
complete_category(mb_int,platform_list,'sender',mb_completionCodes,dict_complete_mb)

---------------------------------
Completion for 22744
71
---------------------------------
Completion for telegram
1
---------------------------------
Completion for whatsapp
4
---------------------------------
Completion for fb
2


In [130]:
complete_category(seq_int,platform_list,'sender',seq_completionCodes,dict_complete_seq)

---------------------------------
Completion for 22744
18
---------------------------------
Completion for telegram
0
---------------------------------
Completion for whatsapp
2
---------------------------------
Completion for fb
2


In [131]:
complete_category(nonseq_int,platform_list,'sender',nonseq_completionCodes,dict_complete_nonseq)

---------------------------------
Completion for 22744
26
---------------------------------
Completion for telegram
0
---------------------------------
Completion for whatsapp
1
---------------------------------
Completion for fb
2


>## Age

In [132]:
complete_category(mb_int,age_list,'age',mb_completionCodes,dict_complete_mb)

---------------------------------
Completion for Below 18 yrs
2
---------------------------------
Completion for 18 - 25 yrs
9
---------------------------------
Completion for 26 - 35 yrs
9
---------------------------------
Completion for 36 - 50 yrs
3
---------------------------------
Completion for 51 - 65 yrs
0
---------------------------------
Completion for Above 65 yrs
0


In [133]:
complete_category(seq_int,age_list,'age',seq_completionCodes,dict_complete_seq)

---------------------------------
Completion for Below 18 yrs
0
---------------------------------
Completion for 18 - 25 yrs
1
---------------------------------
Completion for 26 - 35 yrs
3
---------------------------------
Completion for 36 - 50 yrs
0
---------------------------------
Completion for 51 - 65 yrs
0
---------------------------------
Completion for Above 65 yrs
0


In [134]:
complete_category(nonseq_int,age_list,'age',nonseq_completionCodes,dict_complete_nonseq)

---------------------------------
Completion for Below 18 yrs
0
---------------------------------
Completion for 18 - 25 yrs
4
---------------------------------
Completion for 26 - 35 yrs
2
---------------------------------
Completion for 36 - 50 yrs
1
---------------------------------
Completion for 51 - 65 yrs
0
---------------------------------
Completion for Above 65 yrs
0


>### Training Completion rate:

In [135]:
AFE_comp_codes = pd.read_excel('AFE_codes.xlsx',sheet_name= 'mb_codes')

In [136]:
AFE_comp_codes.end.unique()

array(['(15/15)', '(17/17)', '(8/8)', '(13/13)', '(11/11)', '(12/12)'],
      dtype=object)

In [137]:
comp_codes_mb = list(AFE_comp_codes.end.unique())
def message_check(x, codes=None):
    """Tell whether a message contains any of the completion markers.

    Parameters
    ----------
    x : object
        One `message_out` value. Anything that is not a string (NaN, None)
        is treated as containing no marker.
    codes : iterable of str, optional
        Markers to look for. Defaults to the notebook-level `comp_codes_mb`.

    Returns
    -------
    bool
        True if at least one marker occurs in `x`, otherwise False.
    """
    if codes is None:
        codes = comp_codes_mb
    # `marker in x` would raise TypeError for a float NaN or None.
    if not isinstance(x, str):
        return False
    # Always return a real boolean rather than falling through to None.
    return any(each in x for each in codes)

In [138]:
mb_int['mb_completed'] = mb_int['message_out'].apply(message_check)

In [139]:
def comp_oneTraining(b,list_category,category):
    """Print the number of distinct phone numbers flagged as completed, per category value.

    For every value in `list_category` a heading is printed, followed by the
    count of unique `phone_number` entries among the rows of `b` whose
    `category` column equals that value and whose `mb_completed` flag is True,
    followed by a separator line.
    """
    finished = b.mb_completed == True
    for value in list_category:
        heading = 'Training Completion for {}'.format(value)
        in_category = b[category] == value
        learners = b.loc[in_category & finished, 'phone_number']
        print(heading)
        print(learners.nunique())
        print('--------------------------------------------------')

In [140]:
comp_oneTraining(mb_int,age_list,'age')

Training Completion for Below 18 yrs
10
--------------------------------------------------
Training Completion for 18 - 25 yrs
61
--------------------------------------------------
Training Completion for 26 - 35 yrs
65
--------------------------------------------------
Training Completion for 36 - 50 yrs
11
--------------------------------------------------
Training Completion for 51 - 65 yrs
2
--------------------------------------------------
Training Completion for Above 65 yrs
2
--------------------------------------------------


In [141]:
comp_oneTraining(mb_int,gender_list,'gender')

Training Completion for Female
40
--------------------------------------------------
Training Completion for Male
104
--------------------------------------------------


In [142]:
comp_oneTraining(mb_int,platform_list,'sender')

Training Completion for 22744
657
--------------------------------------------------
Training Completion for telegram
4
--------------------------------------------------
Training Completion for whatsapp
16
--------------------------------------------------
Training Completion for fb
8
--------------------------------------------------


In [143]:
# Long Narrative grouped

In [144]:
def categorize_df(b,list_category,category):
    """Filter interactions per category value and pass each subset to `get_complete`.

    NOTE(review): as written the function produces no result. The filtered
    frames are never stored, so the second loop has nothing to iterate over,
    and the function implicitly returns None. It appears to be an unfinished
    draft of the per-category loops in the following cells — confirm before use.
    """
    category_dataframes = []
    for each in list_category:
        # NOTE(review): reads the global `seq_int`; the `b` argument is unused.
        # `df` is overwritten on every pass and never appended to category_dataframes.
        df = seq_int[seq_int[category] == each]
    # category_dataframes is still empty here, so this body never executes.
    for x in category_dataframes:
        # These assignments create function-local names only.
        starting = []
        completing = []
        # NOTE(review): the neighbouring cells call `get_complete_figs`; check
        # that `get_complete` is the intended helper.
        get_complete(x,dict_start_seq,dict_complete_seq,c_seq)

In [145]:
# Start/completion figures for the seq_int interactions, one table per gender.
for each in gender_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # Keep only the interactions whose gender equals the current value.
    df = seq_int[seq_int['gender'] == each]
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_seq,dict_complete_seq,c_seq)

Training Completion for Female


Unnamed: 0,code,starting,completing
0,GFUTURE,15,12
1,GDSPELLS,8,6
2,GSAVENOW,0,0
3,GLGROW,3,3
4,GLLOANS,2,2
5,GLGAMBLE,1,1
6,GMMOBILE,1,1
7,GSAVES,1,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


Training Completion for Male


Unnamed: 0,code,starting,completing
0,GFUTURE,36,22
1,GDSPELLS,18,16
2,GSAVENOW,0,0
3,GLGROW,14,13
4,GLLOANS,10,8
5,GLGAMBLE,7,6
6,GMMOBILE,6,6
7,GSAVES,6,5
8,GLBORROW,5,4
9,GAVOIDCONS,3,3


In [146]:
# Start/completion figures for the seq_int interactions, one table per age band.
for each in age_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # Keep only the interactions whose age band equals the current value.
    df = seq_int[seq_int['age'] == each]
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_seq,dict_complete_seq,c_seq)

Training Completion for Below 18 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,3,2
1,GDSPELLS,2,2
2,GSAVENOW,0,0
3,GLGROW,1,1
4,GLLOANS,1,0
5,GLGAMBLE,0,0
6,GMMOBILE,0,0
7,GSAVES,0,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


Training Completion for 18 - 25 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,23,13
1,GDSPELLS,11,6
2,GSAVENOW,0,0
3,GLGROW,6,5
4,GLLOANS,5,2
5,GLGAMBLE,2,2
6,GMMOBILE,2,2
7,GSAVES,2,2
8,GLBORROW,2,2
9,GAVOIDCONS,1,1


Training Completion for 26 - 35 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,17,14
1,GDSPELLS,11,11
2,GSAVENOW,0,0
3,GLGROW,8,8
4,GLLOANS,6,6
5,GLGAMBLE,5,5
6,GMMOBILE,5,5
7,GSAVES,5,3
8,GLBORROW,3,2
9,GAVOIDCONS,2,2


Training Completion for 36 - 50 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,8,6
1,GDSPELLS,5,5
2,GSAVENOW,0,0
3,GLGROW,4,4
4,GLLOANS,2,2
5,GLGAMBLE,1,0
6,GMMOBILE,0,0
7,GSAVES,0,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


Training Completion for 51 - 65 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,2,2
1,GDSPELLS,0,0
2,GSAVENOW,0,0
3,GLGROW,0,0
4,GLLOANS,0,0
5,GLGAMBLE,0,0
6,GMMOBILE,0,0
7,GSAVES,0,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


Training Completion for Above 65 yrs


Unnamed: 0,code,starting,completing
0,GFUTURE,0,0
1,GDSPELLS,0,0
2,GSAVENOW,0,0
3,GLGROW,0,0
4,GLLOANS,0,0
5,GLGAMBLE,0,0
6,GMMOBILE,0,0
7,GSAVES,0,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


In [147]:
# Start/completion figures for the seq_int interactions, one table per platform.
for each in platform_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # The platform is identified by the `sender` column.
    df = seq_int[seq_int['sender'] == each]
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_seq,dict_complete_seq,c_seq)

Training Completion for 22744


Unnamed: 0,code,starting,completing
0,GFUTURE,474,259
1,GDSPELLS,201,121
2,GSAVENOW,0,0
3,GLGROW,80,58
4,GLLOANS,51,40
5,GLGAMBLE,35,30
6,GMMOBILE,27,26
7,GSAVES,25,24
8,GLBORROW,22,20
9,GAVOIDCONS,17,17


Training Completion for telegram


Unnamed: 0,code,starting,completing
0,GFUTURE,1,0
1,GDSPELLS,1,1
2,GSAVENOW,0,0
3,GLGROW,0,0
4,GLLOANS,0,0
5,GLGAMBLE,0,0
6,GMMOBILE,0,0
7,GSAVES,0,0
8,GLBORROW,0,0
9,GAVOIDCONS,0,0


Training Completion for whatsapp


Unnamed: 0,code,starting,completing
0,GFUTURE,17,11
1,GDSPELLS,10,8
2,GSAVENOW,0,0
3,GLGROW,7,7
4,GLLOANS,6,6
5,GLGAMBLE,4,3
6,GMMOBILE,3,3
7,GSAVES,3,2
8,GLBORROW,2,2
9,GAVOIDCONS,2,2


Training Completion for fb


Unnamed: 0,code,starting,completing
0,GFUTURE,3,2
1,GDSPELLS,2,1
2,GSAVENOW,0,0
3,GLGROW,1,1
4,GLLOANS,1,1
5,GLGAMBLE,1,1
6,GMMOBILE,1,1
7,GSAVES,1,1
8,GLBORROW,1,1
9,GAVOIDCONS,1,1


In [148]:
# Nonseq

In [149]:
# Start/completion figures for the nonseq_int interactions, one table per age band.
for each in age_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # Keep only the interactions whose age band equals the current value.
    df = nonseq_int[nonseq_int['age'] == each]
    print('-------------------------------------------')
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_nonseq,dict_complete_nonseq,c_nonseq)

-------------------------------------------
Training Completion for Below 18 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,4,1
1,GPREPDRY,1,0
2,GLOGROW,0,0
3,GWHKNOW,0,0
4,GGAMBLING,0,0
5,GMOMO,0,0
6,GMOSAVE,0,0
7,GBOROW,0,0
8,GMOCON,0,0
9,GCERTQ,0,0


-------------------------------------------
Training Completion for 18 - 25 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,22,16
1,GPREPDRY,14,10
2,GLOGROW,7,7
3,GWHKNOW,7,6
4,GGAMBLING,6,5
5,GMOMO,5,5
6,GMOSAVE,4,4
7,GBOROW,4,3
8,GMOCON,3,3
9,GCERTQ,3,3


-------------------------------------------
Training Completion for 26 - 35 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,22,17
1,GPREPDRY,13,11
2,GLOGROW,7,7
3,GWHKNOW,6,6
4,GGAMBLING,5,5
5,GMOMO,5,5
6,GMOSAVE,4,2
7,GBOROW,2,2
8,GMOCON,2,2
9,GCERTQ,1,1


-------------------------------------------
Training Completion for 36 - 50 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,4,4
1,GPREPDRY,4,4
2,GLOGROW,4,4
3,GWHKNOW,4,4
4,GGAMBLING,3,3
5,GMOMO,3,2
6,GMOSAVE,2,2
7,GBOROW,1,1
8,GMOCON,1,1
9,GCERTQ,1,1


-------------------------------------------
Training Completion for 51 - 65 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,1,0
1,GPREPDRY,0,0
2,GLOGROW,0,0
3,GWHKNOW,0,0
4,GGAMBLING,0,0
5,GMOMO,0,0
6,GMOSAVE,0,0
7,GBOROW,0,0
8,GMOCON,0,0
9,GCERTQ,0,0


-------------------------------------------
Training Completion for Above 65 yrs


Unnamed: 0,code,starting,completing
0,GSECFUT,1,0
1,GPREPDRY,0,0
2,GLOGROW,0,0
3,GWHKNOW,0,0
4,GGAMBLING,0,0
5,GMOMO,0,0
6,GMOSAVE,0,0
7,GBOROW,0,0
8,GMOCON,0,0
9,GCERTQ,0,0


In [150]:
# Start/completion figures for the nonseq_int interactions, one table per gender.
for each in gender_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # Keep only the interactions whose gender equals the current value.
    df = nonseq_int[nonseq_int['gender'] == each]
    print('-------------------------------------------')
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_nonseq,dict_complete_nonseq,c_nonseq)

-------------------------------------------
Training Completion for Female


Unnamed: 0,code,starting,completing
0,GSECFUT,15,9
1,GPREPDRY,9,6
2,GLOGROW,5,5
3,GWHKNOW,5,5
4,GGAMBLING,5,4
5,GMOMO,4,4
6,GMOSAVE,3,2
7,GBOROW,2,2
8,GMOCON,2,2
9,GCERTQ,2,2


-------------------------------------------
Training Completion for Male


Unnamed: 0,code,starting,completing
0,GSECFUT,37,28
1,GPREPDRY,22,18
2,GLOGROW,13,13
3,GWHKNOW,12,11
4,GGAMBLING,9,9
5,GMOMO,9,8
6,GMOSAVE,7,6
7,GBOROW,5,4
8,GMOCON,4,4
9,GCERTQ,3,3


In [151]:
# Start/completion figures for the nonseq_int interactions, one table per platform.
for each in platform_list:
    # Rebind the notebook-level accumulators to fresh lists on every pass.
    # NOTE(review): presumably get_complete_figs fills these globals — confirm.
    starting = []
    completing = []
    # The platform is identified by the `sender` column.
    df = nonseq_int[nonseq_int['sender'] == each]
    print('-------------------------------------------')
    print('Training Completion for {}'.format(each))
    get_complete_figs(df,dict_start_nonseq,dict_complete_nonseq,c_nonseq)

-------------------------------------------
Training Completion for 22744


Unnamed: 0,code,starting,completing
0,GSECFUT,412,220
1,GPREPDRY,165,120
2,GLOGROW,85,72
3,GWHKNOW,65,59
4,GGAMBLING,54,50
5,GMOMO,47,44
6,GMOSAVE,38,32
7,GBOROW,30,29
8,GMOCON,26,25
9,GCERTQ,21,21


-------------------------------------------
Training Completion for telegram


Unnamed: 0,code,starting,completing
0,GSECFUT,2,0
1,GPREPDRY,0,0
2,GLOGROW,0,0
3,GWHKNOW,0,0
4,GGAMBLING,0,0
5,GMOMO,0,0
6,GMOSAVE,0,0
7,GBOROW,0,0
8,GMOCON,0,0
9,GCERTQ,0,0


-------------------------------------------
Training Completion for whatsapp


Unnamed: 0,code,starting,completing
0,GSECFUT,17,8
1,GPREPDRY,7,4
2,GLOGROW,2,2
3,GWHKNOW,2,1
4,GGAMBLING,1,1
5,GMOMO,0,0
6,GMOSAVE,0,1
7,GBOROW,1,1
8,GMOCON,1,1
9,GCERTQ,1,1


-------------------------------------------
Training Completion for fb


Unnamed: 0,code,starting,completing
0,GSECFUT,2,1
1,GPREPDRY,1,1
2,GLOGROW,1,1
3,GWHKNOW,1,1
4,GGAMBLING,1,1
5,GMOMO,1,1
6,GMOSAVE,1,1
7,GBOROW,1,1
8,GMOCON,1,1
9,GCERTQ,1,1


>## Quick Checks


>### Check 1. Whether any learners appear in more than one variant

In [152]:
len(set(mb_int.phone_number.unique()) & set(seq_int.phone_number.unique()))

63

In [153]:
allvariants_learners = list(set(mb_int.phone_number.unique()) & set(nonseq_int.phone_number.unique()))

In [154]:
# NOTE(review): evaluating the bare list renders every learner phone number in
# the saved output; `len(allvariants_learners)` would show the size without
# exposing them.
allvariants_learners

['254729602890',
 '254790416452',
 '254710638278',
 '254711653936',
 '254795111680',
 '254724575198',
 '254781439992',
 '254717222720',
 '254757577161',
 '254794580118',
 '254764578843',
 '254718752597',
 '254798090597',
 '254720986735',
 '254714502189',
 '254741513523',
 '254714685549',
 '254703821113',
 '254746987961',
 '254702147113',
 '254764308421',
 '254796887184',
 '3936006623140473',
 '254768206928',
 '254720969270',
 '254704161695',
 '254763613654',
 '254706493667',
 '254797532659',
 '254758166653',
 '254721796988',
 '254716736399',
 '254718643506',
 '254743766235',
 '254701052929',
 '254727703087',
 '254723284371',
 '254766153000',
 '254763351800',
 '254708498325',
 '254764183935',
 '254740705104',
 '254714136555',
 '254758814568',
 '254726881540',
 '254763281384',
 '254764098478',
 '254728735014',
 '254714882874',
 '254764788250',
 '254711689102',
 '254765883193',
 '254757821653',
 '254725120708',
 '254714430283',
 '254763726513',
 '254701583265',
 '254765974816',
 '25471655

In [155]:
len(set(nonseq_int.phone_number.unique()) & set(seq_int.phone_number.unique()))

39

In [156]:
len(set(mb_int.phone_number.unique()) & set(seq_int.phone_number.unique()) & set(nonseq_int.phone_number.unique()))

14

## Checking gender for the 42:

>## DROPPING THE LEARNERS IN ALL VARIANTS

In [157]:
# Phone numbers present in both mb_int and nonseq_int.
# NOTE(review): seq_int is not part of this intersection even though the name
# says "all variants" — confirm this is the set that should be dropped.
allvariants_learners = list(set(mb_int.phone_number.unique()) & set(nonseq_int.phone_number.unique()))

In [158]:
# Split the mb interactions into the rows of the learners listed in
# `allvariants_learners` and the rows of everyone else.
# One vectorised membership test replaces the per-learner filter + pd.concat:
# it scans the frame once, works when the learner list is empty (pd.concat of
# an empty list raises), and does not rely on unique index labels in mb_int.
in_allvariants_mb = mb_int.phone_number.isin(allvariants_learners)
mbvar_learners = mb_int[in_allvariants_mb]
# Index labels of the removed rows, kept under the original name.
drop_indexes = list(mbvar_learners.index)
# Interactions without the learners listed in `allvariants_learners`.
mb_int_dropped = mb_int[~in_allvariants_mb]
# counting the number of distinct learners left after dropping
mb_int_dropped.phone_number.nunique()

1171

In [159]:
# Split the seq interactions into the rows of the learners listed in
# `allvariants_learners` and the rows of everyone else.
# One vectorised membership test replaces the per-learner filter + pd.concat:
# it scans the frame once, works when the learner list is empty (pd.concat of
# an empty list raises), and does not rely on unique index labels in seq_int.
in_allvariants_seq = seq_int.phone_number.isin(allvariants_learners)
seqvar_learners = seq_int[in_allvariants_seq]
# Index labels of the removed rows, kept under the original name.
drop_indexes = list(seqvar_learners.index)
# Interactions without the learners listed in `allvariants_learners`.
seq_int_dropped = seq_int[~in_allvariants_seq]
# counting the number of distinct learners left after dropping
seq_int_dropped.phone_number.nunique()

487

In [160]:
# Split the nonseq interactions into the rows of the learners listed in
# `allvariants_learners` and the rows of everyone else.
# One vectorised membership test replaces the per-learner filter + pd.concat:
# it scans the frame once, works when the learner list is empty (pd.concat of
# an empty list raises), and does not rely on unique index labels in nonseq_int.
in_allvariants_nonseq = nonseq_int.phone_number.isin(allvariants_learners)
nonseqvar_learners = nonseq_int[in_allvariants_nonseq]
# Index labels of the removed rows, kept under the original name.
drop_indexes = list(nonseqvar_learners.index)
# Interactions without the learners listed in `allvariants_learners`.
nonseq_int_dropped = nonseq_int[~in_allvariants_nonseq]
# counting the number of distinct learners left after dropping
nonseq_int_dropped.phone_number.nunique()

381

>### Check for duplicated messages in mb_int

In [161]:
mb_int_check = mb_int[['phone_number','program_code','message_out']]

In [162]:
def complete_category(b,list_category,category,codes,dict_complete):
    """Print, for each category value, how many learners completed a program.

    Parameters
    ----------
    b : pandas.DataFrame
        Interactions with the columns named by `category`, plus
        `program_code`, `message_out` and `learner_id`.
    list_category : iterable
        Category values to report on, in print order.
    category : str
        Column of `b` used to split the learners.
    codes : sequence
        Program codes whose messages are considered.
    dict_complete : dict
        Maps a program code to the text that marks a completing message.
        Only the entry for `codes[0]` is looked up.

    Returns
    -------
    None
        The counts are printed, not returned.
    """
    marker = dict_complete.get(codes[0])
    if marker is None:
        # No completion marker is known for this program, so no message can
        # count as completing (previously the text 'None' was searched for).
        completing_rows = b.iloc[0:0]
    else:
        in_program = b.program_code.isin(list(codes))
        # Literal substring match; non-text messages (NaN) never match.
        is_completing = b.message_out.str.contains(str(marker), regex=False, na=False).astype(bool)
        completing_rows = b[in_program & is_completing]

    for each in list_category:
        # Count distinct learners directly: summing per-message unique counts
        # would count a learner once for every different completing text.
        complete = completing_rows.loc[completing_rows[category] == each, 'learner_id'].nunique()
        print('---------------------------------')
        print('Completion for {}'.format(each))
        print(complete)

In [163]:
def duplicate_learners(b,dict_start,dict_complete,c):
    """Count, per program code, the learners with repeated start/completion rows.

    Parameters
    ----------
    b : pandas.DataFrame
        Interactions with `program_code`, `message_out` and `phone_number`
        columns. Rows are compared across all columns of `b` when looking
        for duplicates.
    dict_start, dict_complete : dict
        Map a program code to the text that marks its starting / completing
        message.
    c : iterable
        Program codes to report on, in output order.

    Returns
    -------
    pandas.DataFrame
        Columns `code`, `starting`, `completing`; the two counts are numbers
        of distinct phone numbers owning at least one duplicated row.
    """
    def repeated_phone_numbers(course_rows, marker):
        # A code with no known marker, or with no rows, cannot have matches
        # (previously a missing key was searched for as the text 'None').
        if marker is None or course_rows.empty:
            return course_rows.iloc[0:0].phone_number
        # Literal substring match computed as a mask, so nothing is written
        # onto the filtered slice; non-text messages (NaN) never match.
        matches = course_rows.message_out.str.contains(str(marker), regex=False, na=False).astype(bool)
        hits = course_rows[matches]
        return hits[hits.duplicated()].phone_number

    duplicate_starting = []
    duplicates_completing = []
    for each in c:
        df_course = b[(b.program_code == each)]
        duplicate_starting.append(repeated_phone_numbers(df_course, dict_start.get(each)).nunique())
        duplicates_completing.append(repeated_phone_numbers(df_course, dict_complete.get(each)).nunique())

    df_learners = pd.DataFrame(list(zip(c,duplicate_starting,duplicates_completing)),columns=['code', 'starting','completing'])
    return df_learners

In [164]:
mb_int_test = mb_int[mb_int.program_code == 'GSAVE']

In [165]:
# Flag the messages that carry the '(8/8)' marker. `assign` builds a new frame
# instead of writing a column onto the filtered slice of mb_int, and the literal
# substring match treats non-text messages (NaN) as not completed.
mb_int_test = mb_int_test.assign(completed=mb_int_test.message_out.str.contains('(8/8)', regex=False, na=False))

In [166]:
mb_int_test_completed = mb_int_test[mb_int_test['completed'] == True]

In [167]:
df_trial_mb = mb_int_test_completed[['phone_number','message_out']]

In [168]:
df_trial_mb[df_trial_mb.duplicated()].phone_number.unique()

array(['254708172472', '254763502452', '254721102729', '254720167245',
       '254710823452', '254711834741', '254740175434', '254722506146',
       '254706245611', '254764736045', '254794166058', '254713349479'],
      dtype=object)

In [169]:
duplicate_learners(mb_int_check,dict_start_mb,dict_complete_mb,c_mb)

Unnamed: 0,code,starting,completing
0,GSAVE,18,12
1,GQUIZ,54,40
2,GSECURE,26,18
3,GSPELLS,47,27
4,GSAVING,25,13
5,GGROW,12,11
6,GTLOAN,25,17
7,GLOAN,26,20
8,GMONEY,9,9
9,GSAVEM,8,0


In [170]:
def duplicate_learners_list(b,dict_start,dict_complete,c):
    """Count the distinct learners with repeated start/completion rows across programs.

    Parameters
    ----------
    b : pandas.DataFrame
        Interactions with `program_code`, `message_out` and `phone_number`
        columns. Rows are compared across all columns of `b` when looking
        for duplicates.
    dict_start, dict_complete : dict
        Map a program code to the text that marks its starting / completing
        message.
    c : iterable
        Program codes to scan.

    Returns
    -------
    tuple of (int, int)
        Number of distinct phone numbers with a duplicated starting row and
        with a duplicated completing row, each learner counted once over all
        codes in `c`.
    """
    def repeated_phone_numbers(course_rows, marker):
        # A code with no known marker, or with no rows, cannot have matches
        # (previously a missing key was searched for as the text 'None').
        if marker is None or course_rows.empty:
            return set()
        # Literal substring match computed as a mask, so nothing is written
        # onto the filtered slice; non-text messages (NaN) never match.
        matches = course_rows.message_out.str.contains(str(marker), regex=False, na=False).astype(bool)
        hits = course_rows[matches]
        # Missing phone numbers are left out, as nunique() did.
        return set(hits[hits.duplicated()].phone_number.dropna())

    duplicate_starting = set()
    duplicates_completing = set()
    for each in c:
        df_course = b[(b.program_code == each)]
        duplicate_starting |= repeated_phone_numbers(df_course, dict_start.get(each))
        duplicates_completing |= repeated_phone_numbers(df_course, dict_complete.get(each))

    learners_startNo = len(duplicate_starting)
    learners_completeNo = len(duplicates_completing)
    return learners_startNo, learners_completeNo

In [171]:
duplicate_learners_list(mb_int_check,dict_start_mb,dict_complete_mb,c_mb)

(185, 117)

In [172]:
duplicate_learners_list(mb_int_dropped[['phone_number','program_code','message_out']],dict_start_mb,dict_complete_mb,c_mb)

(171, 111)

In [173]:
duplicate_learners_list(mb_int_check,dict_start_mb_dup,dict_complete_mb_dup,c_mb_dup)

(154, 96)

In [174]:
duplicate_learners_list(mb_int_dropped[['phone_number','program_code','message_out']],dict_start_mb_dup,dict_complete_mb_dup,c_mb_dup)

(141, 91)