# Detect the records that were only screened (no DREAMS code assigned)

> Run the eligibility notebook.

> Run the survey data notebook.



In [2]:
# Execute the two upstream notebooks inside this kernel. Later cells use the
# names `eligibility` and `survey_data`, which are presumably created by these
# notebooks (they are not defined anywhere in this one) — confirm.
%run "../eligibility.ipynb"
%run "../survey_data/survey_data.ipynb"

In [39]:
from IPython.core.interactiveshell import InteractiveShell
# Show the value of every top-level expression in a cell, not only the last one.
# This is a kernel-wide setting, so it also affects every cell run after this one.
InteractiveShell.ast_node_interactivity = "all"
# (rows, columns) of each frame.
survey_data.shape
eligibility.shape

(50032, 19)

(37398, 36)

In [40]:
# List the column names of both frames.
survey_data.columns
eligibility.columns

Index(['case_id', 'organisation', 'first_name', 'last_name', 'dob',
       'interview_date', 'adress', 'commune', 'commune_section',
       'interviewer_firstname', 'interviewer_lastname', 'is_your_parent_alive',
       'mothers_name', 'fathers_name', 'who_is_your_law_parent', 'total',
       'is_eligible_for_dep_hiv', 'age', 'already_in_a_group'],
      dtype='object')

Index(['case_id', 'dreams_code', 'organisation', 'id', 'phone',
       'not_selectable', 'first_name', 'last_name', 'dob', 'interview_date',
       'adress', 'commune', 'commune_section', 'interviewer_firstname',
       'interviewer_lastname', 'is_your_parent_alive', 'mothers_name',
       'fathers_name', 'total', 'who_is_your_law_parent', 'group_name',
       'id_group', 'hub_name', 'id_hub', 'eske_ou_lekol_deja',
       'depi_kile_ou_pa_al_lekol_with_label',
       'c61_depi_ki_l_ou_pa_al_lekl_ank', 'c6_ske_ou_te_oblije_double_deja',
       'kiyes_ki_peye_lekol_pou_ou', 'age', 'kpi_score', 'age_range',
       'newage_range', 'date_entevyou', 'fiscal_year', 'timeOn_system'],
      dtype='object')

## Eligibility

In [41]:
# Number of rows whose DREAMS code is missing.
eligibility["dreams_code"].isna().sum()

0

In [42]:
# Replace missing DREAMS codes with the sentinel string "null" that the later
# filter compares against. The result is assigned back to the column instead of
# calling fillna(inplace=True) on `eligibility.dreams_code`: that attribute
# access can hand back a temporary object, so an in-place fill may never reach
# the frame (pandas warns about this pattern, and it has no effect under
# Copy-on-Write).
eligibility["dreams_code"] = eligibility["dreams_code"].fillna("null")

In [43]:
# Distinct ages present in the data (used to spot out-of-range values).
eligibility["age"].unique()

array([ 17,  14,  13,  15,  18,  10,  24,  16,  20,  11,  19,  21,  12,
         8,   9,   0,   7,  23,  22, 117,  -4,   3,   4,  -5,   6, -17,
         5,  -7,  26,   1, -58, -14,   2,  -1, -13, -18, -99,  -2, -16,
        28, -11,  -3], dtype=int64)

In [44]:
def tranche_age_classique(age):
    """Return the five-year age bracket label for ``age``.

    The brackets are inclusive on both ends: "10-14", "15-19", "20-24" and
    "25-29". Any age that falls in none of them yields "not_valid_age".
    """
    brackets = (
        (10, 14, "10-14"),
        (15, 19, "15-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lower, upper, label in brackets:
        if lower <= age <= upper:
            return label
    return "not_valid_age"
        
def tranche_age_mineur_majeur(age):
    """Return the age bracket label for ``age`` using the 10-17 / 18-19 split.

    The brackets are inclusive on both ends: "10-17", "18-19", "20-24" and
    "25-29". Any age that falls in none of them yields "not_valid_age".
    """
    brackets = (
        (10, 17, "10-17"),
        (18, 19, "18-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lower, upper, label in brackets:
        if lower <= age <= upper:
            return label
    return "not_valid_age"

In [45]:
# Derive both age-bracket columns from `age`.
eligibility["age_range"] = eligibility["age"].map(tranche_age_classique)
eligibility["newage_range"] = eligibility["age"].map(tranche_age_mineur_majeur)

In [46]:
# Check which bracket labels were actually produced.
eligibility["newage_range"].unique()
eligibility["age_range"].unique()

array(['10-17', '18-19', '20-24', 'not_valid_age', '25-29'], dtype=object)

array(['15-19', '10-14', '20-24', 'not_valid_age', '25-29'], dtype=object)

In [47]:
import pandas as pd
from datetime import datetime

In [48]:
# Parse the interview date into a datetime column for the date-based rules below.
eligibility["date_entevyou"] = pd.to_datetime(eligibility["interview_date"])

In [49]:
def fiscalYear21(date):
    """Return the fiscal-year-2021 quarter label for ``date``.

    October-December 2020 is "FY21Q1", January-March 2021 is "FY21Q2",
    April-June 2021 is "FY21Q3" and July-September 2021 is "FY21Q4".
    Every other date yields "not_valid_fy".

    ``date`` must expose ``year`` and ``month`` attributes.
    """
    if date.year == 2020:
        # Only the last calendar quarter of 2020 belongs to FY21.
        return "FY21Q1" if 10 <= date.month <= 12 else "not_valid_fy"
    if date.year == 2021:
        month = date.month
        quarters_2021 = ((1, 3, "FY21Q2"), (4, 6, "FY21Q3"), (7, 9, "FY21Q4"))
        for first_month, last_month, label in quarters_2021:
            if first_month <= month <= last_month:
                return label
    return "not_valid_fy"
    

    

def validTimeOnSystem(date):
    """Tell whether ``date`` lies between 1 April 2020 and the current moment.

    Returns "required_Time_on" when ``date`` is on or after 2020-04-01 and not
    later than ``datetime.now()`` (read at call time); otherwise returns
    "not_valid_time_on".
    """
    window_start = datetime(2020, 4, 1)
    # Keep `date` on the left of both comparisons, joined with `and`, so any
    # value that compares False both ways ends up in the "not valid" branch.
    in_window = date >= window_start and date <= datetime.now()
    return "required_Time_on" if in_window else "not_valid_time_on"


In [50]:
# Label every interview date with its FY21 quarter and its time-on-system status.
interview_dates = eligibility["date_entevyou"]
eligibility["fiscal_year"] = interview_dates.map(fiscalYear21)
eligibility["timeOn_system"] = interview_dates.map(validTimeOnSystem)

In [51]:
# Keep only the rows that have no DREAMS code (the "null" sentinel), whose
# interview date is inside the accepted window, and whose age bracket is
# neither "25-29" nor invalid.
# .copy() makes the result an independent frame: a later cell adds a column to
# it, and writing into a filtered slice of `eligibility` is what triggers
# pandas' SettingWithCopyWarning.
almost_eligibility = eligibility[
    (eligibility["dreams_code"] == "null")
    & (eligibility["timeOn_system"] == "required_Time_on")
    & (eligibility["age_range"] != "25-29")
    & (eligibility["age_range"] != "not_valid_age")
].copy()

In [55]:
# Number of rows that passed the filter (non-null DREAMS code entries).
almost_eligibility["dreams_code"].count()

4391

In [64]:
def firstlastdob(df):
    """Build the key "<first_name> <last_name> <dob>" from one record.

    ``df`` is any object exposing ``first_name``, ``last_name`` and ``dob``
    attributes; the three values are formatted and joined by single spaces.
    """
    return "{} {} {}".format(df.first_name, df.last_name, df.dob)

In [65]:
# Build the de-duplication key "<first_name> <last_name> <dob>" for every row.
# .assign returns a new frame instead of writing a column into a filtered slice
# of `eligibility`, which avoids the SettingWithCopyWarning pandas emits for
# such writes; the cell therefore does not need %%capture, and genuine errors
# or warnings stay visible.
almost_eligibility = almost_eligibility.assign(
    unduplicate_me=almost_eligibility.apply(firstlastdob, axis=1)
)

In [69]:
# Keep the first row for each name / date-of-birth key and drop the repeats.
clean_eligibility = almost_eligibility.drop_duplicates(subset="unduplicate_me", keep="first")

In [71]:
# Number of rows left after removing duplicates.
clean_eligibility["dreams_code"].count()

4360