# Detect the people who have only been screened

> run eligibility notebook

> run survey data notebook



In [4]:
# Execute the two upstream notebooks inside this kernel. The cells below use
# `eligibility` and `survey_data` right away, so these runs are presumably
# what defines them (confirm in the upstream notebooks).
%run "../eligibility.ipynb"
%run "../survey_data/survey_data.ipynb"

In [5]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not only the last one;
# the multi-output cells below depend on this setting.
InteractiveShell.ast_node_interactivity = "all"


In [6]:
# (rows, columns) of each input frame.
survey_data.shape
eligibility.shape

(56716, 19)

(43347, 31)

In [7]:
# Column labels of each input frame, to see which fields the two share.
survey_data.columns
eligibility.columns

Index(['case_id', 'organisation', 'first_name', 'last_name', 'dob',
       'interview_date', 'adress', 'commune', 'commune_section',
       'interviewer_firstname', 'interviewer_lastname', 'is_your_parent_alive',
       'mothers_name', 'fathers_name', 'who_is_your_law_parent', 'total',
       'is_eligible_for_dep_hiv', 'age', 'already_in_a_group'],
      dtype='object')

Index(['case_id', 'dreams_code', 'organisation', 'id', 'phone',
       'not_selectable', 'first_name', 'last_name', 'dob', 'interview_date',
       'adress', 'commune', 'commune_section', 'interviewer_firstname',
       'interviewer_lastname', 'is_your_parent_alive', 'mothers_name',
       'fathers_name', 'total', 'who_is_your_law_parent', 'group_name',
       'id_group', 'hub_name', 'id_hub', 'eske_ou_lekol_deja',
       'depi_kile_ou_pa_al_lekol_with_label',
       'c61_depi_ki_l_ou_pa_al_lekl_ank', 'c6_ske_ou_te_oblije_double_deja',
       'kiyes_ki_peye_lekol_pou_ou', 'age', 'kpi_score'],
      dtype='object')

In [8]:
#survey_data[survey_data.organisation=="CARIS"]

# Eligibility

In [9]:
# Mark rows without a DREAMS code with the sentinel string "null"; the
# eligibility filter further down selects on `dreams_code == "null"`.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the attribute-accessed Series: that form is chained
# assignment, which warns on pandas 2.x and no longer updates the DataFrame
# once Copy-on-Write is active.
eligibility["dreams_code"] = eligibility["dreams_code"].fillna("null")

In [10]:
def tranche_age_classique(age):
    """Map an age in years to its standard five-year band.

    Bands are half-open on the right ([10, 15), [15, 20), ...). Whole-number
    ages land in the same band as before, while fractional ages such as 14.5
    no longer fall into the gap between two bands.

    Args:
        age: Age in years (int or float).

    Returns:
        "10-14", "15-19", "20-24" or "25-29"; "not_valid_age" for anything
        outside [10, 30), including NaN (every comparison with NaN is false).
    """
    if 10 <= age < 15:
        return "10-14"
    if 15 <= age < 20:
        return "15-19"
    if 20 <= age < 25:
        return "20-24"
    if 25 <= age < 30:
        return "25-29"
    return "not_valid_age"
        
def tranche_age_mineur_majeur(age):
    """Map an age in years to a band that splits minors (10-17) from adults.

    Bands are half-open on the right ([10, 18), [18, 20), ...). Whole-number
    ages land in the same band as before, while fractional ages such as 17.5
    no longer fall into the gap between two bands.

    Args:
        age: Age in years (int or float).

    Returns:
        "10-17", "18-19", "20-24" or "25-29"; "not_valid_age" for anything
        outside [10, 30), including NaN (every comparison with NaN is false).
    """
    if 10 <= age < 18:
        return "10-17"
    if 18 <= age < 20:
        return "18-19"
    if 20 <= age < 25:
        return "20-24"
    if 25 <= age < 30:
        return "25-29"
    return "not_valid_age"

In [11]:
# Bucket each age twice: standard five-year bands, and the minor/adult split.
eligibility["age_range"] = eligibility["age"].map(tranche_age_classique)
eligibility["newage_range"] = eligibility["age"].map(tranche_age_mineur_majeur)

In [12]:
import pandas as pd
from datetime import datetime

In [13]:
# Parse the interview date into datetimes so the date helpers below can be
# applied to it.
eligibility["date_entevyou"] = pd.to_datetime(eligibility["interview_date"])

In [14]:
def fiscalYear21(date):
    """Label a date with its fiscal-year-2021 quarter.

    Oct-Dec 2020 is "FY21Q1", Jan-Mar 2021 "FY21Q2", Apr-Jun 2021 "FY21Q3"
    and Jul-Sep 2021 "FY21Q4".

    NOTE(review): every other date, including ones entirely outside FY21
    (before Oct 2020 or from Oct 2021 on), gets the fallback label
    "Q3fy21-Q4fy21" even though it reads like a Q3/Q4 bucket.

    Args:
        date: Object exposing `.year` and `.month`.

    Returns:
        The quarter label as a string.
    """
    fallback = "Q3fy21-Q4fy21"
    year = date.year
    if year == 2020:
        return "FY21Q1" if 10 <= date.month <= 12 else fallback
    if year != 2021:
        return fallback

    month = date.month
    if 1 <= month <= 3:
        return "FY21Q2"
    if 4 <= month <= 6:
        return "FY21Q3"
    if 7 <= month <= 9:
        return "FY21Q4"
    return fallback
    

    

def validTimeOnSystem(date):
    """Tell whether a date lies between 2020-10-01 and the current moment.

    Args:
        date: A datetime-like value comparable with `datetime`.

    Returns:
        "required_Time_on" when 2020-10-01 00:00 <= date <= now (both ends
        inclusive), otherwise "not_valid_time_on".
    """
    window_start = datetime(2020, 10, 1)
    if not (date >= window_start):
        return "not_valid_time_on"
    # The upper bound is evaluated at call time, so future dates are rejected.
    if date <= datetime.now():
        return "required_Time_on"
    return "not_valid_time_on"


In [15]:
# Tag every interview date with its FY21 quarter label and with whether it
# falls inside the 2020-10-01..now window.
eligibility["fiscal_year"] = eligibility["date_entevyou"].map(fiscalYear21)
eligibility["timeOn_system"] = eligibility["date_entevyou"].map(validTimeOnSystem)

In [16]:
# Keep rows that have no DREAMS code (sentinel "null") and whose age band is
# neither "25-29" nor "not_valid_age".
almost_eligibility = eligibility.loc[
    eligibility["dreams_code"].eq("null")
    & ~eligibility["age_range"].isin(["25-29", "not_valid_age"])
]
                                 
                                # (eligibility.timeOn_system=="required_Time_on")&
                                 

In [17]:
# Number of rows that passed the filter (count of non-null dreams_code).
almost_eligibility["dreams_code"].count()

10199

In [18]:
def firstlastdob(df):
    """Build the "first_name last_name dob" key used to spot duplicates.

    Args:
        df: A single record (e.g. a DataFrame row) exposing `first_name`,
            `last_name` and `dob` attributes.

    Returns:
        The three values rendered with default formatting and joined by
        single spaces.
    """
    key_parts = (df.first_name, df.last_name, df.dob)
    return " ".join(format(part) for part in key_parts)

In [19]:
# Add the dedup key with .assign(), which returns a new DataFrame, instead of
# writing a column into the filtered slice of `eligibility`. Writing into the
# slice is what triggers pandas' SettingWithCopyWarning (presumably the reason
# this cell was wrapped in %%capture), so the capture magic is no longer needed.
almost_eligibility = almost_eligibility.assign(
    unduplicate_me=almost_eligibility.apply(firstlastdob, axis=1)
)
# Keep the first row for each "first_name last_name dob" key.
clean_eligibility = almost_eligibility.drop_duplicates(subset=["unduplicate_me"])


In [20]:
# Rows left after dropping duplicate first_name/last_name/dob keys.
clean_eligibility["dreams_code"].count()

10136

# Survey data

In [21]:
# Row-wise "first_name last_name dob" key, used later to drop duplicates.
survey_data["remove_duplicate"] = survey_data.apply(firstlastdob, axis=1)

In [22]:
# Derive the same helper columns on the survey frame as on the eligibility
# frame: parsed interview date, FY21 quarter label, time-window flag and the
# two age bands.
survey_data["date_entevyou"] = pd.to_datetime(survey_data["interview_date"])
survey_data["fiscal_year"] = survey_data["date_entevyou"].map(fiscalYear21)
survey_data["timeOn_system"] = survey_data["date_entevyou"].map(validTimeOnSystem)
survey_data["age_range"] = survey_data["age"].map(tranche_age_classique)
survey_data["newage_range"] = survey_data["age"].map(tranche_age_mineur_majeur)

In [23]:
almost_survey_data = survey_data[(survey_data.already_in_a_group == "no")&
                                 (survey_data.age_range!="25-29")&
                                 (survey_data.age_range!="not_valid_age")&
                                 (survey_data.total>=14)
]
                                 #(survey_data.timeOn_system=="required_Time_on")&

In [24]:
# Number of survey rows that passed the filter (non-null case_id).
almost_survey_data["case_id"].count()

10198

In [25]:
# Keep the first survey row for each first_name/last_name/dob key.
clean_survey_data = almost_survey_data.drop_duplicates(
    subset="remove_duplicate", keep="first"
)

In [26]:
# Survey rows left after de-duplication.
clean_survey_data["case_id"].count()

10135

# what we want

In [27]:
# Only the join key and the group flag are needed from the survey side.
id_form_survey_data = clean_survey_data.loc[:, ["case_id", "already_in_a_group"]]

In [28]:
# Right join on case_id: every de-duplicated eligibility row is kept, and
# `already_in_a_group` is attached wherever the survey has the same case_id.
update_screening_data = pd.merge(
    id_form_survey_data,
    clean_eligibility,
    how="right",
    on="case_id",
)

In [29]:
# Row count of the merged table (non-null case_id).
update_screening_data["case_id"].count()

10136

# compare to old

In [30]:
# Previously exported list, loaded for comparison with the new table.
screenoct21 = pd.read_excel(io='./old/screeneesOct21.xlsx')

In [31]:
# Size of the old list (screeneesOct21.xlsx) versus the freshly built table.
screenoct21["case_id"].count()
update_screening_data["case_id"].count()

3040

10136

In [32]:
# First value: new rows whose case_id already appears in the old list;
# second value: new rows whose case_id does not.
update_screening_data.loc[
    update_screening_data["case_id"].isin(screenoct21["case_id"]), "case_id"
].count()
update_screening_data.loc[
    ~update_screening_data["case_id"].isin(screenoct21["case_id"]), "case_id"
].count()

1358

8778

# save

In [33]:
# Write the merged table next to the notebook, without the index column;
# missing values are written as the literal text "NULL".
update_screening_data.to_excel(
    excel_writer="update_screened.xlsx",
    na_rep="NULL",
    index=False,
)

In [34]:
# Earliest and latest interview dates in the exported table, as YYYY-MM-DD.
update_screening_data["date_entevyou"].min().strftime("%Y-%m-%d")
update_screening_data["date_entevyou"].max().strftime("%Y-%m-%d")

'2017-09-24'

'2021-10-28'