# Detect the participants who were only screened

> Prerequisite: run the eligibility notebook first.

> Prerequisite: run the survey data notebook first.



In [1]:
from IPython.core.interactiveshell import InteractiveShell
from datetime import datetime
from pandas import (
    read_excel,
    to_datetime,
    ExcelWriter,
    DataFrame
)
import openpyxl

InteractiveShell.ast_node_interactivity = "all"


In [2]:
# Load the two input workbooks; both paths are relative to the notebook's
# working directory.
eligibility = read_excel(io='../eligibility.xlsx')
survey_data = read_excel(io='../survey_data/survey_data.xlsx')

# Show (rows, columns) for each frame, survey data first.
survey_data.shape
eligibility.shape

(62899, 19)

(49120, 31)

In [3]:
# List the column labels of both frames (survey data first) so the shared
# and frame-specific fields can be compared side by side.
survey_data.columns
eligibility.columns

Index(['case_id', 'organisation', 'first_name', 'last_name', 'dob',
       'interview_date', 'adress', 'commune', 'commune_section',
       'interviewer_firstname', 'interviewer_lastname', 'is_your_parent_alive',
       'mothers_name', 'fathers_name', 'who_is_your_law_parent', 'total',
       'is_eligible_for_dep_hiv', 'age', 'already_in_a_group'],
      dtype='object')

Index(['case_id', 'dreams_code', 'organisation', 'id', 'phone',
       'not_selectable', 'first_name', 'last_name', 'dob', 'interview_date',
       'adress', 'commune', 'commune_section', 'interviewer_firstname',
       'interviewer_lastname', 'is_your_parent_alive', 'mothers_name',
       'fathers_name', 'total', 'who_is_your_law_parent', 'group_name',
       'id_group', 'hub_name', 'id_hub', 'eske_ou_lekol_deja',
       'depi_kile_ou_pa_al_lekol_with_label',
       'c61_depi_ki_l_ou_pa_al_lekl_ank', 'c6_ske_ou_te_oblije_double_deja',
       'kiyes_ki_peye_lekol_pou_ou', 'age', 'kpi_score'],
      dtype='object')

In [4]:
#survey_data[survey_data.organisation=="CARIS"]

# Eligibility

In [5]:
# Replace missing DREAMS codes with the sentinel string "null", which the
# eligibility filter further down compares against.
# The filled column is assigned back explicitly: calling
# fillna(..., inplace=True) on a column reached through attribute access is a
# chained in-place write, which newer pandas versions warn about and which
# does not update the parent frame under Copy-on-Write.
eligibility["dreams_code"] = eligibility["dreams_code"].fillna("null")

In [6]:
def tranche_age_classique(age):
    """Map an age in years to its five-year reporting band.

    Bands are half-open intervals ([10, 15), [15, 20), [20, 25), [25, 30)),
    so a fractional age such as 14.5 lands in "10-14" instead of falling
    into the gap between two integer bounds. Integer ages map exactly as
    before.

    Parameters
    ----------
    age : int or float
        Age in years. NaN compares false against every bound and is
        therefore reported as invalid.

    Returns
    -------
    str
        One of "10-14", "15-19", "20-24", "25-29", or "not_valid_age" for
        anything outside [10, 30).
    """
    if 10 <= age < 15:
        return "10-14"
    if 15 <= age < 20:
        return "15-19"
    if 20 <= age < 25:
        return "20-24"
    if 25 <= age < 30:
        return "25-29"
    return "not_valid_age"
        
def tranche_age_mineur_majeur(age):
    """Map an age in years to a band that splits minors from adults at 18.

    Bands are half-open intervals ([10, 18), [18, 20), [20, 25), [25, 30)),
    so a fractional age such as 17.5 lands in "10-17" instead of falling
    into the gap between two integer bounds. Integer ages map exactly as
    before.

    Parameters
    ----------
    age : int or float
        Age in years. NaN compares false against every bound and is
        therefore reported as invalid.

    Returns
    -------
    str
        One of "10-17", "18-19", "20-24", "25-29", or "not_valid_age" for
        anything outside [10, 30).
    """
    if 10 <= age < 18:
        return "10-17"
    if 18 <= age < 20:
        return "18-19"
    if 20 <= age < 25:
        return "20-24"
    if 25 <= age < 30:
        return "25-29"
    return "not_valid_age"

In [7]:
# Bucket each eligibility record's age twice: once into the five-year bands
# and once into the minor/adult split.
eligibility["age_range"] = eligibility["age"].map(tranche_age_classique)
eligibility["newage_range"] = eligibility["age"].map(tranche_age_mineur_majeur)

In [8]:
# Parse the interview date into a datetime column used by the date filters below.
eligibility["date_entevyou"] = to_datetime(eligibility["interview_date"])

In [9]:
def fiscalYear21(date):
    """Label a date with the FY21 quarter it falls in.

    FY21 runs from October 2020 through September 2021. Any date outside
    that window gets the catch-all label "Q3fy21-Q4fy21".

    Parameters
    ----------
    date : object exposing ``year`` and ``month`` attributes

    Returns
    -------
    str
        "FY21Q1", "FY21Q2", "FY21Q3", "FY21Q4" or "Q3fy21-Q4fy21".
    """
    year, month = date.year, date.month

    # Q1 is the only quarter that sits in calendar year 2020.
    if year == 2020 and 10 <= month <= 12:
        return "FY21Q1"

    if year == 2021:
        if 1 <= month <= 3:
            return "FY21Q2"
        if 4 <= month <= 6:
            return "FY21Q3"
        if 7 <= month <= 9:
            return "FY21Q4"

    return "Q3fy21-Q4fy21"
    

    

def validTimeOnSystem(date):
    """Tell whether a date lies between 2022-01-01 and the current moment.

    Parameters
    ----------
    date : datetime-like
        Value comparable with ``datetime`` objects.

    Returns
    -------
    str
        "required_Time_on" when 2022-01-01 <= date <= now (both inclusive),
        otherwise "not_valid_time_on".
    """
    earliest = datetime(2022, 1, 1)
    # ``date`` stays on the left of both comparisons so values that compare
    # false against everything keep being rejected.
    within_window = date >= earliest and date <= datetime.now()
    return "required_Time_on" if within_window else "not_valid_time_on"


In [10]:
# Derive the FY21 quarter label and the time-on-system flag from the parsed
# interview date.
eligibility["fiscal_year"] = eligibility["date_entevyou"].map(fiscalYear21)
eligibility["timeOn_system"] = eligibility["date_entevyou"].map(validTimeOnSystem)

In [11]:
# Keep records whose DREAMS code is the "null" sentinel and whose age band is
# neither "25-29" nor "not_valid_age".
almost_eligibility = eligibility.loc[
    (eligibility["dreams_code"] == "null")
    & ~eligibility["age_range"].isin(["25-29", "not_valid_age"])
]
                                 
                                # (eligibility.timeOn_system=="required_Time_on")&
                                 

In [12]:
# Number of non-null dreams_code values left after the filter.
almost_eligibility["dreams_code"].count()

11829

In [13]:
def firstlastdob(df):
    """Build a "first_name last_name dob" key from one record.

    Parameters
    ----------
    df : object exposing ``first_name``, ``last_name`` and ``dob`` attributes
        The notebook passes one DataFrame row at a time.

    Returns
    -------
    str
        The three values joined by single spaces.
    """
    return "{} {} {}".format(df.first_name, df.last_name, df.dob)

In [14]:
# Add the de-duplication key through ``assign``, which returns a new frame,
# rather than writing a column into the filtered slice. Writing into the slice
# raised SettingWithCopyWarning, which the former ``%%capture`` magic only hid.
# The name is rebound so ``almost_eligibility`` still carries the
# ``unduplicate_me`` column afterwards.
almost_eligibility = almost_eligibility.assign(
    unduplicate_me=almost_eligibility.apply(firstlastdob, axis=1)
)

# Keep the first record for every "first_name last_name dob" key.
clean_eligibility = almost_eligibility.drop_duplicates(subset=["unduplicate_me"])


In [15]:
# Number of non-null dreams_code values left after de-duplication.
clean_eligibility["dreams_code"].count()

11678

# Survey data

In [16]:
# Same "first_name last_name dob" key as on the eligibility side, built row by row.
survey_data["remove_duplicate"] = survey_data.apply(firstlastdob, axis=1)

In [17]:
# Parse the interview date, then derive the period flags from it.
survey_data["date_entevyou"] = to_datetime(survey_data["interview_date"])
survey_data["fiscal_year"] = survey_data["date_entevyou"].map(fiscalYear21)
survey_data["timeOn_system"] = survey_data["date_entevyou"].map(validTimeOnSystem)

# Bucket the age into both band schemes.
survey_data["age_range"] = survey_data["age"].map(tranche_age_classique)
survey_data["newage_range"] = survey_data["age"].map(tranche_age_mineur_majeur)

In [18]:
# Keep respondents not already in a group, with a total of at least 14,
# whose age band is neither "25-29" nor "not_valid_age".
almost_survey_data = survey_data.loc[
    (survey_data["already_in_a_group"] == "no")
    & ~survey_data["age_range"].isin(["25-29", "not_valid_age"])
    & (survey_data["total"] >= 14)
]
                                 #(survey_data.timeOn_system=="required_Time_on")&

In [19]:
# Number of non-null case_id values left after the filter.
almost_survey_data["case_id"].count()

11828

In [20]:
# Keep the first record for every "first_name last_name dob" key.
clean_survey_data = almost_survey_data.drop_duplicates(
    subset="remove_duplicate",
    keep="first",
)

In [21]:
# Number of non-null case_id values left after de-duplication.
clean_survey_data["case_id"].count()

11677

# what we want

In [22]:
# Only the join key and the group-membership answer are carried into the merge.
id_form_survey_data = clean_survey_data.loc[:, ["case_id", "already_in_a_group"]]

In [23]:
# Right join: every cleaned eligibility record is kept, and
# ``already_in_a_group`` is attached wherever the case_id matches.
update_screening_data = id_form_survey_data.merge(
    right=clean_eligibility,
    how="right",
    on="case_id",
)

In [24]:
# Number of non-null case_id values in the merged frame.
update_screening_data["case_id"].count()

11678

In [25]:
# List the columns of the merged frame: case_id and already_in_a_group come
# first, followed by the eligibility columns.
update_screening_data.columns

Index(['case_id', 'already_in_a_group', 'dreams_code', 'organisation', 'id',
       'phone', 'not_selectable', 'first_name', 'last_name', 'dob',
       'interview_date', 'adress', 'commune', 'commune_section',
       'interviewer_firstname', 'interviewer_lastname', 'is_your_parent_alive',
       'mothers_name', 'fathers_name', 'total', 'who_is_your_law_parent',
       'group_name', 'id_group', 'hub_name', 'id_hub', 'eske_ou_lekol_deja',
       'depi_kile_ou_pa_al_lekol_with_label',
       'c61_depi_ki_l_ou_pa_al_lekl_ank', 'c6_ske_ou_te_oblije_double_deja',
       'kiyes_ki_peye_lekol_pou_ou', 'age', 'kpi_score', 'age_range',
       'newage_range', 'date_entevyou', 'fiscal_year', 'timeOn_system',
       'unduplicate_me'],
      dtype='object')

In [26]:
# Earliest and latest interview dates in the merged frame, as YYYY-MM-DD.
update_screening_data["date_entevyou"].min().strftime("%Y-%m-%d")
update_screening_data["date_entevyou"].max().strftime("%Y-%m-%d")

'2017-09-24'

'2022-02-08'

In [27]:
def _interviewed_between(screening, start=None, end=None):
    """Return the rows of ``screening`` interviewed within ``[start, end)``.

    The window is half-open: ``start`` is included and ``end`` is excluded.
    Consecutive periods that share a boundary therefore never overlap, and
    an interview carrying a time of day on the last day of a period is
    still counted in that period.

    Parameters
    ----------
    screening : DataFrame
        Frame with a datetime ``date_entevyou`` column.
    start, end : str or datetime-like, optional
        Window bounds; ``None`` leaves that side open.

    Returns
    -------
    DataFrame
        The matching rows. Rows with a missing interview date never match.
    """
    dates = screening["date_entevyou"]
    in_window = dates.notna()
    if start is not None:
        in_window = in_window & (dates >= start)
    if end is not None:
        in_window = in_window & (dates < end)
    return screening[in_window]


# Each period ends exactly where the next one starts (fiscal years begin on
# 1 October), so every dated record falls into at most one bucket.
unserved_Q1FY22 = _interviewed_between(update_screening_data, "2021-10-01", "2022-01-01")
unserved_Q2FY22 = _interviewed_between(update_screening_data, "2022-01-01", "2022-04-01")
unserved_FY21 = _interviewed_between(update_screening_data, "2020-10-01", "2021-10-01")
unserved_FY20 = _interviewed_between(update_screening_data, "2019-10-01", "2020-10-01")
unserved_Before_FY20 = _interviewed_between(update_screening_data, end="2019-10-01")


# Number of non-null case_id values per period.
unserved_Q1FY22.case_id.count()
unserved_Q2FY22.case_id.count()
unserved_FY21.case_id.count()
unserved_FY20.case_id.count()
unserved_Before_FY20.case_id.count()

1612

1005

1777

1131

6153

# save

In [28]:
def _commune_breakdown(unserved):
    """Count the rows of ``unserved`` per commune.

    Parameters
    ----------
    unserved : DataFrame
        Frame with a ``commune`` column.

    Returns
    -------
    DataFrame
        Indexed by commune, with a single ``nombre_de_beneficiaires`` column
        holding the number of rows for that commune.
    """
    return DataFrame.from_dict(
        unserved.commune.value_counts().to_dict(),
        orient='index',
        columns=['nombre_de_beneficiaires'],
    )


repartition_Q1FY22 = _commune_breakdown(unserved_Q1FY22)
repartition_Q2FY22 = _commune_breakdown(unserved_Q2FY22)
repartition_FY21 = _commune_breakdown(unserved_FY21)
repartition_FY20 = _commune_breakdown(unserved_FY20)
repartition_Before_FY20 = _commune_breakdown(unserved_Before_FY20)


# (breakdown sheet, breakdown frame, detail sheet, detail frame), in the order
# the sheets appear in the workbook.
_workbook_sheets = [
    ("res_Q1FY22", repartition_Q1FY22, "Q1FY22", unserved_Q1FY22),
    ("res_Q2FY22", repartition_Q2FY22, "Q2FY22", unserved_Q2FY22),
    ("res_FY21", repartition_FY21, "FY21", unserved_FY21),
    ("res_FY20", repartition_FY20, "FY20", unserved_FY20),
    ("res_Before_FY20", repartition_Before_FY20, "BeforeFY20", unserved_Before_FY20),
]

# The context manager saves and closes the workbook on exit, including when a
# sheet write raises. It replaces the explicit ``save()`` call, which recent
# pandas versions no longer provide.
with ExcelWriter('agyw_non_servis.xlsx', engine='openpyxl') as toBeServed:
    for _res_sheet, _res_frame, _detail_sheet, _detail_frame in _workbook_sheets:
        # The breakdown keeps its index because the commune names live there.
        _res_frame.to_excel(toBeServed, sheet_name=_res_sheet, index=True, na_rep="")
        _detail_frame.to_excel(toBeServed, sheet_name=_detail_sheet, index=False, na_rep="")

In [29]:
# One-row summary table: the number of non-null case_id values per period,
# written to its own workbook without the index column.
DataFrame([
    {
        "Non_servis_Q1FY22": unserved_Q1FY22["case_id"].count(),
        "Non_servis_Q2FY22": unserved_Q2FY22["case_id"].count(),
        "Non_servis_FY21": unserved_FY21["case_id"].count(),
        "Non_servis_FY20": unserved_FY20["case_id"].count(),
        "Non_servis_Before_FY20": unserved_Before_FY20["case_id"].count(),
    }
]).to_excel('Tableau_des_agyw__non_servis.xlsx', index=False, na_rep="")

# check