In [1]:
from pandas import DataFrame, read_sql_query, Int32Dtype, read_excel
from datetime import datetime
from IPython.core.interactiveshell import InteractiveShell
import pymysql
from sqlalchemy import create_engine
from decouple import config 
from dotenv import load_dotenv
from numpy import nan

InteractiveShell.ast_node_interactivity = "all"

In [2]:
load_dotenv()
# get the environment variables needed
USER= config('USRCaris')
PASSWORD= config('PASSCaris')
HOSTNAME= config('HOSTCaris')
DBNAME= config('DBCaris')

# get the engine to connect and fetch
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}@{HOSTNAME}/{DBNAME}")
query = '''
SELECT 
    dm.id_patient as id_patient,
    d.case_id,
    p.patient_code AS code,
    d.a_non_patisipan_an AS first_name,
    d.b_siyati AS last_name,
    TIMESTAMPDIFF(YEAR,
        d.nan_ki_dat_ou_fet,
        now()) AS age,
    d.nan_ki_dat_ou_fet AS dob,
    d.a1_dat_entvyou_a_ft_jjmmaa_egz_010817 AS interview_date,
    d.e__telefn,
    d.d_adrs AS adress,
    IF(dm.id IS NOT NULL, 'yes', 'no') AS already_in_a_group,
    dm.id_group AS actual_id_group,
    dg.name AS actual_group_name,
    dm.id_parenting_group AS actual_id_parenting_group,
    dpg.name AS actual_parenting_group_name,
    dh.name AS actual_hub,
    ld.name AS actual_departement,
    d.f_komin AS commune,
    d.g_seksyon_kominal AS commune_section,
    d.b1_non_moun_mennen_entvyou_a AS interviewer_firstname,
    d.c1_siyati_moun_ki_f_entvyou_a AS interviewer_lastname,
    d.d1_kad AS interviewer_role,
    d.lot_kad AS interviewer_other_info,
    d.h_kote_entvyou_a_ft AS interview_location,
    d.paran_ou_vivan AS is_your_parent_alive,
    d.i_non_manman AS mothers_name,
    d.j_non_papa AS fathers_name,
    d.k_reskonsab_devan_lalwa AS who_is_your_law_parent,
    d.total,
    d.organisation
FROM
    caris_db.dreams_surveys_data d
        LEFT JOIN
    dream_member dm ON dm.case_id = d.case_id
        LEFT JOIN
    patient p ON p.id = dm.id_patient
        LEFT JOIN
    dream_group dg ON dg.id = dm.id_group
        LEFT JOIN
    dream_group dpg ON dpg.id = dm.id_parenting_group
        LEFT JOIN
    dream_hub dh ON dh.id = dg.id_dream_hub
        LEFT JOIN
    lookup_commune lc ON lc.id = dh.commune
        LEFT JOIN
    lookup_departement ld ON ld.id = lc.departement
'''

sdata = read_sql_query(query,engine,parse_dates=True)
sdata.replace(r'^\s*$', nan, regex=True, inplace=True)
sdata.id_patient = sdata.id_patient.astype(Int32Dtype())
sdata.code = sdata.code.fillna('---')
sdata.total = sdata.total.astype(Int32Dtype())
sdata.age = sdata.age.astype(Int32Dtype())
sdata = sdata[sdata.total>=14]
# close the pool of connection
engine.dispose()

True

In [4]:
data = read_excel("./schooling_dreams_2022_08_01.xlsx")

In [5]:
data.columns

Index(['number', 'caseid', 'name', 'Office', 'agent_name',
       'agent_office_group', 'commentaire_verification', 'dat_peyman_fet',
       'date_of_contract', 'dreams_code', 'eskew_peye',
       'full_name_of_other_caris_responsible', 'fullname', 'gender', 'gps',
       'has_schooling_form_2021_2022', 'infant_class', 'infant_commune',
       'infant_dob', 'is_caris_activities_member',
       'is_infant_has_other_organization_scholarship',
       'is_infant_success_school_year',
       'is_parent_or_responsible_able_to_pay_other_fees',
       'konbyen_kob_caris_peye', 'konpare_dob', 'nom_ajan_an',
       'nom_patisipan_dreams', 'nom_timoun_lan', 'not_succeed_reason',
       'office1', 'other_family', 'other_phone_number', 'other_phone_number_1',
       'other_reason', 'person_relation_with_infant', 'photo', 'real_dob',
       'responsible_person_adress', 'responsible_person_first_name',
       'responsible_person_last_name', 'responsible_person_phone_number',
       'school_address', 

In [11]:
data = data[data.closed==False]
payed = data[data.eskew_peye=="1"]

In [None]:
sdata.case_id.count()
data.code.count()
#datim.id_patient.count()

70828

61

In [None]:
#sp = sdata[sdata.code.isin(data.code)]
#sp.code.count()

sp = data[data.code.isin(sdata.code)]
sp.code.count()

61

In [None]:
sd =  sdata[['code','age','organisation']]
df =  data[['code','form.paiement_ecole.nom_timoun_lan','form.paiement_ecole.dat_peyman_fet','form.paiement_ecole.nom_ajan_an']]

In [None]:
wp = df.merge(sd,how="left",on='code')

In [None]:
wp.age =  wp.age.astype(Int32Dtype())

def age_range(age):
    if age >=10 and age <=14:
        return "10-14"
    elif age >=15 and age <=19:
        return "15-19"
    elif age >=20 and age <=24:
        return "20-24"
    else:
        return "no"
    

wp['age_range'] = wp.age.map(age_range)

In [None]:
resultat = wp.pivot_table(
    values="code",
    index="organisation",
    columns="age_range",
    aggfunc="count",
    fill_value=0,
    margins=True,
    margins_name="Total"
)
resultat
resultat.to_excel('presentation_schooling.xlsx',index=True,na_rep="")
wp.to_excel('paiment_schooling.xlsx',index=False,na_rep="")

age_range,10-14,15-19,20-24,Total
organisation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CARIS,41,15,1,57
Caris,1,3,0,4
Total,42,18,1,61
