In [41]:
import os
from datetime import datetime
#from datetime import date
import pymysql
from sqlalchemy import create_engine
from decouple import config 
from dotenv import load_dotenv
import pandas as pd
from numpy import int16

In [2]:
# Load the variables declared in a local .env file into the process environment.
load_dotenv()
# Read the four database connection settings by name. No default is passed to
# config(), so a missing setting is expected to fail here rather than later
# (NOTE(review): confirm against python-decouple's behaviour).
USER= config('USRCaris')
PASSWORD= config('PASSCaris')
HOSTNAME= config('HOSTCaris')
DBNAME= config('DBCaris')

In [3]:
# Build a SQLAlchemy engine for the MySQL database through the PyMySQL driver.
# NOTE(review): USER and PASSWORD are interpolated into the URL verbatim; a
# password containing characters such as '@', ':' or '/' would need URL-escaping
# -- confirm the credentials are URL-safe.
engine = create_engine(f"mysql+pymysql://{USER}:{PASSWORD}@{HOSTNAME}/{DBNAME}")
# Master-sheet query: one row per dream_member (GROUP BY dm.id_patient), joined to
#   - b: per-patient attendance summary built from rows with value = 'P', first
#     grouped by (patient, topic) and then by patient (pres, groups, first/last
#     session date);
#   - beneficiary / patient identity fields, with `age` computed from ben.dob;
#   - HIV info aggregated with MAX / GROUP_CONCAT;
#   - group, hub, commune and departement lookups;
#   - every column of tracking_familymember (tf.*) and dreams_surveys_data (dsd.*);
#   - is_muso / is_gardening flags derived from the presence of a matching row.
# muso_eligibility applies one set of survey criteria when the age computed from
# dsd.nan_ki_dat_ou_fet is >= 18 and another set otherwise.
# NOTE(review): `age` uses ben.dob while muso_eligibility uses
# dsd.nan_ki_dat_ou_fet -- confirm both hold the same birth date.
# NOTE(review): GROUP_CONCAT(DISTINCT x, ',') appends ',' to each value instead of
# setting the separator (MySQL spells that SEPARATOR ','); confirm the resulting
# doubled commas are intended.
# NOTE(review): non-aggregated columns are selected alongside GROUP BY
# dm.id_patient; confirm the server's sql_mode allows this.
query = '''
SELECT 
    dm.id_patient AS main_id,
    IF(TIMESTAMPDIFF(YEAR,
            dsd.nan_ki_dat_ou_fet,
            NOW()) >= 18,
        IF(dsd.a7_Ak_kiles_w_ap_viv_15_19 = '3'
                OR dsd.a1121_aktivite_pouw_rantre_kob_ou_vle_fe = '0'
                or dsd.c6b_Kiles_ki_peye_lekol_ou_Tranche_15_19 = '3'
                or dsd.eske_ou_bay_kob_pou_pran_swen_piti_ou_ayo = 'oui',
            'yes_sup18',
            'no18'),
        if(
			dsd.a7_ak_kils_w_ap_viv_10_14 = '6'
            OR dsd.a1121_aktivite_pouw_rantre_kob_ou_vle_fe = '0'
            OR dsd.c6b_kils_ki_peye_lekl_ou_10_14 = '5'
            or dsd.eske_ou_bay_kob_pou_pran_swen_piti_ou_ayo = 'oui',
            'yes_inf17'
            ,'no17'	)
		) AS muso_eligibility,
    dm.id_group AS actual_id_group,
    b.groups AS group_she_take_sessions,
    p.patient_code AS code,
    ben.last_name,
    ben.first_name,
    ben.dob,
    TIMESTAMPDIFF(YEAR, ben.dob, NOW()) AS age,
    b.pres AS number_of_different_topic,
    b.first_session_date,
    b.last_session_date,
    MAX(dhi.test_date) AS last_hiv_test_date,
    GROUP_CONCAT(DISTINCT dhi.test_result, ',') AS test_results,
    GROUP_CONCAT(DISTINCT ltlr.name, ',') AS test_results_with_label,
    MAX(dhi.condoms_reception_date) AS last_condoms_reception_date,
    MAX(dhi.vbg_treatment_date) AS last_vbg_treatment_date,
    MAX(dhi.gynecological_care_date) AS last_gynecological_care_date,
    dg.name AS actual_group_name,
    dh.name AS actual_hub,
    lc.name AS actual_commune,
    dh.commune AS actual_commune_id,
    ld.name AS actual_departement,
    lc.departement AS actual_departement_id,
    IF(mgm.id_patient IS NOT NULL,
        'yes',
        'no') AS is_muso,
    IF(gb.case_id IS NOT NULL, 'yes', 'no') AS is_gardening,
    tf.*,
    dsd.*
FROM
    dream_member dm
        LEFT JOIN
    (SELECT 
        SUM(a.value = 'P') AS pres,
            a.id_patient,
            GROUP_CONCAT(DISTINCT a.id_group, ',') AS groups,
            MIN(a.date) AS first_session_date,
            MAX(a.date) AS last_session_date
    FROM
        (SELECT 
        dga.*, dgs.topic, dgs.date, dgs.id_group
    FROM
        dream_group_attendance dga
    LEFT JOIN dream_group_session dgs ON dgs.id = dga.id_group_session
    WHERE
        dga.value = 'P'
    GROUP BY dga.id_patient , dgs.topic) a
    GROUP BY a.id_patient) b ON b.id_patient = dm.id_patient
        LEFT JOIN
    beneficiary ben ON ben.id_patient = dm.id_patient
        LEFT JOIN
    patient p ON p.id = dm.id_patient
        LEFT JOIN
    caris_db.dream_hivinfos dhi ON dhi.id_patient = dm.id_patient
        LEFT JOIN
    lookup_testing_lab_result ltlr ON ltlr.id = dhi.test_result
        LEFT JOIN
    caris_db.dream_group dg ON dg.id = dm.id_group
        LEFT JOIN
    dream_hub dh ON dh.id = dg.id_dream_hub
        LEFT JOIN
    lookup_commune lc ON lc.id = dh.commune
        LEFT JOIN
    lookup_departement ld ON ld.id = lc.departement
        LEFT JOIN
    dreams_surveys_data dsd ON dsd.case_id = dm.case_id
        LEFT JOIN
    tracking_familymember tf ON tf.id_patient = dm.id_patient
        LEFT JOIN
    muso_group_members mgm ON mgm.id_patient = dm.id_patient
        LEFT JOIN
    gardening_beneficiary gb ON gb.code_dreams = p.patient_code
GROUP BY dm.id_patient
'''

# Run the query and load the full result set into a DataFrame.
# NOTE(review): parse_dates is documented as a list or dict of column names;
# confirm that passing True actually converts any column.
dreams_mastersheet = pd.read_sql_query(query,engine,parse_dates=True)

In [16]:
# Release the engine's pooled connections; the query result already lives in
# the dreams_mastersheet DataFrame, so the database is no longer needed.
engine.dispose()

## Work on the age aspect

In [11]:
# Count the rows whose age is missing before the column is cast to integer.
dreams_mastersheet.age.isna().sum()

0

In [12]:
# Missing ages become the -1000 sentinel so the column can be stored as int16;
# the sentinel falls outside every age band and is later labelled "not_valid_age".
dreams_mastersheet["age"] = dreams_mastersheet["age"].fillna(-1000).astype(int16)

In [13]:
# Inspect the age column after the integer cast.
dreams_mastersheet.age

0       -1000
1          16
2          19
3          20
4          19
         ... 
19973      13
19974      10
19975      15
19976      12
19977      14
Name: age, Length: 19978, dtype: int16

In [7]:
# Standard five-year age bands used for AGYW services
# Minor/adult age bands used for AGYW services



def tranche_age_classique(age):
    """Return the five-year age band label for ``age``.

    Bands are inclusive on both ends: "10-14", "15-19", "20-24" and "25-29".
    Any value that falls in none of them (including the -1000 sentinel and
    NaN) yields "not_valid_age".
    """
    bands = (
        (10, 14, "10-14"),
        (15, 19, "15-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lower, upper, label in bands:
        if age >= lower and age <= upper:
            return label
    return "not_valid_age"
        
def tranche_age_mineur_majeur(age):
    """Return the minor/adult age band label for ``age``.

    Bands are inclusive on both ends: "10-17", "18-19", "20-24" and "25-29".
    Any value that falls in none of them (including the -1000 sentinel and
    NaN) yields "not_valid_age".
    """
    bands = (
        (10, 17, "10-17"),
        (18, 19, "18-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lower, upper, label in bands:
        if age >= lower and age <= upper:
            return label
    return "not_valid_age"

In [8]:
# Label every row twice: once with the five-year bands, once with the
# minor/adult bands.
dreams_mastersheet["age_range"] = dreams_mastersheet["age"].map(tranche_age_classique)
dreams_mastersheet["newage_range"] = dreams_mastersheet["age"].map(tranche_age_mineur_majeur)

In [15]:
# Inspect the five-year age bands (newage_range can be inspected the same way).
dreams_mastersheet.age_range

0        not_valid_age
1                15-19
2                15-19
3                20-24
4                15-19
             ...      
19973            10-14
19974            10-14
19975            15-19
19976            10-14
19977            10-14
Name: age_range, Length: 19978, dtype: object

## Work on the interview date aspect

In [22]:
# Count missing values in the raw interview-date survey column.
dreams_mastersheet.a1_dat_entvyou_a_ft_jjmmaa_egz_010817.isna().sum()

0

In [26]:
# Parse the raw interview-date survey column into a datetime column.
# NOTE(review): the column name hints at a day-first layout (jjmmaa, 010817);
# confirm the stored values are unambiguous, otherwise pass an explicit format.
dreams_mastersheet["date_entevyou"] = pd.to_datetime(
    dreams_mastersheet["a1_dat_entvyou_a_ft_jjmmaa_egz_010817"]
)

In [34]:
# Inspect the parsed interview dates.
dreams_mastersheet.date_entevyou

0       2020-07-09
1       2019-12-18
2       2019-12-18
3       2019-12-18
4       2019-12-18
           ...    
19973   2020-08-20
19974   2020-12-13
19975   2020-12-12
19976   2020-12-11
19977   2020-08-20
Name: date_entevyou, Length: 19978, dtype: datetime64[ns]

In [63]:
def fiscalYear21(date):
    """Return the FY21 quarter label that ``date`` falls in.

    January-March 2021 maps to "FY21Q2" and October-December 2020 maps to
    "FY21Q1"; every other date yields "not_valid_fy". ``date`` only needs
    ``year`` and ``month`` attributes.
    """
    if date.year == 2021 and 1 <= date.month <= 3:
        return "FY21Q2"
    if date.year == 2020 and 10 <= date.month <= 12:
        return "FY21Q1"
    return "not_valid_fy"
    

    

def validTimeOnSystem(date):
    """Tell whether ``date`` lies between 2020-04-01 and the current moment.

    Returns "required_Time_on" when ``date`` is on or after 2020-04-01 00:00
    and not later than ``datetime.now()``; otherwise "not_valid_time_on".
    """
    window_start = datetime(2020, 4, 1)
    in_window = date >= window_start and date <= datetime.now()
    return "required_Time_on" if in_window else "not_valid_time_on"
        
    

    
def between_now_date_entevyou(date, now=None):
    """Return the number of calendar months between ``date`` and ``now``.

    Only the year and month components are compared; the day of the month is
    ignored, so 2021-02-28 to 2021-03-01 counts as one month.

    Parameters
    ----------
    date : object with ``year`` and ``month`` attributes
        The earlier date (here, the interview date).
    now : object with ``year`` and ``month`` attributes, optional
        Reference date. Defaults to ``datetime.now()``.

    Returns
    -------
    Month difference ``now - date``; negative when ``date`` is in the future.
    """
    # Read the clock once so the year and month come from the same instant;
    # two separate reads can straddle a month or year rollover.
    if now is None:
        now = datetime.now()
    return (now.year - date.year) * 12 + (now.month - date.month)




def agywPeriods(months):
    """Return the enrolment-duration band for a month count.

    Bands: "0-6 months" (anything <= 6, negatives included), "7-12 months",
    "13-24 months" and "25+ months" for everything larger.

    A missing month count (None / NaN / NA) returns "not_valid_period".
    Without that guard every comparison against NaN is false, so missing
    values would silently be reported as "25+ months".
    """
    if pd.isna(months):
        return "not_valid_period"
    if months <= 6:
        return "0-6 months"
    elif months>=7 and months<=12:
        return "7-12 months"
    elif months>=13 and months<=24:
        return "13-24 months"
    else:
        return "25+ months"

In [65]:
# Derive the reporting columns from the parsed interview date. The period band
# is computed last because it reads the month count created just before it.
dreams_mastersheet["fiscal_year"] = dreams_mastersheet["date_entevyou"].map(fiscalYear21)
dreams_mastersheet["timeOn_system"] = dreams_mastersheet["date_entevyou"].map(validTimeOnSystem)
dreams_mastersheet["months_now_dateEntevyou"] = dreams_mastersheet["date_entevyou"].map(
    between_now_date_entevyou
)
dreams_mastersheet["agyw_period_range"] = dreams_mastersheet["months_now_dateEntevyou"].map(
    agywPeriods
)

In [68]:
# Inspect the period band; fiscal_year, timeOn_system and
# months_now_dateEntevyou can be inspected the same way.
dreams_mastersheet.agyw_period_range

0         7-12 months
1        13-24 months
2        13-24 months
3        13-24 months
4        13-24 months
             ...     
19973     7-12 months
19974      0-6 months
19975      0-6 months
19976      0-6 months
19977     7-12 months
Name: agyw_period_range, Length: 19978, dtype: object

## Curriculum services aspect

In [70]:
# Count the rows with no attendance summary (number_of_different_topic missing).
dreams_mastersheet.number_of_different_topic.isna().sum()

1580

In [72]:
# Missing topic counts become the -1000 sentinel so the column can be stored
# as int16; the sentinel falls outside the 1-18 range tested further down.
dreams_mastersheet["number_of_different_topic"] = (
    dreams_mastersheet["number_of_different_topic"].fillna(-1000).astype(int16)
)

In [73]:
# List the distinct topic counts present after the cast.
dreams_mastersheet.number_of_different_topic.unique()

array([-1000,    18,    10,    13,    17,    14,    15,     6,    16,
          11,     3,     2,     7,     9,     8,     1,     4,    12,
           5], dtype=int16)

In [79]:
def curriculum_atLeastOneService(topics):
    """Flag whether at least one curriculum topic was attended.

    Returns "servis_auMoins_1fois" when ``topics`` is between 1 and 18
    inclusive, and "zero_services_curriculum" for any other value (including
    the -1000 sentinel).
    """
    if 1 <= topics <= 18:
        return "servis_auMoins_1fois"
    return "zero_services_curriculum"


def status_curriculum(topics):
    """Classify curriculum completion from the number of distinct topics.

    14 to 18 topics gives "curriculum complet", 1 to 13 gives
    "curriculum incomplet", and any other value (including the -1000
    sentinel) gives "non-recu0".
    """
    if 14 <= topics <= 18:
        return "curriculum complet"
    if 1 <= topics <= 13:
        return "curriculum incomplet"
    return "non-recu0"



### - Au moins un service du Curriculum

In [75]:
# Flag each row by whether at least one curriculum topic was attended.
dreams_mastersheet["curriculum_servis_auMoins_1fois"] = dreams_mastersheet[
    "number_of_different_topic"
].map(curriculum_atLeastOneService)

In [78]:
# Check which labels the at-least-one-service flag actually takes.
dreams_mastersheet.curriculum_servis_auMoins_1fois.unique()

array(['zero_services_curriculum', 'servis_auMoins_1fois'], dtype=object)

### - Curriculum Status

In [80]:
# Classify each row's curriculum completion from its distinct-topic count.
dreams_mastersheet["curriculum"] = dreams_mastersheet["number_of_different_topic"].map(
    status_curriculum
)

In [83]:
# Check which completion labels are present.
dreams_mastersheet.curriculum.unique()

array(['non-recu0', 'curriculum complet', 'curriculum incomplet'],
      dtype=object)

## HTS aspect