In [None]:
import os
from datetime import datetime
#from datetime import date
import pymysql
from sqlalchemy import create_engine
from decouple import config 
from dotenv import load_dotenv
import pandas as pd
import numpy as np
from enum import Enum


from IPython.core.interactiveshell import InteractiveShell
# Display the value of every bare expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
# Load the variables declared in a .env file into the process environment.
load_dotenv()

# Connection settings for the database, read from the configuration.
USER = config("USRCaris")
PASSWORD = config("PASSCaris")
HOSTNAME = config("HOSTCaris")
DBNAME = config("DBCaris")

# Engine used to run the query below; the URL selects MySQL through the pymysql driver.
connection_url = f"mysql+pymysql://{USER}:{PASSWORD}@{HOSTNAME}/{DBNAME}"
engine = create_engine(connection_url)
# Attendance rows marked 'P' in dream_group_attendance, enriched through LEFT JOINs
# with the beneficiary identity, interview data, session, group, hub, commune,
# departement, topic and the user who created the session.
# The statement has no ORDER BY, so the row order is whatever the server returns.
query = '''
SELECT 
    p.patient_code,
    b.first_name,
    b.last_name,
    b.dob,
    dsd.a1_dat_entvyou_a_ft_jjmmaa_egz_010817 AS interview_date,
    dgs.date AS session_date,
    dg.name AS group_name,
    dh.name AS hub_name,
    lc.name AS hub_commune,
    ld.name AS hub_departement,
    dsd.f_komin AS interview_commune,
    ldt.name AS topic,
    au.email as created_by,
    au.username,
    dgs.created_at as date_session_were_entered
FROM
    caris_db.dream_group_attendance dga
        LEFT JOIN
    dream_member dm ON dm.id_patient = dga.id_patient
        LEFT JOIN
    dreams_surveys_data dsd ON dsd.case_id = dm.case_id
        LEFT JOIN
    dream_group_session dgs ON dgs.id = dga.id_group_session
        LEFT JOIN
    dream_group dg ON dg.id = dgs.id_group
        LEFT JOIN
    dream_hub dh ON dh.id = dg.id_dream_hub
        LEFT JOIN
    lookup_commune lc ON lc.id = dh.commune
        LEFT JOIN
    lookup_departement ld ON ld.id = lc.departement
        LEFT JOIN
    lookup_dreams_topic ldt ON ldt.id = dgs.topic
        LEFT JOIN
    patient p ON p.id = dga.id_patient
        LEFT JOIN
    beneficiary b ON b.id_patient = dga.id_patient
        left join 
            auth_users au  on au.id=dgs.created_by
WHERE
    dga.value = 'P'
'''

# Run the query and always release the connection pool, even when the query fails.
# NOTE(review): parse_dates is documented as a list or dict of column names; passing
# True presumably selects no column - confirm whether specific date columns were intended.
try:
    evaluation_performance_attendance = pd.read_sql_query(query, engine, parse_dates=True)
finally:
    # Close the pool of connections.
    engine.dispose()

In [None]:
# Optional check: dump the raw query result to Excel for manual verification.
evaluation_performance_attendance.to_excel(
    "epa.xlsx",
    index=False,
    na_rep="NULL",
)

## Duplication aspect

In [None]:
# Inspect the dtype of each column returned by the query.
evaluation_performance_attendance.dtypes

In [None]:
# Keep one row per beneficiary: the first occurrence of each patient_code.
# NOTE(review): the query has no ORDER BY, so which attendance row survives (and
# therefore which created_by / entry date it carries) depends on the order the
# server returned - confirm this is acceptable for per-agent counts.
epa_deduplicate = evaluation_performance_attendance.drop_duplicates(subset="patient_code")

In [None]:
# Compare the frame dimensions before and after de-duplication.
evaluation_performance_attendance.shape
epa_deduplicate.shape

## Period of control aspect

In [None]:
class Constante(Enum):
    """Cut-off dates, stored as ``YYYY-MM-DD`` strings, used to filter on the session entry date."""
    # Start of the global reporting window.
    march_1_2021 = '2021-03-01'
    # Start of the more recent window.
    # NOTE(review): looks like it is edited by hand for each run - confirm.
    specific = "2021-07-26"

In [None]:
# Attendance rows whose session was entered on or after each cut-off date.
entered_on = evaluation_performance_attendance["date_session_were_entered"]
presencesaisie_startmars_endnow = evaluation_performance_attendance[entered_on >= Constante.march_1_2021.value]
presencesaisie_specific = evaluation_performance_attendance[entered_on >= Constante.specific.value]

# De-duplicated beneficiaries entered since the first cut-off date.
# .copy() makes this an independent frame: later cells add columns to it, and
# assigning into a slice of a slice triggers SettingWithCopyWarning.
epa_startmars_endnow = epa_deduplicate[
    epa_deduplicate["date_session_were_entered"] >= Constante.march_1_2021.value
].copy()

In [None]:
# Dimensions of the three filtered frames.
presencesaisie_startmars_endnow.shape
presencesaisie_specific.shape
epa_startmars_endnow.shape

## Count by agents in descending order

In [None]:
# Per-agent (created_by) non-null counts of every column, for each filtered frame.
performance_session_global = epa_startmars_endnow.groupby("created_by").count()
presence_global = presencesaisie_startmars_endnow.groupby("created_by").count()
presence_specific = presencesaisie_specific.groupby("created_by").count()

In [None]:
# Rank the agents from the most to the fewest counted patient codes.
performance_session_global = performance_session_global.sort_values(
    by="patient_code", ascending=False
)
presence_global = presence_global.sort_values(by="patient_code", ascending=False)
presence_specific = presence_specific.sort_values(by="patient_code", ascending=False)


In [None]:
# Column labels embed the cut-off date they refer to.
since_march_label = f"nombre_de_presence_saisie_depuis_le_{Constante.march_1_2021.value}"
since_specific_label = f"nombre_de_presence_saisie_depuis_le_{Constante.specific.value}"

# One row per agent: the three Series are aligned on created_by, and an agent
# absent from a Series gets 0 instead of NaN.
performance_session = pd.DataFrame(
    {
        "nombre_de_filles": performance_session_global["patient_code"],
        since_march_label: presence_global["patient_code"],
        since_specific_label: presence_specific["patient_code"],
    }
).fillna(0)
performance_session

## MODIFY BELOW TO GET THE DIFFERENCE GIVEN LAST WEEK

In [None]:
# Load the "filles_saisies_global" sheet of a previous report, indexed by created_by,
# for a week-over-week comparison.
# NOTE(review): the file name is hard-coded, and the frame is only referenced by the
# disabled lines below - confirm whether this read is still needed.
week_before_perf = pd.read_excel("rapport_agents_saisie_des_sessions_26Juillet2021.xlsx", sheet_name="filles_saisies_global", index_col="created_by")
# Disabled: difference in the number of girls entered compared with the previous report.
#performance_session["nombre_de_filles_saisie_par_rapport_a_la_semaine_derniere"] =  performance_session["nombre_de_filles"] - week_before_perf["nombre_de_filles"]
#performance_session.nombre_de_filles_saisie_par_rapport_a_la_semaine_derniere = performance_session.nombre_de_filles_saisie_par_rapport_a_la_semaine_derniere.astype("int16")

In [None]:
# Every column of this frame is a count: replace any remaining NaN with 0 and store
# all of them as int64. The previous int16 cast silently wraps above 32,767, and it
# was applied to one column only, leaving the other counts as floats.
performance_session = performance_session.fillna(0).astype("int64")

In [None]:
# Put the report columns in alphabetical order.
columns_performance_session = sorted(performance_session.columns)
performance_session = performance_session.reindex(columns=columns_performance_session)

In [None]:
# Display the per-agent report.
performance_session

## Agent per Commune

In [None]:
# Count of patient codes per agent (rows) and hub commune (columns), with a
# "Grand Total" margin on both axes.
performance_agents_par_commune = epa_startmars_endnow.pivot_table(
    index="created_by",
    columns="hub_commune",
    values=["patient_code"],
    aggfunc="count",
    fill_value=0,
    margins=True,
    margins_name="Grand Total",
)
#performance_agents_par_commune.reset_index(inplace=True)

In [None]:
# Highest overall total first; the "Grand Total" margin row is sorted along with the agents.
performance_agents_par_commune = performance_agents_par_commune.sort_values(
    by=("patient_code", "Grand Total"),
    ascending=False,
)

In [None]:
# Display the agent-by-commune table.
performance_agents_par_commune

## Reporting Performance de mars 2021 a date

In [None]:
# Write both report tables to one workbook, one sheet each.
# The context manager saves and closes the file on exit, including when a write
# fails; ExcelWriter.save() is deprecated and removed in recent pandas versions.
with pd.ExcelWriter('rapport_agents_saisie_des_sessions.xlsx', engine='xlsxwriter') as pac:
    performance_session.to_excel(pac, sheet_name='filles_saisies_global')
    performance_agents_par_commune.to_excel(pac, sheet_name="filles_saisies_par_communes")

## Verification sheet

In [None]:
def fiscalYear21(date):
    """Return the FY21 quarter label of ``date``.

    ``date`` must expose ``year`` and ``month`` attributes. October-December 2020
    is "FY21Q1", January-March 2021 "FY21Q2", April-June 2021 "FY21Q3" and
    July-September 2021 "FY21Q4". Any other date gets the catch-all label
    "Q3fy20-Q4fy20".
    """
    # (year, first month, last month, label) of each FY21 quarter.
    quarters = (
        (2021, 1, 3, "FY21Q2"),
        (2020, 10, 12, "FY21Q1"),
        (2021, 4, 6, "FY21Q3"),
        (2021, 7, 9, "FY21Q4"),
    )
    for year, first_month, last_month, label in quarters:
        if date.year == year and first_month <= date.month <= last_month:
            return label
    return "Q3fy20-Q4fy20"
    
def validTimeOnSystem(date):
    """Return "required_Time_on" when ``date`` falls between 2020-04-01 and now (both inclusive).

    Any other value yields "not_valid_time_on".
    """
    window_start = datetime.strptime("2020-04-01", "%Y-%m-%d")
    in_window = date >= window_start and date <= datetime.now()
    return "required_Time_on" if in_window else "not_valid_time_on"

In [None]:
# Number of rows with a missing interview date.
epa_startmars_endnow.interview_date.isna().sum()

In [None]:
%%capture
# Parse the interview date into a datetime column.
epa_startmars_endnow["date_entevyou"] = pd.to_datetime(
    epa_startmars_endnow["interview_date"]
)

In [None]:
# Inspect the parsed interview dates.
epa_startmars_endnow.date_entevyou

In [None]:
%%capture
# Label each interview date with its FY21 quarter and its time-on-system status.
interview_dates = epa_startmars_endnow["date_entevyou"]
epa_startmars_endnow["fiscal_year"] = interview_dates.map(fiscalYear21)
epa_startmars_endnow["time_On_System"] = interview_dates.map(validTimeOnSystem)

In [None]:
# List the columns now present on the frame.
epa_startmars_endnow.columns

In [None]:
# Inspect the dates of birth before deriving ages from them.
epa_startmars_endnow.dob

In [None]:
from dateutil.relativedelta import relativedelta

In [None]:
def get_age(date):
    """Return the ``years`` component of the relativedelta between today and ``date``."""
    today = datetime.today()
    elapsed = relativedelta(today, date)
    return elapsed.years

def tranche_age_classique(age):
    """Return the five-year age bracket label of ``age``.

    The brackets are "10-14", "15-19", "20-24" and "25-29", bounds inclusive.
    An age outside every bracket yields "not_valid_age".
    """
    brackets = (
        (10, 14, "10-14"),
        (15, 19, "15-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lowest, highest, label in brackets:
        if lowest <= age <= highest:
            return label
    return "not_valid_age"
        
def tranche_age_mineur_majeur(age):
    """Return the age bracket label of ``age``, with the 10-19 range split at 18.

    The brackets are "10-17", "18-19", "20-24" and "25-29", bounds inclusive.
    An age outside every bracket yields "not_valid_age".
    """
    brackets = (
        (10, 17, "10-17"),
        (18, 19, "18-19"),
        (20, 24, "20-24"),
        (25, 29, "25-29"),
    )
    for lowest, highest, label in brackets:
        if lowest <= age <= highest:
            return label
    return "not_valid_age"

In [None]:
%%capture
# Derive the age from the date of birth, then both age-bracket labels from the age.
epa_startmars_endnow["age"] = epa_startmars_endnow["dob"].map(get_age)
ages = epa_startmars_endnow["age"]
epa_startmars_endnow["age_range"] = ages.map(tranche_age_classique)
epa_startmars_endnow["newage_range"] = ages.map(tranche_age_mineur_majeur)

In [None]:
# Distinct ages and distinct labels of both age-bracket columns.
epa_startmars_endnow.age.unique()
epa_startmars_endnow.age_range.unique()
epa_startmars_endnow.newage_range.unique()

In [None]:
# List the columns again now that the age columns were added.
epa_startmars_endnow.columns

In [None]:
# Reusable row masks.
age_label = epa_startmars_endnow["age_range"]
eligible_age = (age_label != "not_valid_age") & (age_label != "25-29")
in_fy21 = epa_startmars_endnow["fiscal_year"] != "Q3fy20-Q4fy20"
on_system = epa_startmars_endnow["time_On_System"] == "required_Time_on"

# Eligible ages, restricted to FY21 interviews or not.
epa_fy21 = epa_startmars_endnow[eligible_age & in_fy21]
all_epa = epa_startmars_endnow[eligible_age]

# Same two cohorts, further restricted to the required time on the system.
valid_epa_fy21 = epa_startmars_endnow[on_system & eligible_age & in_fy21]
valid_all_epa = epa_startmars_endnow[on_system & eligible_age]

In [None]:
# Print a label before each cohort's non-null patient_code count; the last value is
# the difference between the full frame and the valid_all_epa cohort.
print("epa_startmars")
epa_startmars_endnow.patient_code.count()
print("epa_fy21")
epa_fy21.patient_code.count()
print("all_epa")
all_epa.patient_code.count()
print("valid epa_fy21")
valid_epa_fy21.patient_code.count()
print("valid_all_epa")
valid_all_epa.patient_code.count()
print("not valid")
epa_startmars_endnow.patient_code.count() - valid_all_epa.patient_code.count()



In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# Count of patient codes per fiscal-year label among the FY21 cohort.
pivot_epa = epa_fy21.pivot_table(columns="fiscal_year",values="patient_code",fill_value=0,aggfunc='count')

# One bar per label: the pivot columns are the labels, its first row holds the counts.
df = pd.DataFrame({"fy": list(pivot_epa.columns), "dt": pivot_epa.values.tolist()[0]})

# Keep explicit handles on the figure and the axes instead of relying on pyplot's
# implicit "current figure" state.
fig = plt.figure(figsize=(16, 8))
sns.set_style("darkgrid")
splot = sns.barplot(x="dt", y="fy", data=df)
splot.set_xlabel("")
splot.set_ylabel("")
fig.suptitle("AGYW, entered since march 2021 by the data clerk, presence in curriculum")

# Write each count just past the end of its bar.
for p in splot.patches:
    width = p.get_width()
    splot.text(2 + width, p.get_y() + 0.50 * p.get_height(),
               '{:1.0f}'.format(width), fontdict=dict(color="red", fontsize=12))

# Source note placed below the bottom-left corner of the axes.
splot.annotate(
    "source: HIVHaiti", (0, 0), (-80, -20), fontsize=10,
    xycoords='axes fraction', textcoords='offset points', va='top'
)

fig.savefig("curriculum_Presence", dpi=400)

In [None]:
# Count of patient codes per fiscal-year label among the valid_all_epa cohort.
pivot_all_epa = valid_all_epa.pivot_table(columns="fiscal_year",values="patient_code",fill_value=0,aggfunc='count')

# One bar per label: the pivot columns are the labels, its first row holds the counts.
df = pd.DataFrame({"fy": list(pivot_all_epa.columns), "dt": pivot_all_epa.values.tolist()[0]})

# Keep explicit handles on the figure and the axes instead of relying on pyplot's
# implicit "current figure" state.
fig = plt.figure(figsize=(16, 8))
sns.set_style("darkgrid")
splot = sns.barplot(x="dt", y="fy", data=df)
splot.set_xlabel("")
splot.set_ylabel("")
# NOTE(review): same title as the FY21-only chart although this cohort differs - confirm.
fig.suptitle("AGYW, entered since march 2021 by the data clerk, presence in curriculum")

# Write each count just past the end of its bar.
for p in splot.patches:
    width = p.get_width()
    splot.text(2 + width, p.get_y() + 0.50 * p.get_height(),
               '{:1.0f}'.format(width), fontdict=dict(color="red", fontsize=12))

# NOTE(review): a 16x8 inch figure at dpi=1000 is a 16000x8000 pixel image - confirm this is intended.
fig.savefig("curriculum_all_fiscal", dpi=1000)