In [1]:
import numpy as np
import pandas as pd
import warnings
from my_analysis_toolkit import read_encrypted_excels, generate_binary_sequence
from my_static_data import tt_passwords, follow_up_columns, follow_up_date_columns, follow_up_basic_columns, follow_up_int_columns

In [2]:
# --- Display options ---
# Show floats with one fixed decimal rather than scientific notation.
# To undo: pd.reset_option('display.float_format')
pd.options.display.float_format = '{:.1f}'.format

# Do not truncate the contents of wide cells when a frame is displayed.
pd.options.display.max_colwidth = None

# --- Warnings ---
# Ignore UserWarnings raised from openpyxl modules (the data-validation warning in particular).
warnings.filterwarnings(action="ignore", category=UserWarning, module="openpyxl")

In [3]:
# Static Code Variables
# Values imported from my_static_data, aliased to the generic names used by the cells below.
passwords = tt_passwords                    # handed to read_encrypted_excels
columns_names = follow_up_columns           # assigned as the column labels of every sheet
basic_columns = follow_up_basic_columns     # a row is dropped when all of these are empty
date_columns = follow_up_date_columns       # parsed with pd.to_datetime
int_columns = follow_up_int_columns         # cast to the nullable Int64 dtype

# Worksheet read from each workbook, and the name of the column that tags
# every row with the workbook it came from.
sheet, sp = 'FUA', 'fusp'

In [4]:
# Read the password-protected workbooks located at '000'.
# The result is indexed below as tracking_tools[<file name>][<sheet name>].
# NOTE(review): read_encrypted_excels is a project helper; its exact return type is not
# visible here — confirm against my_analysis_toolkit.
tracking_tools = read_encrypted_excels(r'000', passwords)

Processing 83 sheets across all files:
Processed files and sheets:                                                        
- TS_FC_meetingOutreachReferralFollowUpAdvocacy.xlsx: Meetings, outreachAndAwareness, IngoingOutgoingInternalReferral, Follow-up, Advocacy, GZT_Service_Map, Drop-down, Glossary
- tt_psc_IJ_v04.xlsx: Scr, Int, GC, IC, FUA, PEI, TRW, TD, CWS, AW, Drop-down
- tt_psc_LA_v04.xlsx: Scr, Int, GC, IC, FUA, PEI, TRW, TD, CWS, AW, Drop-down
- tt_psc_MT_v04.xlsx: Scr, Int, GC, IC, FUA, PEI, TRW, TD, CWS, AW, Drop-down
- tt_psc_SA_v04.xlsx: Scr, Int, GC, IC, FUA, PEI, TRW, TD, CWS, AW, Drop-down
- tt_psc_YQ-v04.xlsx: Scr, Int, GC, IC, FUA, PEI, TRW, TD, CWS, AW, Drop-down
- tt_pt_HJ_v04.xlsx: PSFS, PT Int, PT Groups, GPT, IPT, FUA, Heba, reference, drop-down lists, drop_down
- tt_pt_HR_v04.xlsx: PSFS, PT Int, PT Groups, GPT, IPT, FUA, Hind, reference, drop-down lists, drop_down


# Process Follow Up Assessment Sheets

In [5]:
# Raw follow-up sheet (`sheet`) of the YQ workbook.
yq = tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]

In [7]:
# Build the cleaned YQ frame from the raw sheet in one non-mutating chain, so the frame
# stored in `tracking_tools` is left untouched and this cell is safe to re-run.
yq = (
    tracking_tools['tt_psc_YQ-v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # standard column names, on a new frame
    .iloc[1:]                                 # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every row with the workbook it came from.
yq.insert(0, sp, 'YQ')

In [9]:
# Raw follow-up sheet (`sheet`) of the SA workbook.
sa = tracking_tools['tt_psc_SA_v04.xlsx'][sheet]

In [10]:
# Build the cleaned SA frame from the raw sheet in one non-mutating chain, so the frame
# stored in `tracking_tools` is left untouched and this cell is safe to re-run.
sa = (
    tracking_tools['tt_psc_SA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # standard column names, on a new frame
    .iloc[1:]                                 # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every row with the workbook it came from.
sa.insert(0, sp, 'SA')

In [11]:
# Raw follow-up sheet (`sheet`) of the IJ workbook.
ij = tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]

In [12]:
# Build the cleaned IJ frame from the raw sheet in one non-mutating chain, so the frame
# stored in `tracking_tools` is left untouched and this cell is safe to re-run.
ij = (
    tracking_tools['tt_psc_IJ_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # standard column names, on a new frame
    .iloc[1:]                                 # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every row with the workbook it came from.
ij.insert(0, sp, 'IJ')

In [13]:
# Raw follow-up sheet (`sheet`) of the LA workbook.
la = tracking_tools['tt_psc_LA_v04.xlsx'][sheet]

In [14]:
# Build the cleaned LA frame from the raw sheet in one non-mutating chain, so the frame
# stored in `tracking_tools` is left untouched and this cell is safe to re-run.
la = (
    tracking_tools['tt_psc_LA_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # standard column names, on a new frame
    .iloc[1:]                                 # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every row with the workbook it came from.
la.insert(0, sp, 'LA')

In [15]:
# Raw follow-up sheet (`sheet`) of the MT workbook.
mt = tracking_tools['tt_psc_MT_v04.xlsx'][sheet]

In [16]:
# Build the cleaned MT frame from the raw sheet in one non-mutating chain, so the frame
# stored in `tracking_tools` is left untouched and this cell is safe to re-run.
mt = (
    tracking_tools['tt_psc_MT_v04.xlsx'][sheet]
    .set_axis(columns_names, axis=1)          # standard column names, on a new frame
    .iloc[1:]                                 # discard the first row of the sheet
    .reset_index(drop=True)
    .dropna(subset=basic_columns, how='all')  # drop rows where every basic column is empty
)
# Tag every row with the workbook it came from.
mt.insert(0, sp, 'MT')

In [17]:
# Row count summed over the five per-workbook frames.
gc_rows = sum(len(frame) for frame in (mt, yq, sa, ij, la))
gc_rows

174

In [18]:
# Stack the five per-workbook frames into one table; ignore_index gives it a fresh 0..n-1 index.
follow_up = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [19]:
# Display the combined frame.
# NOTE(review): the rendered output includes client first/last names and free-text notes;
# consider clearing the output or showing a de-identified view before sharing the notebook.
follow_up

Unnamed: 0,fusp,fuspi,rid,fcid,firstname,lastname,3m,6m,12m,status,closure_reason,note
0,YQ,1,R0130,8110010011,صباح,شيخ هلال,2021-04-07 00:00:00,,,Closed,Moved to another city/country,
1,YQ,2,R0024,8110020012,نجوى,حصرم,2020-11-24 00:00:00,2021-02-15 00:00:00,2021-08-17 00:00:00,Closed,Therapy completed (received all needed services),تم الانقطاع عن الجروب بسبب قرارات الحكومة التركية بشان الكورونا
2,YQ,3,R0010,8110020013,فتحية,العبدالله,2020-12-04 00:00:00,2020-02-15 00:00:00,2020-08-19 00:00:00,Closed,Therapy completed (received all needed services),تم الانقطاع عن الجروب بسبب قرارات الحكومة التركية بشان الكورونا و كذلك سيستمر العمل معها ببعض الجلسات الفردية
3,YQ,4,R0007,8110020016,نور,أشقر,,,,Closed,Unable to attend for family reasons,
4,YQ,5,R0006,8110020017,فاطمة,محمد,2020-11-10 00:00:00,2020-03-02 00:00:00,2021-08-05 00:00:00,Closed,Therapy completed (received all needed services),
...,...,...,...,...,...,...,...,...,...,...,...,...
169,IJ,11,R0574,8110080007,خولة,العلو,2023-09-27 00:00:00,,,Active,,
170,IJ,12,R0573,8110080008,حسناء,بصمجي,,,,Active,,
171,IJ,13,R0583,8110080011,شروف,الحميدي,,,,Closed,,
172,IJ,14,R0518,8110090004,عائشة,مصطفى,2023-09-27 00:00:00,,,Active,,


In [20]:
# All follow-up assessments csv file
# Writes the combined frame as-is (row index omitted); it is read back in the next section.
follow_up.to_csv(r'001/0100_ps_fu_092300.csv', index=False)

# Follow-up Assessment Dataframe

In [21]:
# Reload the combined export and normalise it:
#   1. parse the columns listed in date_columns as datetimes,
#   2. trim surrounding whitespace from every string cell,
#   3. cast the columns listed in int_columns to the nullable Int64 dtype.
fua = pd.read_csv(r'001/0100_ps_fu_092300.csv')
fua[date_columns] = fua[date_columns].apply(pd.to_datetime)
# Column-wise Series.map is used instead of DataFrame.applymap, which is deprecated since
# pandas 2.1; non-string cells (numbers, timestamps, missing values) pass through unchanged.
fua = fua.apply(
    lambda column: column.map(lambda cell: cell.strip() if isinstance(cell, str) else cell)
)
fua[int_columns] = fua[int_columns].astype('Int64')

In [22]:
# (rows, columns) of the reloaded frame.
fua.shape

(174, 12)

In [23]:
# Per-column dtypes and non-null counts after the conversions above.
fua.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 174 entries, 0 to 173
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   fusp            174 non-null    object        
 1   fuspi           174 non-null    Int64         
 2   rid             174 non-null    object        
 3   fcid            174 non-null    Int64         
 4   firstname       174 non-null    object        
 5   lastname        174 non-null    object        
 6   3m              138 non-null    datetime64[ns]
 7   6m              111 non-null    datetime64[ns]
 8   12m             69 non-null     datetime64[ns]
 9   status          174 non-null    object        
 10  closure_reason  116 non-null    object        
 11  note            56 non-null     object        
dtypes: Int64(2), datetime64[ns](3), object(7)
memory usage: 16.8+ KB


In [25]:
# First cleaned csv file
# Written without the row index; read back at the start of the analysis section.
fua.to_csv(r'010/0100_ps_fu_092301.csv', index=False)

# Follow-up Assessment Analysis Version

In [4]:
# Load the first cleaned export and restore the dtypes that a CSV cannot carry:
# datetimes for the date columns, nullable Int64 for the integer columns.
fu = pd.read_csv(r'010/0100_ps_fu_092301.csv')
fu = fu.assign(**{name: pd.to_datetime(fu[name]) for name in date_columns})
fu = fu.astype({name: 'Int64' for name in int_columns})

In [5]:
# Per-column dtypes and non-null counts of the analysis frame.
fu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 174 entries, 0 to 173
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   fusp            174 non-null    object        
 1   fuspi           174 non-null    Int64         
 2   rid             174 non-null    object        
 3   fcid            174 non-null    Int64         
 4   firstname       174 non-null    object        
 5   lastname        174 non-null    object        
 6   3m              138 non-null    datetime64[ns]
 7   6m              111 non-null    datetime64[ns]
 8   12m             69 non-null     datetime64[ns]
 9   status          174 non-null    object        
 10  closure_reason  116 non-null    object        
 11  note            56 non-null     object        
dtypes: Int64(2), datetime64[ns](3), object(7)
memory usage: 16.8+ KB


In [31]:
# Display the analysis frame.
# NOTE(review): the output stored under this cell already shows numeric status and
# closure_reason codes, i.e. it was rendered after the encoding cells further down had run.
# On a fresh top-to-bottom run this cell would show the text labels instead.
fu

Unnamed: 0,fusp,fuspi,rid,fcid,firstname,lastname,3m,6m,12m,status,closure_reason,note
0,YQ,1,R0130,8110010011,صباح,شيخ هلال,2021-04-07,NaT,NaT,1,-2,
1,YQ,2,R0024,8110020012,نجوى,حصرم,2020-11-24,2021-02-15,2021-08-17,1,1,تم الانقطاع عن الجروب بسبب قرارات الحكومة التركية بشان الكورونا
2,YQ,3,R0010,8110020013,فتحية,العبدالله,2020-12-04,2020-02-15,2020-08-19,1,1,تم الانقطاع عن الجروب بسبب قرارات الحكومة التركية بشان الكورونا و كذلك سيستمر العمل معها ببعض الجلسات الفردية
3,YQ,4,R0007,8110020016,نور,أشقر,NaT,NaT,NaT,1,-4,
4,YQ,5,R0006,8110020017,فاطمة,محمد,2020-11-10,2020-03-02,2021-08-05,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...
168,IJ,10,R0567,8110090007,عطاء,الزعبي,NaT,NaT,NaT,1,,
169,IJ,11,R0574,8110080007,خولة,العلو,2023-09-27,NaT,NaT,0,,
171,IJ,13,R0583,8110080011,شروف,الحميدي,NaT,NaT,NaT,1,,
172,IJ,14,R0518,8110090004,عائشة,مصطفى,2023-09-27,NaT,NaT,0,,


In [7]:
# Encode the binary `status` column: Active -> 0, Closed -> 1 (stored as nullable Int64).
status_codes = {'Active': 0, 'Closed': 1}
fu['status'] = fu['status'].replace(status_codes).astype('Int64')

In [12]:
# Encode `closure_reason` as integers (nullable Int64): 1 for completed therapy,
# a distinct negative code for every other listed reason.
closure_reason_codes = {
    'Therapy completed (received all needed services)': 1,
    'Cannot travel to ADMSP for safety/security reasons': -1,
    'Moved to another city/country': -2,
    'Schedule conflict: has a daytime job': -3,
    'Unable to attend for family reasons': -4,
    'Unable to attend for health reasons': -5,
    'Unknown (beneficiary cannot be reached)': -6,
    'Other': -7,
}
fu['closure_reason'] = fu['closure_reason'].replace(closure_reason_codes).astype('Int64')

In [14]:
# Clients who have not started the Follow-up Assessment yet:
# every assessment date is empty and the status code is 0.
no_assessment_dates = fu[date_columns].isna().all(axis=1)
status_is_zero = fu['status'].eq(0)
fu_not_started = no_assessment_dates & status_is_zero
fu.loc[fu_not_started]

Unnamed: 0,fusp,fuspi,rid,fcid,firstname,lastname,3m,6m,12m,status,closure_reason,note
88,YQ,89,R0183,8110020101,رفيق,خنوس,NaT,NaT,NaT,0,,
146,SA,53,R0599,8110050093,بسام,محمد,NaT,NaT,NaT,0,,
147,SA,54,R0487,8110080005,رياض,أسعد,NaT,NaT,NaT,0,,
148,SA,55,R0203,8110050095,أنس,عبد الله,NaT,NaT,NaT,0,,
149,SA,56,R0274,8110020090,محمد,نديوي,NaT,NaT,NaT,0,,
150,SA,57,R0473,8110050083,منال,محمد,NaT,NaT,NaT,0,,
151,SA,58,R0117,8110050055,آية,حمد,NaT,NaT,NaT,0,,
170,IJ,12,R0573,8110080008,حسناء,بصمجي,NaT,NaT,NaT,0,,


In [32]:
# Dropping rows of clients who have not started the Follow-up Assessment. They shouldn't be in the data.
# NOTE(review): the drop below is commented out, so a fresh top-to-bottom run keeps these rows
# in every CSV exported afterwards. The stored `fu` display skips index 170 (one of the
# not-started rows), which suggests the drop was executed in the live kernel at some point.
# Confirm which behaviour is intended before relying on the exports.
# find indices where fu_not_started is True
# indices_to_drop = fu.index[fu_not_started]
# fu = fu.drop(indices_to_drop)

In [None]:
# No duplicated rows.

In [33]:
# Follow-up Assessment Analysis Complete Dataframe
# Written without the row index; still contains the name and note columns.
fu.to_csv(r'010/0100_ps_fu_092302.csv', index=False)

In [34]:
# Follow-up Analysis Basic Dataframe
# Same rows as `fu` minus the name and free-text note columns; `fu` itself is left unchanged
# because drop() returns a new frame.
columns_to_remove = ['firstname', 'lastname', 'note']
basic_fu = fu.drop(columns=columns_to_remove)
basic_fu.to_csv(r'011/0100_ps_fu_092310.csv', index=False)

### FOLLOW-UP ASSESSMENT DATA PREPARED AND READY