In [None]:
import numpy as np
import os
import pandas as pd
import warnings

In [None]:
from Scripts.data_reader import *
from Scripts.myfunctions import *
from Scripts.parse_funcs import *

In [None]:
# Render floats with one fixed decimal instead of scientific notation.
# Undo with: pd.reset_option('display.float_format')
pd.options.display.float_format = '{:.1f}'.format

# Never truncate the text shown inside a column.
pd.options.display.max_colwidth = None

# Silence UserWarnings coming from openpyxl (the data-validation notice).
warnings.filterwarnings(action="ignore", module="openpyxl", category=UserWarning)

In [None]:
# Input locations, relative to the notebook's working directory.
path_to_excels_files_directory = '../1023'                    # folder with the Excel tracking tools
path_to_ingredients_file = '../static_data/ingredients.yaml'  # YAML handed to YAMLDataReader

In [None]:
# Reader over the ingredients YAML; later cells query it for Excel passwords and variable-name lists.
features = YAMLDataReader(path_to_ingredients_file)

In [None]:
# Passwords for the workbooks in the Excel directory; passed to ExcelDecryptor further down.
passwords = features.get_excel_passwords_by_directory(path_to_excels_files_directory)

In [None]:
# Preview the level-2 'screening' variable names (the same lookup is stored as `to_service` below).
features.get_variable_names_by_dataset_and_level('screening', 2)

In [None]:
# Identifiers shared by the screening cells below.
dataset = 'screening'   # dataset key used when querying `features`
sheet = 'Scr'           # worksheet taken from each workbook
sp = 'scsp'             # name of the provider-code column inserted into each sheet

# Variable-name lists for the dataset, grouped by the lookup that produces them.
columns_names = features.get_variable_names_by_dataset(dataset)
basic_columns, to_service = (
    features.get_variable_names_by_dataset_and_level(dataset, level)
    for level in (0, 2)
)
date_columns, int_columns = (
    features.get_variable_names_by_dataset_and_type(dataset, dtype_name)
    for dtype_name in ('datetime64[ns]', 'Int64')
)

In [None]:
# Read the password-protected workbooks from the Excel directory.
# The result is indexed below as tracking_tools[<workbook file name>][<sheet name>].
decryptor = ExcelDecryptor(path_to_excels_files_directory, passwords)
tracking_tools = decryptor.read_encrypted_excels()

In [None]:
# NOTE(review): there are no call parentheses here. If `print_data_structure` is a method rather
# than a property, this cell only shows the bound-method repr and prints nothing — confirm and add `()`.
decryptor.print_data_structure

# Process Screening Sheets

In [None]:
def load_screening_sheet(workbook_name, provider_code):
    """Return one provider's screening sheet, cleaned and tagged with its provider code.

    The sheet is taken from ``tracking_tools[workbook_name][sheet]`` and is left
    untouched: every step works on a new frame, so the cell can be re-run safely
    (re-labelling the stored sheet in place made a second run fail or act on
    already-modified data).

    Parameters
    ----------
    workbook_name : str
        Key of the workbook in ``tracking_tools`` (the Excel file name).
    provider_code : str
        Value written to the ``sp`` column on every row, e.g. ``'YQ'``.

    Returns
    -------
    pandas.DataFrame
        Columns are ``sp`` followed by ``columns_names``. The first sheet row and
        every row with no value in any of ``basic_columns`` are removed.

    Raises
    ------
    ValueError
        If the sheet's column count differs from ``len(columns_names)``.
    """
    raw_sheet = tracking_tools[workbook_name][sheet]
    cleaned = (
        raw_sheet
        .set_axis(columns_names, axis=1)  # returns a new frame; the stored sheet keeps its labels
        .iloc[1:]                         # first row is not data (presumably a second header row — confirm)
        .reset_index(drop=True)
        .dropna(subset=basic_columns, how='all')
    )
    cleaned.insert(0, sp, provider_code)
    return cleaned


# One cleaned frame per service provider.
# NOTE(review): the YQ file name uses '-v04' while the others use '_v04'; kept exactly as in the data.
yq = load_screening_sheet('tt_psc_YQ-v04.xlsx', 'YQ')
sa = load_screening_sheet('tt_psc_SA_v04.xlsx', 'SA')
ij = load_screening_sheet('tt_psc_IJ_v04.xlsx', 'IJ')
la = load_screening_sheet('tt_psc_LA_v04.xlsx', 'LA')
mt = load_screening_sheet('tt_psc_MT_v04.xlsx', 'MT')

In [None]:
# Total number of cleaned rows across the five provider sheets.
scr_rows = sum(len(frame) for frame in (mt, yq, sa, ij, la))
scr_rows

In [None]:
# Stack the five provider frames into one; ignore_index gives the result a fresh 0..n-1 index.
screening = pd.concat([yq, sa, ij, la, mt], ignore_index=True)

In [None]:
# Display the combined frame for a visual check.
screening

In [None]:
# Replace the in-memory frame with the CSV stored on disk.
# NOTE(review): the export line is commented out, so a fresh "Restart & Run All" only works
# if this file already exists from an earlier run; the frame concatenated above is discarded here.
# screening.to_csv(r'../1023/01/pssc_1023.csv', index=False)
screening = pd.read_csv(r'../1023/01/pssc_1023.csv')
screening

In [None]:
# Show the record with rid 'R0486'. The commented line is a manual correction of its 'sex'
# value that is currently disabled.
screening[screening['rid'] == 'R0486']
# screening.loc[screening['rid'] == 'R0486', 'sex'] = 'Male'


# Screening Dataframe

In [None]:
# Load the processed screening export into a new frame.
# NOTE(review): this path ('../Data/Processed/1023/') differs from the one read in the previous
# section ('../1023/01/') — confirm both point to the same export.
scr_data = pd.read_csv(r'../Data/Processed/1023/pssc_1023.csv')
# Optional whitespace clean-up of string cells, left disabled:
# scr_data = scr_data.applymap(lambda x: x.strip() if isinstance(x, str) else x)

In [None]:
# (rows, columns) of the loaded frame.
scr_data.shape

In [None]:
# Column dtypes and non-null counts as read from the CSV.
scr_data.info()

In [None]:
# Display the frame before any recoding.
scr_data

In [None]:
# Distinct (referral source, referring organization) combinations present in the data.
scr_data.loc[:, ['srs', 'srsorg']].drop_duplicates()

In [None]:
# Short codes for the referral sources ('srs').
# Both capitalisations of "outreach session" are mapped to the same code.
srs_codes = {
    'ADMSP beneficiary (current or former)': 'ADMSP_Beneficiary',
    'Social Media': 'Social_Media',
    'I/NGO or humanitarian organization': 'NGO',
    'Outreach session': 'Outreach_Session',
    'Family member or friend': 'Family_or_Friend',
    'ADMSP member': 'ADMSP_Member',
    'outreach session': 'Outreach_Session',
}

# Short codes for the Arabic-script referring-organization names ('srsorg').
srsorg_codes = {
    'تعافي': 'Taafe',
    'عائلات من أجل الحرية': 'FFF',
    'جمعية الحياة': 'ALHAYAT',
    'سامز': 'SAMS',
    'جلسة تعريفية لفريق الرابطة في الآيدا': 'IDA',
    'جلسة توعية في مركز العائلة': 'FC',
}

# Column-scoped replacement: each inner mapping applies only to its own column.
values_to_replace = {'srs': srs_codes, 'srsorg': srsorg_codes}
scr_data.replace(values_to_replace, inplace=True)

In [None]:
# Referral sources for which a missing referring organization ('srsorg') is filled with 'NA_'.
mapping_values = {
    'ADMSP_Beneficiary': 'NA_',
    'Social_Media': 'NA_',
    'Family_or_Friend': 'NA_',
    'ADMSP_Member': 'NA_',
    'IRM': 'NA_',
}

# Keep only the 'srs' entries listed above, then translate them to their fill value.
# The result keeps the original row labels, so fillna aligns it row by row.
filter_values = scr_data.loc[scr_data['srs'].isin(list(mapping_values)), 'srs']
replace_values = filter_values.map(mapping_values)

# Only rows whose 'srsorg' is missing receive the fill value; existing entries are untouched.
scr_data['srsorg'] = scr_data['srsorg'].fillna(replace_values)

In [None]:
# Display the frame after recoding 'srs' / 'srsorg'.
scr_data

In [None]:
# Cast the integer variables to pandas' nullable Int64 so missing values survive.
for col in int_columns:
    scr_data[col] = scr_data[col].astype('Int64')

# Parse the date variables into datetimes.
for col in date_columns:
    scr_data[col] = pd.to_datetime(scr_data[col])

In [None]:
# Load the stored screening file into `scr` and show the record with rid 'R0486'.
# NOTE(review): both export lines are commented out, so the recoding and dtype changes made to
# `scr_data` above are not written by this notebook run; `scr` is whatever is already on disk.
# scr_data.to_csv(r'010/0000_ps_sc_092301.csv', index=False)
# scr.to_csv(r'010/0000_ps_sc_092301.csv', index=False)
scr = pd.read_csv(r'010/0000_ps_sc_092301.csv')
# Manual correction of the 'sex' value for this record, currently disabled:
# scr.loc[scr['rid'] == 'R0486', 'sex'] = 'Male'
scr[scr['rid'] == 'R0486']


# Screening Analysis Version

In [None]:
# Starting point of the analysis version: the same file that the previous section reads.
scr = pd.read_csv(r'010/0000_ps_sc_092301.csv')

In [None]:
# Display the frame as read from the CSV.
scr

In [None]:
# read_csv does not restore these dtypes, so convert again:
# date variables to datetimes, integer variables to nullable Int64.
scr[date_columns] = scr[date_columns].apply(lambda column: pd.to_datetime(column))
scr[int_columns] = scr[int_columns].astype({name: 'Int64' for name in int_columns})


In [None]:
# Check the dtypes after conversion.
scr.info()

In [None]:
# Display the frame after the dtype conversion.
scr

In [None]:
# Blank out first names that consist of a lone backslash (presumably a data-entry
# placeholder — confirm). The result is assigned back to the column for two reasons:
#   * a chained `scr['firstname'].replace(..., inplace=True)` acts on a temporary object
#     and leaves `scr` unchanged when pandas Copy-on-Write is active;
#   * `replace('\\', None)` is interpreted by older pandas versions as "no value given"
#     and forward-fills the previous row's name instead of producing a missing value.
# `mask` sets the matching entries to missing on every pandas version.
scr['firstname'] = scr['firstname'].mask(scr['firstname'] == '\\')

# Correct the mistyped location code 'GTZ' to 'GZT'.
scr.loc[scr['scloc'] == 'GTZ', 'scloc'] = 'GZT'


In [None]:
# Display the frame after the 'firstname' / 'scloc' corrections.
scr

In [None]:
# Data from before 2023 has no activity-location variable ('scloc') because every activity
# was implemented in GZT, so the missing locations of those records are set to 'GZT'.
# A record qualifies when each of its three dates is either before the cutoff or missing
# (so a record with no dates at all qualifies too).
cutoff = '2023-01-01'
date_checks = [
    (scr[name] < cutoff) | scr[name].isna()
    for name in ('sc1', 'sc2', 'scre')
]
condition = date_checks[0] & date_checks[1] & date_checks[2]

location_missing = scr['scloc'].isna()
scr.loc[condition & location_missing, 'scloc'] = 'GZT'


In [None]:
# Numeric coding of 'sex' (Female -> 1, Male -> 2), stored as nullable integer.
sex_codes = {'Female': 1, 'Male': 2}
scr['sex'] = scr['sex'].replace(sex_codes).astype('Int64')

In [None]:
# Turn the Yes/No answers of the `to_service` variables into 1/0 flags (nullable integer).
yes_no_codes = {'Yes': 1, 'No': 0}
service_flags = scr[to_service].replace(yes_no_codes)
scr[to_service] = service_flags.astype('Int64')

In [None]:
# Add 'sert' — the row-wise sum of the `to_service` flags — at column position 19.
# The membership check keeps the cell re-runnable: the column is inserted only once.
sert_missing = 'sert' not in scr.columns
if sert_missing:
    flag_total = scr[to_service].sum(axis=1)
    scr.insert(loc=19, column='sert', value=flag_total)

In [None]:
# Final check of columns and dtypes before export.
scr.info()

In [None]:
# Screening Analysis Complete Dataframe
# Written without the index column; the path is relative to the notebook's working directory.
scr.to_csv(r'010/0000_ps_sc_092302.csv', index=False)

In [None]:
# Screening Analysis Basic Dataframe: the version used for analysis work, with the
# variables that are not needed there removed. `drop` returns a new frame, so `scr`
# itself keeps all of its columns.
columns_to_drop = ['firstname', 'lastname', 'nat', 'not']
basic_scr = scr.drop(columns=columns_to_drop)
basic_scr.to_csv(r'011/0000_ps_sc_092310.csv', index=False)


### SCREENING DATA IS READY