In [None]:
import pandas as pd
from src.all_in_one import *

# Screening Dataframe

In [None]:
# Dataset definition for the 'Scr' sheet of the project config
# (presumably the column/dtype specification consumed by dtype_trans — confirm in src.all_in_one).
sheet_name = 'Scr'
df_dataset = Dataset(config_file, sheet_name)

# Location of the processed screening CSV inside the processed-data folder.
path = os.path.join(processed_data, 'scr_1223.csv')

In [None]:
# Read the processed screening CSV and hand it straight to dtype_trans together with
# the 'Scr' dataset definition (presumably casts columns to their configured dtypes —
# confirm against src.all_in_one).
scr = dtype_trans(pd.read_csv(path), df_dataset)

In [None]:
# Inspect the frame right after loading and dtype conversion.
scr

In [None]:
# Browse the distinct ('source', 'source_org') combinations present in the data,
# i.e. the referral source and the referring organization columns.
referral_cols = ['source', 'source_org']
scr.loc[:, referral_cols].drop_duplicates()

In [None]:
# Normalise the raw labels in 'source' (referral source) and 'source_org'
# (referring organization) to short analysis codes.
# Keys must match the raw cell values exactly, which is why both
# 'Outreach session' and 'outreach session' are listed.
source_codes = {
    'ADMSP beneficiary (current or former)': 'ADMSP_Beneficiary',
    'Social Media': 'Social_Media',
    'I/NGO or humanitarian organization': 'NGO',
    'Outreach session': 'Outreach_Session',
    'Family member or friend': 'Family_or_Friend',
    'ADMSP member': 'ADMSP_Member',
    'outreach session': 'Outreach_Session',
}

# Arabic-script organization labels as they appear in the raw data -> short codes.
source_org_codes = {
    'تعافي': 'Taafe',
    'عائلات من أجل الحرية': 'FFF',
    'جمعية الحياة': 'ALHAYAT',
    'سامز': 'SAMS',
    'جلسة تعريفية لفريق الرابطة في الآيدا': 'IDA',
    'جلسة توعية في مركز العائلة': 'FC',
}

# Nested form {column: {old: new}} restricts each mapping to its own column.
values_to_replace = {
    'source': source_codes,
    'source_org': source_org_codes,
}

# Rebind rather than mutate with inplace=True: the cell's effect is then an explicit
# assignment, and objects displayed by earlier cells are not silently changed.
scr = scr.replace(values_to_replace)

In [None]:
# Fill missing 'source_org' values based on 'source': every source code listed here
# maps to the placeholder 'NA_'.
mapping_values = dict.fromkeys(
    ['ADMSP_Beneficiary', 'Social_Media', 'Family_or_Friend', 'ADMSP_Member', 'IRM'],
    'NA_',
)

# Rows whose 'source' is one of the mapping keys.
has_placeholder = scr['source'].isin(list(mapping_values))
filter_values = scr.loc[has_placeholder, 'source']
# Translate those sources to their placeholder; the index is kept so fillna can align on it.
replace_values = filter_values.map(mapping_values)
# Only NaN cells of 'source_org' are filled; rows outside the filtered index stay untouched.
scr['source_org'] = scr['source_org'].fillna(replace_values)

In [None]:
# Inspect the frame after recoding 'source' / 'source_org' and filling missing 'source_org'.
scr

# Screening Analysis Version

In [None]:
# Recode the location value 'GTZ' to 'GZT' (presumably a transposition typo — 'GZT' is
# the code used for the location elsewhere in this notebook).
is_gtz = scr['sc_loc'].eq('GTZ')
scr.loc[is_gtz, 'sc_loc'] = 'GZT'

In [None]:
# Inspect the frame after recoding 'GTZ' to 'GZT' in 'sc_loc'.
scr

In [None]:
# Data from before 2023 has no value for the activity location, 'sc_loc', because all
# activities were implemented in GZT at that time; back-fill 'GZT' for those rows.
def _before_2023_or_missing(column):
    """Boolean mask: True where scr[column] is earlier than '2023-01-01' or is missing."""
    values = scr[column]
    return (values < '2023-01-01') | values.isna()


# A row qualifies only when all three columns are pre-2023 or missing.
# NOTE(review): this also selects rows where all three values are missing — confirm intended.
condition = (
    _before_2023_or_missing('sc_s1')
    & _before_2023_or_missing('pei_pre_as')
    & _before_2023_or_missing('sc_re')
)

# Fill only locations that are still missing; existing 'sc_loc' values are left untouched.
scr.loc[condition & scr['sc_loc'].isna(), 'sc_loc'] = 'GZT'


In [None]:
# Persist the completed screening analysis frame without the index column.
# NOTE: `path` is the same file that was read at the top of the notebook, so this
# overwrites the input CSV with the cleaned version.
scr.to_csv(path_or_buf=path, index=False)

### SCREENING DATA IS READY