In [1]:
import os
import re

import sys
base_directory = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/"
sys.path.append(f"{base_directory}")

from src.utils.config_reader import ConfigReader, Logger
from src.utils.utils import GeoDataFrameOperations, custom_preprocessing_infrastructure_return_file as custom_pre
from src.utils.file_pocessor import FileLister, FileProcessor

from src.utils.country_functions.nepal import NepalFunction

In [6]:
# Create an instance of the Logger
log_directory = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/outputs/log"
log_file_name = "preprocessing"
logger = Logger(log_directory, log_file_name)

# Readding the configuration file to the preprocessing
config_file_path = "config/nepal/data_setup.yaml"
config_file_path = f"{base_directory}{config_file_path}"

try:
    config_data = ConfigReader.read_yaml_file(config_file_path)
    # config_data = config_data['preprocessing']
    txt_msg = "Content of {} file successfully read".format(config_file_path)
    logger.info(txt_msg)
except Exception as e:
    txt_msg = f"Error reading configuration file: {str(e)}"
    logger.error(txt_msg)

INFO: Content of /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/config/nepal/data_setup.yaml file successfully read


In [7]:

cache_files = f"{base_directory}{config_data['cache_files']}"
cache_files


'/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/cache/'

### Population

In [8]:
attribute = 'population'
txt_msg = f"Processing {attribute} attribute"
logger.info(txt_msg)
try :
    population_task = config_data[attribute]
except Exception as e:
    population_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

attribute_destinity = f"{cache_files}{attribute}/"
if not os.path.exists(attribute_destinity):
    os.makedirs(attribute_destinity)

INFO: Processing population attribute


In [9]:
# Processing Population task
for task in population_task:
    task_type = task['name']
    input_path = f"{base_directory}{task['source']}"
    txt_msg = f"Processing population file: {input_path}"
    logger.info(txt_msg)
    
    # Transforming raster
    try:
        gdf = FileProcessor.read_tif(input_path, 'polygon')
        gdf.columns = ['band', 'population', 'geometry']
        gdf = gdf[['population', 'geometry']]
        # gdf['geometry'] = gdf.geometry.centroid
    except Exception as e:
        txt_msg = f"{str(e)}"
        logger.error(txt_msg)       
    
    # Saving file
    try:
        output_name = f'{task_type}.gpkg'
        FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
        txt_msg = f"Successfully file processed"
        logger.info(txt_msg)
    except Exception as e:
        txt_msg = f"{str(e)}"
        logger.error(txt_msg) 


INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/popu/nepal_npl_ct_popu_pop_sp_py_GHS_2023_p_u_Clipped_E2020_Nepal_54009.tif


100%|██████████| 5511/5511 [01:34<00:00, 58.51it/s]


INFO: Successfully file processed


### Infrastructure

In [8]:
attribute = 'infrastructures'
txt_msg = f"Processing {attribute} attribute"
logger.info(txt_msg)
try :
    infrastructures_task = config_data[attribute]
except Exception as e:
    infrastructures_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

attribute_destinity = f"{cache_files}{attribute}/"
if not os.path.exists(attribute_destinity):
    os.makedirs(attribute_destinity)



INFO: Processing infrastructures attribute


In [10]:
for task in infrastructures_task:
    task_type = task['name']
    task_funtion= task['funtion']
    input_path = f"{base_directory}{task['source']}"
    
    txt_msg = f"Processing population file: {input_path}"
    logger.info(txt_msg)
    
    # Real file raster
    try:
        gdf = FileProcessor.read_geopackage(input_path)
        if task_filter is not None:
            for filter_str in task_filter:
                gdf = gdf.query(filter_str)
                gdf = gdf.reset_index(drop=True)
    except Exception as e:
        txt_msg = f"{str(e)}"
        logger.error(txt_msg) 
    
    print(task_type)
    print(task_funtion)
    print(input_path)

INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
ERROR: name 'task_filter' is not defined
health_care
nepal_health_care_type
/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
ERROR: name 'task_filter' is not defined
school
["HF_T_RO!='Primary Health Care Center'"]
/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg


In [9]:
# Processing Infrastructure task
for task in infrastructures_task:
    task_type = task['name']
    task_filter= task['filter']
    input_path = f"{base_directory}{task['source']}"
    
    txt_msg = f"Processing population file: {input_path}"
    logger.info(txt_msg)
    
    # Transforming raster
    try:
        gdf = FileProcessor.read_geopackage(input_path)
        if task_filter is not None:
            for filter_str in task_filter:
                gdf = gdf.query(filter_str)
                gdf = gdf.reset_index(drop=True)
    except Exception as e:
        txt_msg = f"{str(e)}"
        logger.error(txt_msg) 
            
    # Saving file
    try:
        output_name = f'{task_type}_infrastructure.gpkg'
        FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
        txt_msg = f"Successfully file processed"
        logger.info(txt_msg)
    except Exception as e:
        txt_msg = f"{str(e)}"
        logger.error(txt_msg) 
    

INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
INFO: Successfully file processed
INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
INFO: Successfully file processed


### Hazards

In [10]:
attribute = 'hazards'
txt_msg = f"Processing {attribute} attribute"
logger.info(txt_msg)
try :
    hazards_task = config_data[attribute]
except Exception as e:
    hazards_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

attribute_destinity = f"{output_files}{attribute}/"
if not os.path.exists(attribute_destinity):
    os.makedirs(attribute_destinity)

INFO: Processing hazards attribute


In [11]:
# Processing Hazards task
for task in hazards_task:
    task_name = task['name']
    task_historical = task['historical']
    return_periods = task['return_periods']
    substantial_damage = task['substantial_damage']
    complete_destruction = task['complete_destruction']
    input_path = f"{base_directory}{task['source']}"
    

    if task_historical:
        txt_msg = f"The file contents historical records for {task_name}"
        logger.info(txt_msg)
        
        files = FileLister.list_files(input_path)
        valid_files = files
        # Read valid files 
        for file in valid_files:
            try:     
                file_name = file[0]
                file_path = file[1]  
                # Transforming raster
                gdf = FileProcessor.read_tif(file_path, 'polygon')
                # Column damage generation
                gdf = GeoDataFrameOperations.calculate_damage(gdf, substantial_damage, complete_destruction)
            except Exception as e:
                txt_msg = f"Error: '{e}'"
                logger.error(txt_msg)
            # Saving file
            try:
                output_name = f'{task_name}_historical.gpkg'
                FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
                txt_msg = f"Successfully file processed"
                logger.info(txt_msg)
            except Exception as e:
                txt_msg = f"{str(e)}"
                logger.error(txt_msg) 
        
        pass
    else:
        txt_msg = f"The file contents periodical records for {task_name}"
        logger.info(txt_msg)
        files = FileLister.list_files(input_path)
        
        valid_files = custom_pre.append_period_to_filelist(files, return_periods)
        # Read valid files 
        for file in valid_files:
            file_name = file[0]
            file_path = file[1]
            file_period = file[2]
            txt_msg = f" ==> Processing period {file_period}: {file_name}"
            logger.info(txt_msg)
            try:      
                # Transforming raster
                gdf = FileProcessor.read_tif(file_path, 'polygon')
                # Column damage generation
                gdf = GeoDataFrameOperations.calculate_damage(gdf, substantial_damage, complete_destruction)
            except Exception as e:
                txt_msg = f"Error: '{e}'"
                logger.error(txt_msg)         
            # Saving file
            try:
                output_name = f'{task_name}_period_{file_period}.gpkg'
                FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
                txt_msg = f"Successfully file processed"
                logger.info(txt_msg)
            except Exception as e:
                txt_msg = f"{str(e)}"
                logger.error(txt_msg) 
    print()


INFO: The file contents periodical records for earthquakes
INFO:  ==> Processing period 975: pga_975


100%|██████████| 361/361 [00:01<00:00, 256.22it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 2475: pga_2475


100%|██████████| 361/361 [00:01<00:00, 257.02it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 475: pga_rp475


100%|██████████| 361/361 [00:01<00:00, 261.00it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 1500: pga_1500


100%|██████████| 361/361 [00:01<00:00, 226.12it/s]


INFO: Successfully file processed

INFO: The file contents historical records for landslides


100%|██████████| 361/361 [00:01<00:00, 225.39it/s]


INFO: Successfully file processed



### END

In [12]:
# Write different types of messages to the log file
# logger.info('Information message')
# logger.warning('Warning message')
# logger.error('Error message')
# logger.critical('Critical message')
# logger.debug('Debug message')


In [34]:
temp_variable = config_data['infrastructures'][0]['funtion']
temp_variable


nepal_instance = NepalFunction()

In [38]:
if hasattr(nepal_instance, temp_variable):
    func = getattr(nepal_instance, temp_variable)
    print(func(data))
else:
    print(f"La función {temp_variable} no está definida en MyClass.")

     GHFD_ID HF_ID_N                                         HF_N_RO HF_N_LOC  \
0       None    None                                  1,50,02,13,101     None   
1       None    None                                  2,90,05,40,101     None   
2       None    None                                  32 Dental Care     None   
3       None    None    ?? ??? ???????? ??????? ??? ????????? ??????     None   
4       None    None                                    ??? ????????     None   
...      ...     ...                                             ...      ...   
6985    None    None                              unani aushadhalaya     None   
6986    None    None  unique collage of medecal science and hospital     None   
6987    None    None                               veterinary clinic     None   
6988    None    None                                         अस्पताल     None   
6989    None    None                                   �kha Hospital     None   

            HF_T_RO HF_T_LO

In [30]:
def nepal_health_care_typo(df):
    return df


if temp_variable in globals():
    func = globals()[temp_variable]
    print(func(data))

     GHFD_ID HF_ID_N                                         HF_N_RO HF_N_LOC  \
0       None    None                                  1,50,02,13,101     None   
1       None    None                                  2,90,05,40,101     None   
2       None    None                                  32 Dental Care     None   
3       None    None    ?? ??? ???????? ??????? ??? ????????? ??????     None   
4       None    None                                    ??? ????????     None   
...      ...     ...                                             ...      ...   
6985    None    None                              unani aushadhalaya     None   
6986    None    None  unique collage of medecal science and hospital     None   
6987    None    None                               veterinary clinic     None   
6988    None    None                                         अस्पताल     None   
6989    None    None                                   �kha Hospital     None   

            HF_T_RO HF_T_LO

In [12]:
import geopandas as gpd

file_path = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg"
data = gpd.read_file(file_path)


data

Unnamed: 0,GHFD_ID,HF_ID_N,HF_N_RO,HF_N_LOC,HF_T_RO,HF_T_LO,HF_OWN,HF_ADD_STR,HF_ADD_NO,HF_ADD_PC,...,S_COOR,M_COOR,AC_COOR,ADM1_C,ADM1_N_RO,ADM2_C,ADM2_N_RO,ADM3_C,ADM3_N_RO,geometry
0,,,1500213101,,Health Post,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B99974579,Lumbini,NPL-ADM2-48590121B71411499,ARGHAKHANCHI,NPL-ADM3-92635248B26954669,Sandhikharka,POINT (83.09822 28.02166)
1,,,2900540101,,Hospital,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B3223679,Bagmati,NPL-ADM2-48590121B38233782,RASUWA,NPL-ADM3-92635248B80242974,Gosaikunda,POINT (85.29664 28.11224)
2,,,32 Dental Care,,Dental Clinic,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B61643740,Gandaki,NPL-ADM2-48590121B48278102,KASKI,NPL-ADM3-92635248B52656349,Pokhara Lekhnath,POINT (84.05683 28.16277)
3,,,?? ??? ???????? ??????? ??? ????????? ??????,,Hospital,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B53695686,JHAPA,NPL-ADM3-92635248B18880049,Bhadrapur,POINT (88.08037 26.55999)
4,,,??? ????????,,Veterinary,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B31954132,Province 2,NPL-ADM2-48590121B93826412,RAUTAHAT,NPL-ADM3-92635248B98783072,Rajdevi,POINT (85.30761 26.76909)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6985,,,unani aushadhalaya,,Pharmacy,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B3223679,Bagmati,NPL-ADM2-48590121B68869127,LALITPUR,NPL-ADM3-92635248B95483039,Lalitpur,POINT (85.31507 27.68067)
6986,,,unique collage of medecal science and hospital,,Hospital,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B31954132,Province 2,NPL-ADM2-48590121B79767844,SAPTARI,NPL-ADM3-92635248B77151568,Rupani,POINT (86.72266 26.59899)
6987,,,veterinary clinic,,Veterinary,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B3223679,Bagmati,NPL-ADM2-48590121B82193346,KATHMANDU,NPL-ADM3-92635248B7890871,Kathmandu,POINT (85.34854 27.69868)
6988,,,अस्पताल,,Other,,,,,,...,NDRRNA,,,NPL-ADM1-38925275B99974579,Lumbini,NPL-ADM2-48590121B71411499,ARGHAKHANCHI,NPL-ADM3-92635248B78015542,Sitganga,POINT (83.05293 27.88568)


In [14]:
set(data['HF_T_RO'])

{'Aayurveda offices',
 'Ayurveda Aushadhalaya',
 'Ayurveda Hospital',
 'Ayurvedic',
 'Basic Health Care Center',
 'Basic Health Service Centre',
 'Basic Hospital',
 'C.H.U.',
 'Clinic',
 'Clinic (Eye/Medical)',
 'Community Health Unit',
 'Dental Clinic',
 'Diagnostic Center',
 'Diagnostic Center (Radiology/Laboratory/X-ray etc)',
 'Diagnostic Centre',
 'District Clinic (Including Institutional)',
 'District Hospital',
 'General Hospital',
 'General and Emergency',
 'Health Post',
 'Homeopathy Hospital',
 'Hospital',
 'Jiri Hospital',
 'MCH Clinic',
 'Medical College',
 'Medical/Pharmecy',
 None,
 'Nursing Home',
 'Other',
 'Pathology',
 'Pharmacy',
 'Poly Clinic',
 'Primary Health Care Center',
 'Primary Hospital',
 'Private Laboratory',
 'Radiology',
 'Regional Hospital',
 'Rehabilitation',
 'Specialized Hospital',
 'Sub Regional Hospital',
 'Teaching Hospital',
 'Urban Health Center',
 'Veterinary',
 'Zonal Ayurveda Aushadhalaya',
 'Zonal Hospital',
 'baby_hatch',
 'doctors',
 'nursi