In [1]:
import os
import re

import sys
# Project root used as the prefix for every relative path in this notebook.
# The trailing slash matters: later cells build paths by plain string
# concatenation (f"{base_directory}{relative_path}").
# NOTE(review): this is a machine-specific absolute path; adjust it before
# running the notebook on another machine.
base_directory = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/"

# Make the project's `src` package importable. The membership check keeps the
# cell idempotent: re-running it no longer appends duplicate sys.path entries.
if base_directory not in sys.path:
    sys.path.append(base_directory)

In [2]:
from src.utils.config_reader import ConfigReader, Logger
from src.utils.utils import GeoDataFrameOperations, custom_preprocessing_infrastructure_return_file as custom_pre
from src.utils.file_pocessor import FileLister, FileProcessor

In [3]:
# Create an instance of the Logger
# The log folder is derived from base_directory so the project root is declared
# in a single place; the resulting path is the same as the previously
# hard-coded one.
log_directory = f"{base_directory}data/nepal/outputs/log"
log_file_name = "preprocessing"
logger = Logger(log_directory, log_file_name)

In [4]:
# Reading the configuration file that drives the preprocessing
config_file_path = "config/nepal/data_setup.yaml"
config_file_path = f"{base_directory}{config_file_path}"

try:
    config_data = ConfigReader.read_yaml_file(config_file_path)
    # config_data = config_data['preprocessing']
    logger.info(f"Content of {config_file_path} file successfully read")
except Exception as e:
    logger.error(f"Error reading configuration file: {str(e)}")
    # Every later cell indexes config_data. Swallowing the error here would
    # leave it undefined (fresh kernel) or silently stale (re-run), so stop
    # the notebook at the real cause instead.
    raise

INFO: Content of /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/config/nepal/data_setup.yaml file successfully read


In [5]:
# Write different types of messages to the log file
# logger.info('Information message')
# logger.warning('Warning message')
# logger.error('Error message')
# logger.critical('Critical message')
# logger.debug('Debug message')


In [6]:

# Root folder for all generated outputs: the project base directory followed by
# the 'output_files' entry of the configuration.
output_files = "{}{}".format(base_directory, config_data['output_files'])
output_files


'/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/outputs/'

### Population

In [7]:
# Look up the population tasks in the configuration and prepare their output folder.
attribute = 'population'
try:
    population_task = config_data[attribute]
except Exception as e:
    # Keep the notebook running; the missing section is recorded in the log.
    population_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

# Destination folder for this attribute's outputs. exist_ok=True creates it
# when missing and is a no-op when it already exists, without the
# check-then-create race of os.path.exists() + os.makedirs().
attribute_destinity = f"{output_files}{attribute}/"
os.makedirs(attribute_destinity, exist_ok=True)

In [8]:
# Processing Population task
# For each configured task: read the population raster as polygons, keep the
# population and geometry columns, and save the result as a GeoPackage.
# `population_task or []` skips the loop when the configuration lookup failed
# and left population_task as None.
for task in population_task or []:
    task_type = task['name']
    input_path = f"{base_directory}{task['source']}"
    logger.info(f"Processing population file: {input_path}")

    # Transforming raster
    try:
        gdf = FileProcessor.read_tif(input_path, 'polygon')
        gdf.columns = ['band', 'population', 'geometry']
        gdf = gdf[['population', 'geometry']]
        # gdf['geometry'] = gdf.geometry.centroid
    except Exception as e:
        logger.error(f"{str(e)}")
        # Skip the save step: otherwise gdf is either undefined or still holds
        # the previous task's data, which would be written under this task's name.
        continue

    # Saving file
    try:
        output_name = f'{task_type}.gpkg'
        FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
        logger.info("Successfully file processed")
    except Exception as e:
        logger.error(f"{str(e)}")


INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/popu/nepal_npl_ct_popu_pop_sp_py_GHS_2023_p_u_Clipped_E2020_Nepal_54009.tif


100%|██████████| 5511/5511 [01:30<00:00, 60.88it/s] 


INFO: Successfully file processed


### Infrastructure

In [9]:
# Look up the infrastructure tasks in the configuration and prepare their output folder.
attribute = 'infrastructures'
try:
    infrastructures_task = config_data[attribute]
except Exception as e:
    # Keep the notebook running; the missing section is recorded in the log.
    infrastructures_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

# Destination folder for this attribute's outputs. exist_ok=True creates it
# when missing and is a no-op when it already exists, without the
# check-then-create race of os.path.exists() + os.makedirs().
attribute_destinity = f"{output_files}{attribute}/"
os.makedirs(attribute_destinity, exist_ok=True)



In [10]:
# Processing Infrastructure task
# For each configured task: read the source GeoPackage, apply the task's query
# filters in order, and save the result as '<name>_infrastructure.gpkg'.
# `infrastructures_task or []` skips the loop when the configuration lookup
# failed and left infrastructures_task as None.
for task in infrastructures_task or []:
    task_type = task['name']
    # A task without a 'filter' entry is treated as "no filtering".
    task_filter = task.get('filter')
    input_path = f"{base_directory}{task['source']}"

    logger.info(f"Processing infrastructure file: {input_path}")

    # Reading and filtering the vector layer
    try:
        gdf = FileProcessor.read_geopackage(input_path)
        if task_filter is not None:
            for filter_str in task_filter:
                gdf = gdf.query(filter_str)
                gdf = gdf.reset_index(drop=True)
    except Exception as e:
        logger.error(f"{str(e)}")
        # Skip the save step: otherwise gdf is either undefined or still holds
        # the previous task's data, which would be written under this task's name.
        continue

    # Saving file
    try:
        output_name = f'{task_type}_infrastructure.gpkg'
        FileProcessor.save_to_geopackage(gdf, attribute_destinity, output_name)
        logger.info("Successfully file processed")
    except Exception as e:
        logger.error(f"{str(e)}")
    

INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
INFO: Successfully file processed
INFO: Processing population file: /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/heal/nepal_npl_ct_heal_heal_sp_tab_NDRRNA_14022024.gpkg
INFO: Successfully file processed


### Hazards

In [11]:
# Look up the hazard tasks in the configuration and prepare their output folder.
attribute = 'hazards'
try:
    hazards_task = config_data[attribute]
except Exception as e:
    # Keep the notebook running; the missing section is recorded in the log.
    hazards_task = None
    txt_msg = f"Atribute {str(e)} not found"
    logger.error(txt_msg)

# Destination folder for this attribute's outputs. exist_ok=True creates it
# when missing and is a no-op when it already exists, without the
# check-then-create race of os.path.exists() + os.makedirs().
attribute_destinity = f"{output_files}{attribute}/"
os.makedirs(attribute_destinity, exist_ok=True)

In [12]:
# Processing Hazards task
def _process_hazard_file(file_entry, output_dir, output_name, substantial_damage, complete_destruction):
    """Convert one hazard raster to polygons, add the damage column and save it.

    Parameters
    ----------
    file_entry : sequence
        Entry from the file listing; index 1 is used as the raster path.
    output_dir : str
        Folder the GeoPackage is written to.
    output_name : str
        File name of the GeoPackage.
    substantial_damage, complete_destruction
        Task thresholds, passed through unchanged to
        GeoDataFrameOperations.calculate_damage.

    Returns
    -------
    bool
        True when the file was saved; False when any step failed (the error
        is logged and nothing is saved for this file).
    """
    try:
        file_path = file_entry[1]
        # Transforming raster
        gdf = FileProcessor.read_tif(file_path, 'polygon')
        # Column damage generation
        gdf = GeoDataFrameOperations.calculate_damage(gdf, substantial_damage, complete_destruction)
    except Exception as e:
        logger.error(f"Error: '{e}'")
        # Do not fall through to the save step: there is no valid result for
        # this file, and saving would write stale data from a previous file.
        return False

    # Saving file
    try:
        FileProcessor.save_to_geopackage(gdf, output_dir, output_name)
        logger.info("Successfully file processed")
    except Exception as e:
        logger.error(f"{str(e)}")
        return False
    return True


# `hazards_task or []` skips the loop when the configuration lookup failed and
# left hazards_task as None.
for task in hazards_task or []:
    task_name = task['name']
    task_historical = task['historical']
    return_periods = task['return_periods']
    substantial_damage = task['substantial_damage']
    complete_destruction = task['complete_destruction']
    input_path = f"{base_directory}{task['source']}"

    if task_historical:
        logger.info(f"The file contents historical records for {task_name}")

        # NOTE(review): every listed file is written to the same output name,
        # so with more than one input file only the last one survives.
        # Confirm whether a single historical file per hazard is expected.
        output_name = f'{task_name}_historical.gpkg'
        for file in FileLister.list_files(input_path):
            _process_hazard_file(file, attribute_destinity, output_name,
                                 substantial_damage, complete_destruction)
    else:
        logger.info(f"The file contents periodical records for {task_name}")
        files = FileLister.list_files(input_path)

        # Each entry of valid_files is indexed as (name, path, period) below.
        valid_files = custom_pre.append_period_to_filelist(files, return_periods)
        for file in valid_files:
            file_name = file[0]
            file_period = file[2]
            logger.info(f" ==> Processing period {file_period}: {file_name}")
            output_name = f'{task_name}_period_{file_period}.gpkg'
            _process_hazard_file(file, attribute_destinity, output_name,
                                 substantial_damage, complete_destruction)
    # Blank line in the cell output to separate one hazard's log from the next.
    print()
        
        
        

                
      

INFO: The file contents periodical records for earthquakes
INFO:  ==> Processing period 975: pga_975


100%|██████████| 361/361 [00:01<00:00, 255.61it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 2475: pga_2475


100%|██████████| 361/361 [00:01<00:00, 258.15it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 475: pga_rp475


100%|██████████| 361/361 [00:01<00:00, 257.34it/s]


INFO: Successfully file processed
INFO:  ==> Processing period 1500: pga_1500


100%|██████████| 361/361 [00:01<00:00, 236.89it/s]


INFO: Successfully file processed

INFO: The file contents historical records for landslides


100%|██████████| 361/361 [00:01<00:00, 228.87it/s]


INFO: Successfully file processed



### END