In [2]:
import sys
base_directory = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/"
sys.path.append(f"{base_directory}")

In [3]:
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

from src.utils.config_reader import ConfigReader, Logger
from src.utils.utils import GeoDataFrameOperations
from src.utils.file_pocessor import FileLister, FileProcessor


In [4]:
# Create an instance of the Logger
log_directory = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/outputs/log"
log_file_name = "processing"
logger = Logger(log_directory, log_file_name)

In [4]:
# Readding the configuration file to the preprocessing
config_file_path = "config/nepal/data_setup.yaml"
config_file_path = f"{base_directory}{config_file_path}"

try:
    config_data = ConfigReader.read_yaml_file(config_file_path)
    txt_msg = "Content of {} file successfully read".format(config_file_path)
    logger.info(txt_msg)
except Exception as e:
    txt_msg = f"Error reading configuration file: {str(e)}"
    logger.error(txt_msg)

INFO: Content of /Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/config/nepal/data_setup.yaml file successfully read


In [5]:
# Output files
cache_files = f"{base_directory}{config_data['output_files']}"
cache_files

# Output task files
output_task = f"{base_directory}{config_data['output_task']}"
output_task

# Administrative region
path_map = f"{base_directory}{config_data['adminitrative_maps']}"
data_map = FileProcessor.read_geopackage(path_map)
    
# Hazards list files
path_hazards = f"{cache_files}hazards/"
data_hazards = FileLister.list_files(path_hazards)

In [18]:
def compute_exposure_in_points(object_file, hazard_files):
    data_object = FileProcessor.read_geopackage(object_file)
    data_object = data_object.reset_index(drop=False)
    data_object = data_object.rename(columns={'index': 'temp_index'})
    data_object_temp = data_object[['temp_index','geometry']]
    data_exposure = data_object_temp.copy()
    subtask_bar = tqdm(total=100, desc=f'Hazard ', leave=False)
    for file in hazard_files:
        hazard_name = file[0]
        print(hazard_name)
        data_hazard = FileProcessor.read_geopackage(file[1])
        data_hazard = data_hazard.rename(columns={'damage': hazard_name})
        data_hazard = gpd.sjoin(data_object_temp, data_hazard, how='left', predicate='intersects')
        data_hazard[hazard_name] = data_hazard[hazard_name].fillna('no damage')
        data_hazard = data_hazard[['temp_index', hazard_name]]
        data_exposure = gpd.GeoDataFrame.merge(data_exposure, data_hazard, on='temp_index', suffixes=('', '_gdf2'))
        del data_hazard
        subtask_bar.update(100/len(hazard_files))
    del data_object_temp
    del data_exposure['geometry']
    data_exposure = gpd.GeoDataFrame.merge(data_object, data_exposure, on='temp_index', suffixes=('', '_gdf2'))
    del data_exposure['temp_index']
    subtask_bar.close()
    return data_exposure

In [19]:
# Task_list
tasks = config_data['tasks']
main_bar = tqdm(total=len(tasks) * 100, desc="Task")
counter = 0
for subtasks in tasks:
    
    subtask_bar = tqdm(total=100, desc=f'Subtask {counter + 1}', leave=False)
    task_name = subtasks['name']
    task_type = subtasks['type']
    
    if task_type == "exposure_population":
        path_populations = f"{base_directory}{subtasks['population_sources']}"
        data_populations = FileLister.list_files(path_populations)
        for object_file in data_populations:
            object_name = object_file[0]
            object_file = object_file[1]
            print(f'Prosessing: {object_name}')
            data_exposure = compute_exposure_in_points(object_file, data_hazards)
            columns_to_keep = [x for x in data_exposure.columns if x not in data_map.columns] + ['geometry']
            data_exposure = gpd.sjoin(data_exposure[columns_to_keep], data_map, how='left', predicate='intersects')
            del data_exposure['index_right']
            output_name = f'{task_type}_{object_name}.gpkg'
            FileProcessor.save_to_geopackage(data_exposure, output_task, output_name)
            
            # task status
            subtask_bar.update(int(100/len(data_populations)))
            main_bar.update(int(100/len(tasks)))
            
    
    elif task_type == "exposure_infrastructure":
        path_infrastructures = f"{base_directory}{subtasks['infrastructures_sources']}"
        data_infrastructures = FileLister.list_files(path_infrastructures)
        resultado = data_infrastructures
        for object_file in data_infrastructures:
            object_name = object_file[0]
            object_file = object_file[1]
            print(f'Prosessing: {object_name}')
            data_exposure = compute_exposure_in_points(object_file, data_hazards)
            columns_to_keep = [x for x in data_exposure.columns if x not in data_map.columns] + ['geometry']
            data_exposure = gpd.sjoin(data_exposure[columns_to_keep], data_map, how='left', predicate='intersects')
            del data_exposure['index_right']
            output_name = f'{task_type}_{object_name}.gpkg'
            FileProcessor.save_to_geopackage(data_exposure, output_task, output_name)
            
            # task status
            subtask_bar.update(int(100/len(data_infrastructures)))
            main_bar.update(int(100/len(tasks)))
    
    elif task_type == "risk_index":
        pass
    else:
        pass
    
    subtask_bar.close()
    counter+=1
main_bar.close()

Task:   0%|          | 0/200 [00:50<?, ?it/s]

[A

Prosessing: population_54009



[A

earthquakes_period_475



[A

landslides_historical



[A

earthquakes_period_1500



[A

earthquakes_period_2475



[A

earthquakes_period_975



[A
[A

DriverError: sqlite3_open(/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/outputs/maps/exposure_population_population_54009.gpkg) failed: unable to open database file

In [None]:
jkn

In [None]:
data_exposure.head()
data_exposure.columns

In [None]:


from tqdm import tqdm
import time

# Número total de subtareas
num_subtasks = 10

# Configuración de la barra de progreso principal
main_bar = tqdm(total=num_subtasks * 100, desc="Tarea principal")

# Simulación de la tarea con subtareas
for subtask in range(num_subtasks):
    subtask_bar = tqdm(total=100, desc=f"Subtarea {subtask + 1}", leave=False)
    for _ in range(10):  # Simulamos 10 etapas para cada subtarea
        time.sleep(0.1)  # Simulamos una pequeña espera
        subtask_bar.update(10)  # Avanzamos 10 unidades en cada etapa
        main_bar.update(
            10
        )  # Avanzamos también la barra principal en cada etapa de subtarea
    subtask_bar.close()  # Cerramos la barra de progreso de la subtarea

# Cerramos la barra de progreso principal
main_bar.close()


In [None]:
from src.utils.config_reader import ConfigReader

config_reader = ConfigReader()
config_file_path = "config/nepal/setup_preprocessing.yaml"
config_data = config_reader.read_configuration_file(f"{base_directory}{config_file_path}")
config_data = config_data['preprocessing']

# population_exposure_config = None
# for task in config_data:
#     print(task)
    
config_data

In [None]:
import os
import sys

In [None]:
import pandas as pd
import numpy as np
import shapefile as shp
import osmnx as ox
import contextily as ctx
import geopandas as gpd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D  # for legend handle
from matplotlib_scalebar.scalebar import ScaleBar
from sklearn.metrics.pairwise import haversine_distances
from shapely.geometry import Point
from pyproj import Proj, transform
import math
from matplotlib.colors import Normalize

# sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "../src"))
# import functions_support as fsupport

import importlib

# importlib.reload(fsupport)

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

import hashlib
from datetime import datetime, timedelta

In [None]:
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems"))


In [None]:
import rasterio
import pandas as pd
import numpy as np
from shapely.geometry import Polygon
import geopandas as gpd
from tqdm import tqdm

In [None]:
input_path = "../data/nepal/inputs/pga_475y.tif"
input_path2 = "../data/nepal/inputs/pga_specifications.xlsx"
output_path = "../data/nepal/outputs/map/Peak_Ground_Acceleration.gpkg"


In [None]:
# Abre el archivo TIF
with rasterio.open(input_path) as src:
    # Imprime información sobre el archivo
    print(src.profile)

    # Lee todas las bandas y guarda los valores en una matriz
    data = src.read()

    # Imprime el número de bandas y el tamaño de la matriz
    print(f'Número de bandas: {src.count}')
    print(f'Tamaño de la matriz: {data.shape}')



In [None]:
# Definir una función para transformar las coordenadas de los píxeles a coordenadas de mapa
def pixel_to_map_coordinates(transform, col, row):
    x, y = transform * (col, row)
    return x, y

# Abrir el archivo TIF
with rasterio.open(input_path) as src:
    # Leer todas las bandas y guardar los valores en una matriz
    data = src.read()

    # Obtener la transformación de coordenadas de píxeles a coordenadas de mapa
    transform = src.transform

    # Crear una lista vacía para almacenar los datos de cada polígono
    polygons = []

    # Iterar sobre cada banda y crear polígonos para cada píxel con valor distinto de cero
    for i in range(src.count):
        band_data = data[i, :, :]

        for row in tqdm(range(band_data.shape[0])):
            for col in range(band_data.shape[1]):
                # Obtener el valor del píxel
                value = band_data[row, col]

                # Si el valor es cero, ignorar el píxel
                if value > 0:
                    #continue

                    # Calcular las coordenadas de los cuatro vértices del polígono
                    x1, y1 = pixel_to_map_coordinates(transform, col, row)
                    x2, y2 = pixel_to_map_coordinates(transform, col + 1, row)
                    x3, y3 = pixel_to_map_coordinates(transform, col + 1, row + 1)
                    x4, y4 = pixel_to_map_coordinates(transform, col, row + 1)

                    # Crear el polígono a partir de los vértices
                    poly = Polygon([(x1, y1), (x2, y2), (x3, y3), (x4, y4)])

                    # Agregar el polígono y su información a la lista de polígonos
                    polygon_data = {
                        'band': i+1,
                        'value': value,
                        'geometry': poly
                    }
                    polygons.append(polygon_data)
                    
# Crear un GeoDataFrame a partir de la lista de polígonos
gdf = gpd.GeoDataFrame(polygons)


In [None]:
# Plot the GeoDataFrame with colors based on the 'value' column
gdf.plot(column='value', cmap='viridis', legend=True, figsize=(10, 10))

# Add title and labels
plt.title('Map with Colors per Value Column')
plt.xlabel('Longitude')
plt.ylabel('Latitude')

# Show the plot
plt.show()

In [None]:

pga_specifications = pd.read_excel(f"{input_path2}")  

# dtype_mapping = {
#     'Acceleration_min': float,
#     'Acceleration_max': float, 
# }

# pga_specifications = pd.read_csv(f"{input_path2}", dtype=dtype_mapping)
pga_specifications

In [None]:
gdf

In [None]:
def identify_interval(value, df, min_col, max_col):
    for index, row in df.iterrows():
        min_value = row[min_col]
        max_value = row[max_col]
        if min_value <= value and value < max_value:
            return row['Instrumental_Intensity'], row['Acceleration_g'], row['Velocity_cmxs'], row['Perceived_shaking'], row['Potential_damage']
        

def gdf_interval_df(gdf, df, value_col, min_col, max_col):
    '''
    '''
    gdf2 = gdf.copy()
    resultado = gdf2.apply(lambda x: identify_interval(x[value_col], df, min_col, max_col), axis=1)
    gdf2['Instrumental_Intensity'] = [x[0] for x in resultado]
    gdf2['Acceleration_g'] = [x[1] for x in resultado]
    gdf2['Velocity_cmxs'] = [x[2] for x in resultado]
    gdf2['Perceived_shaking'] = [x[3] for x in resultado]
    gdf2['Potential_damage'] = [x[4] for x in resultado]

    return gdf2

gdf2 = gdf_interval_df(gdf, pga_specifications, value_col='value', min_col='Acceleration_min', max_col='Acceleration_max')
gdf2

In [None]:
gdf2.to_file(output_path, driver="GPKG")

In [None]:
set(gdf2['Instrumental_Intensity'])

In [20]:
import pandas as pd

file_path_dta = '/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/dhs/NP_2021_SPA_02232024_1120_204363/NPAN8ADTSR/NPAN8AFLSR.DTA'

df_dta = pd.read_stata(file_path_dta)

In [21]:
df_dta

Unnamed: 0,inv_id,p_id,obs_n,obs_id,c000,c001,c002,c003,c004,c004p,...,soa116dc,soa116dd,soa116de,soa116dy,sxa108a,sxa111a,sxa111b,sxa111c,sxa111d,sxa128a
0,4,3,AN,101,NP8,province 1,taplejung,rural,4,3,...,no,no,no,yes,no,no,no,no,no,no
1,9,4,AN,101,NP8,province 1,taplejung,rural,9,4,...,no,no,no,yes,,"yes, previous visit only","yes, this visit only",no,no,no
2,13,4,AN,102,NP8,province 1,taplejung,urban,13,4,...,no,no,no,yes,,"yes, previous visit only","yes, this and previous visit",no,no,no
3,13,4,AN,103,NP8,province 1,taplejung,urban,13,4,...,no,no,no,yes,,no,"yes, this visit only",no,no,no
4,13,4,AN,106,NP8,province 1,taplejung,urban,13,4,...,no,no,no,yes,,"yes, previous visit only","yes, this and previous visit",no,no,no
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1986,1632,31,AN,101,NP8,karnali province,jumla,urban,1632,31,...,no,no,no,yes,,"yes, previous visit only","yes, this and previous visit",no,no,no
1987,1632,31,AN,102,NP8,karnali province,jumla,urban,1632,31,...,no,no,no,yes,,"yes, this visit only","yes, this and previous visit",no,no,no
1988,1632,31,AN,103,NP8,karnali province,jumla,urban,1632,31,...,no,no,no,yes,,"yes, previous visit only",no,no,no,no
1989,1632,31,AN,104,NP8,karnali province,jumla,urban,1632,31,...,no,no,no,yes,,"yes, this visit only","yes, this visit only",no,no,no


In [31]:
for i in df_dta.columns:
    print(i)

inv_id
p_id
obs_n
obs_id
c000
c001
c002
c003
c004
c004p
c004ct
c004cn
c005
c005a
c005b
c007
c008
c009a
c009b
c009c
c020a
c020b
c021
c022
c023
c024
c032
c033
c033d
c501
c502a
c502b
c502c
c502e
c502f
c502g
c502h
c502i
c502j
c502k
c502l
c503
c504
c505
c507
c508
c510
c511
c512
c514
c515
c517a
c517b
c518a
c518b
c520
c521
c522
sx205a
sx303
sx304a
sx304c
sx305a
c101a
c101f
c102
c102f
c103a
c103b
c103c
c103d
c103e
c103f
c103g
c103y
c104a
c104b
c104c
c104d
c104e
c104f
c104g
c104h
c104i
c104j
c104k
c104l
c104m
c104n
c104o
c104p
c104q
c104y
c105a
c105b
c105c
c105d
c105e
c105f
c105g
c105h
c105i
c105j
c105k
c105x
c105y
c105a1
c105b1
c105c1
c105d1
c105e1
c105f1
c105g1
c105h1
c105i1
c105j1
c105k1
c105x1
c105y1
c106a
c106b
c106c
c106d
c106k
c106l
c106m
c106n
c106o
c106p
c106q
c106r
c106s
c106y
cf106a
cf106k
cf106xa
cf106xb
cf106xc
cf106xd
cf106xe
cf106xf
cf106xg
c107a
c107b
c107c
c107d
c107g
c107e
c107f
c107h
c107p
c107q
c107y1
c107y2
c107y3
c108
c110
c111
c112
c111a
c111b
c111c
c111e
c111f
c111y
c113

In [23]:
import geopandas as gpd

file_path_gpkg = "/Users/johnbarrera/Documents/Projects/world_bank/Climate-and-Disaster-Risk-Management-for-Health-Systems/data/nepal/inputs/adm/nepal_npl_ct_admn_ad3_sp_py_NEPAL_GEOBOUNDARIES_14022024_p_a_geoboundaries.gpkg"

gdf = gpd.read_file(file_path_gpkg)


In [24]:
gdf.head()

Unnamed: 0,ADM1_C,ADM1_N_RO,ADM2_C,ADM2_N_RO,ADM3_C,ADM3_N_RO,geometry
0,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B86918736,Aathrai Tribeni,"POLYGON ((87.62458 27.36214, 87.62375 27.36123..."
1,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B24938346,Maiwakhola,"POLYGON ((87.60866 27.37094, 87.60865 27.37084..."
2,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B98659480,Meringden,"POLYGON ((87.62458 27.36214, 87.62417 27.36218..."
3,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B49016341,Mikwakhola,"POLYGON ((87.62995 27.37658, 87.62958 27.37743..."
4,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B96689795,Phaktanglung,"POLYGON ((87.68733 27.40826, 87.68720 27.40822..."


In [32]:
gdf[gdf['ADM2_N_RO']=='TAPLEJUNG']

Unnamed: 0,ADM1_C,ADM1_N_RO,ADM2_C,ADM2_N_RO,ADM3_C,ADM3_N_RO,geometry
0,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B86918736,Aathrai Tribeni,"POLYGON ((87.62458 27.36214, 87.62375 27.36123..."
1,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B24938346,Maiwakhola,"POLYGON ((87.60866 27.37094, 87.60865 27.37084..."
2,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B98659480,Meringden,"POLYGON ((87.62458 27.36214, 87.62417 27.36218..."
3,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B49016341,Mikwakhola,"POLYGON ((87.62995 27.37658, 87.62958 27.37743..."
4,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B96689795,Phaktanglung,"POLYGON ((87.68733 27.40826, 87.68720 27.40822..."
5,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B96447016,Phungling,"POLYGON ((87.68119 27.30966, 87.68085 27.31012..."
6,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B52009768,Sidingba,"POLYGON ((87.83383 27.28345, 87.83463 27.28432..."
7,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B22195033,Sirijangha,"POLYGON ((87.80852 27.45406, 87.81034 27.45400..."
8,NPL-ADM1-38925275B72368611,Province 1,NPL-ADM2-48590121B80122498,TAPLEJUNG,NPL-ADM3-92635248B63709699,Yangwarak,"POLYGON ((87.69880 27.29106, 87.69869 27.29132..."


In [26]:
set(gdf['ADM2_N_RO'])

{'ACHHAM',
 'ARGHAKHANCHI',
 'BAGLUNG',
 'BAITADI',
 'BAJHANG',
 'BAJURA',
 'BANKE',
 'BARA',
 'BARDIYA',
 'BHAKTAPUR',
 'BHOJPUR',
 'CHITAWAN',
 'DADELDHURA',
 'DAILEKH',
 'DANG',
 'DARCHULA',
 'DHADING',
 'DHANKUTA',
 'DHANUSHA',
 'DOLAKHA',
 'DOLPA',
 'DOTI',
 'GORKHA',
 'GULMI',
 'HUMLA',
 'ILAM',
 'JAJARKOT',
 'JHAPA',
 'JUMLA',
 'KABHREPALANCHOK',
 'KAILALI',
 'KALIKOT',
 'KANCHANPUR',
 'KAPILBASTU',
 'KASKI',
 'KATHMANDU',
 'KHOTANG',
 'LALITPUR',
 'LAMJUNG',
 'MAHOTTARI',
 'MAKAWANPUR',
 'MANANG',
 'MORANG',
 'MUGU',
 'MUSTANG',
 'MYAGDI',
 'NAWALPARASI_E',
 'NAWALPARASI_W',
 'NUWAKOT',
 'OKHALDHUNGA',
 'PALPA',
 'PANCHTHAR',
 'PARBAT',
 'PARSA',
 'PYUTHAN',
 'RAMECHHAP',
 'RASUWA',
 'RAUTAHAT',
 'ROLPA',
 'RUKUM_E',
 'RUKUM_W',
 'RUPANDEHI',
 'SALYAN',
 'SANKHUWASABHA',
 'SAPTARI',
 'SARLAHI',
 'SINDHULI',
 'SINDHUPALCHOK',
 'SIRAHA',
 'SOLUKHUMBU',
 'SUNSARI',
 'SURKHET',
 'SYANGJA',
 'TANAHU',
 'TAPLEJUNG',
 'TERHATHUM',
 'UDAYAPUR'}

In [27]:
set(gdf['ADM1_N_RO'])

{'Bagmati',
 'Gandaki',
 'Karnali',
 'Lumbini',
 'Province 1',
 'Province 2',
 'Sudurpaschim'}