In [None]:
# First we import some libraries. 
import pathlib
from pathlib import Path
import os
import sys
import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd


In [None]:
from new_helpers_may_17 import PopEstimator

In [None]:
# Directory layout. Every location is derived from base_path, the directory two
# levels above the notebook's current working directory.
base_path = Path.cwd().parent.parent
local_dat_dir = base_path / "national_wf_disaster_hosp" / "local_data"

# GHSL 1 km population rasters, one per epoch. Each .tif sits in a folder that
# carries the same name as the file (minus the extension).
pop_dat_dir = base_path / "GHSL" / "1km"
ghsl_2000, ghsl_2005, ghsl_2010, ghsl_2015, ghsl_2020 = (
    pop_dat_dir / stem / f"{stem}.tif"
    for stem in (
        f"GHS_POP_E{epoch}_GLOBE_R2023A_54009_1000_V1_0"
        for epoch in (2000, 2005, 2010, 2015, 2020)
    )
)

# Monthly wildfire exposure files; only the "all_analysis" set is used here.
wf_dat = local_dat_dir / "monthly_wf_exposure"
all_wf_dat = wf_dat / "all_analysis"
# main_wf_dat = wf_dat / "main_analysis"
# sensitivity_larger = wf_dat / "sensitivity_larger"
# sensitivity_smaller = wf_dat / "sensitivity_smaller"

# ZCTA boundaries file (parquet).
zctas_2020 = local_dat_dir / "zctas_2020.parquet"

In [None]:
# GHSL rasters in chronological order; each one is reused for a run of months.
ghsl_paths = [ghsl_2000, ghsl_2005, ghsl_2010, ghsl_2015, ghsl_2020]

# Number of months served by each raster: 3, 5, 5, 5 and 1 year(s) of 12 months,
# i.e. 228 months in total.
rep_pattern = [12 * n_years for n_years in (3, 5, 5, 5, 1)]

# One raster path per month: entry k is the raster to use for the k-th month.
repeated_paths = [
    ghsl_paths[epoch_idx]
    for epoch_idx, n_months in enumerate(rep_pattern)
    for _ in range(n_months)
]


In [None]:
# Every entry of the all_analysis directory whose file name contains 'month',
# as full paths in sorted order.
all_wf_exposure = sorted(
    entry for entry in all_wf_dat.iterdir() if 'month' in entry.name
)

In [None]:
# Sanity check: list the hazard files that were picked up, in processing order.
print(all_wf_exposure)

In [None]:
# Single PopEstimator instance (from new_helpers_may_17) reused by every
# data-preparation and estimation call below.
est = PopEstimator()

In [None]:
# Load the ZCTA file through the estimator, flagged as the 'spatial_unit' layer.
# NOTE(review): presumably returns a GeoDataFrame (.plot() is called on it next) — confirm.
zctas = est.prepare_data(path_to_data=zctas_2020, geo_type='spatial_unit')

In [None]:
# Quick visual check of the prepared spatial units.
zctas.plot()

In [None]:
from tqdm import tqdm

# Run every monthly hazard file through the estimator's loader; wfs keeps the
# same order as all_wf_exposure.
wfs = [
    est.prepare_data(path_to_data=hazard_file, geo_type='hazard')
    for hazard_file in tqdm(all_wf_exposure, desc="Preparing hazard data")
]

In [None]:
# Spot-check one prepared hazard layer (index 200): print its first rows, then
# display its `crs` attribute as the cell output.
print(wfs[200].head())
wfs[200].crs

In [None]:
# Trial run for a single month (index 227), passing the ZCTAs as spatial_units.
# With the rep_pattern defined above, 227 is the last index of repeated_paths.
exposed_pop_df = est.estimate_exposed_pop(
            pop_path=repeated_paths[227], hazards=wfs[227], hazard_specific=False,
            spatial_units=zctas)
    

In [None]:
# Preview the first rows of the single-month trial result.
exposed_pop_df.head()

In [None]:
# Estimate the exposed population for every prepared monthly hazard layer.

# Fail fast when there are more hazard layers than per-month raster paths;
# otherwise the loop would only hit an IndexError after many expensive iterations.
if len(wfs) > len(repeated_paths):
    raise ValueError(
        f"{len(wfs)} hazard layers but only {len(repeated_paths)} monthly raster paths"
    )

exposed_pop = []
for i in tqdm(range(len(wfs)), desc="Calculating exposed population"):
    # spatial_units=zctas is passed exactly as in the single-month trial call
    # earlier in this notebook, so the full run uses the same setup as the trial.
    # NOTE(review): presumably this is what makes the result per-ZCTA — confirm
    # against PopEstimator.estimate_exposed_pop.
    # A loop-local name is used so the trial's exposed_pop_df is not overwritten.
    monthly_exposed = est.estimate_exposed_pop(
        pop_path=repeated_paths[i], hazards=wfs[i], hazard_specific=False,
        spatial_units=zctas
    )
    # 'month' is the 1-based position of the layer in wfs, not a calendar label.
    monthly_exposed["month"] = i + 1
    exposed_pop.append(monthly_exposed)

In [None]:
# Stack the per-month results into one long DataFrame. Rows are concatenated
# (no join is performed); ignore_index=True renumbers them from 0.
combined_df = pd.concat(exposed_pop, ignore_index=True)

In [None]:
# Show the combined table as plain text.
print(combined_df)

In [None]:
# One estimate_pop result per GHSL raster, computed against the ZCTAs and kept
# in the same order as ghsl_paths.
pops = [
    est.estimate_pop(pop_path=raster, spatial_units=zctas)
    for raster in tqdm(ghsl_paths, desc='Calculating ZCTA pop')
]
    

In [None]:
from concurrent.futures import ProcessPoolExecutor
import pandas as pd

# Define a function to process a single monthly hazard file
def process_file(i):
    """Estimate the exposed population for the i-th monthly hazard file.

    Uses the objects this notebook defines (``est``, ``all_wf_exposure``,
    ``repeated_paths``, ``zctas``). The previous version referenced
    ``main_wf_exposure`` and ``find_exposure``, which are never defined or
    imported here, so it raised NameError on a fresh kernel.

    Parameters
    ----------
    i : int
        1-based position of the file in ``all_wf_exposure`` (and of the
        matching raster in ``repeated_paths``).

    Returns
    -------
    The object returned by ``est.estimate_exposed_pop``, with a ``'month'``
    entry set to the label sliced from the hazard file name.
    """
    hazard_path = all_wf_exposure[i - 1]

    # The month label is characters 6-15 of the file name (slice [6:16]).
    # NOTE(review): assumes the all_analysis file names carry the label at that
    # position, as the former main_analysis names presumably did — TODO confirm.
    month = hazard_path.name[6:16]

    # Prepare the hazard layer, then run the estimate against the ZCTAs.
    # NOTE(review): hazard_specific=False replaces the old by_unique_hazard=False;
    # this mapping is assumed from the parameter names — confirm.
    hazards = est.prepare_data(path_to_data=hazard_path, geo_type='hazard')
    monthly_exposure = est.estimate_exposed_pop(
        pop_path=repeated_paths[i - 1],
        hazards=hazards,
        hazard_specific=False,
        spatial_units=zctas,
    )

    # Tag the result with its month label
    monthly_exposure['month'] = month

    return monthly_exposure

# Run the per-month estimation in parallel, one task per hazard file.
# The range is taken from all_wf_exposure, the hazard-file list this notebook
# builds; main_wf_exposure was never defined.
# NOTE(review): ProcessPoolExecutor requires that worker processes can import the
# function being mapped. A function defined in a notebook cell may not be
# importable (notably with the "spawn" start method); if workers fail, move
# process_file into a .py module and import it.
with ProcessPoolExecutor() as executor:
    # process_file expects 1-based indices, hence the shifted range.
    results = list(executor.map(process_file, range(1, len(all_wf_exposure) + 1)))

# Stack the per-month results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

# Display the first rows of the combined result
final_results.head()

In [None]:
# Serial smoke test of the per-month estimate: only the first file (i = 1) is run.
# Uses the objects defined in this notebook (est, all_wf_exposure, repeated_paths,
# zctas); main_wf_exposure and find_exposure were never defined here.
results = []  # per-month results, each tagged with its month label
for i in range(1, 2):
    hazard_path = all_wf_exposure[i-1]

    # The month label is characters 6-15 of the file name (slice [6:16]).
    # NOTE(review): assumes the file names carry the label there — TODO confirm.
    month = hazard_path.name[6:16]

    # Prepare the hazard layer and estimate exposure against the ZCTAs
    monthly_exposure = est.estimate_exposed_pop(
        pop_path=repeated_paths[i-1],
        hazards=est.prepare_data(path_to_data=hazard_path, geo_type='hazard'),
        hazard_specific=False,
        spatial_units=zctas,
    )

    # Add the month column to the result
    monthly_exposure['month'] = month

    # Append the result to the list
    results.append(monthly_exposure)

# Combine all results into a single DataFrame
final_results = pd.concat(results, ignore_index=True)

In [None]:
# Pick the first monthly hazard file (i is 1-based) and show its month label,
# i.e. characters 6-15 of the file name. all_wf_exposure is the hazard-file list
# defined in this notebook; main_wf_exposure was never defined.
i = 1
month = all_wf_exposure[i-1].name[6:16]
print(month)


In [None]:
# Show the three inputs for month i: hazard file, ZCTA file and population raster.
# all_wf_exposure replaces the undefined main_wf_exposure.
print(all_wf_exposure[i-1])
print(zctas_2020)
print(repeated_paths[i-1])

In [None]:
# Perform the exposure calculation for month i with the notebook's estimator.
# The old find_exposure helper is not imported anywhere in this notebook.
# NOTE(review): hazard_specific=False replaces the old by_unique_hazard=False;
# this mapping is assumed from the parameter names — confirm.
monthly_exposure = est.estimate_exposed_pop(
  pop_path=repeated_paths[i-1],
  hazards=est.prepare_data(path_to_data=all_wf_exposure[i-1], geo_type='hazard'),
  hazard_specific=False,
  spatial_units=zctas)
    


In [None]:
# Inspect the single-month result as plain text.
print(monthly_exposure)

In [None]:
# Tag the single-month result with the month label sliced from the hazard file
# name in an earlier cell.
monthly_exposure['month'] = month