This notebook loops over the downloaded weather (climate) raster files and extracts their values at each dam point.


In [None]:
from tqdm.auto import tqdm
from pathlib import Path
import os
import zipfile

try:
    # prefer the third-party `multiprocess` package when it is installed
    from multiprocess import Pool
except ImportError:
    # only a missing package should trigger the fallback to the standard
    # library; any other error is left to surface
    from multiprocessing import Pool
import rasterio
import geopandas as gpd
import pandas as pd

In [None]:
# locate the zipped point data shipped in the repo's "data" folder and
# unpack it next to the archive
data_dir = os.path.join(os.getcwd(), "data")
zipped_point_data = os.path.join(
    data_dir, "all dams centroids with water and dates.zip"
)
with zipfile.ZipFile(zipped_point_data) as archive:
    archive.extractall(data_dir)

In [None]:
# the extracted GeoPackage is expected to share the archive's base name, so
# swap only the file extension (str.replace would also rewrite any other
# ".zip" occurring earlier in the absolute path)
point_data_input_path = os.path.splitext(zipped_point_data)[0] + ".gpkg"
# cell output: True when the extracted file is present on disk
os.path.isfile(point_data_input_path)

In [None]:
# read in vars from download notebook
# (`%store -r` restores a variable that was previously saved with `%store`;
# the names below must match the names they were stored under, spelling included)
%store -r dam_forcast_working_dir
%store -r ANU_cliamte_downlaod_dir
%store -r min_year
%store -r max_year
%store -r climate_types
# save the point data path so it can be restored with `%store -r` elsewhere
%store point_data_input_path
# echo the restored values as a quick sanity check
print(dam_forcast_working_dir)
print(ANU_cliamte_downlaod_dir)
print(min_year)
print(max_year)
print(climate_types)

In [None]:
# output folder for the time step data
time_step_folder = os.path.join(dam_forcast_working_dir,'monthly points')
# save the folder path with IPython's %store so it can be restored later
%store time_step_folder
# create this folder if it does not exist
Path(time_step_folder).mkdir(parents=True, exist_ok=True)
# show the resolved path as the cell output
time_step_folder

In [None]:
# collect every NetCDF (*.nc) file found directly in the download folder
raster_list = [*ANU_cliamte_downlaod_dir.glob("*.nc")]
# cell output: how many rasters were found
len(raster_list)

In [None]:
# load the point layer from the extracted file
dam_points = gpd.read_file(point_data_input_path)
# preview the first five rows as the cell output
dam_points.head(n=5)

In [None]:
# reproject to WGS 84 if not already
# a layer with no CRS at all cannot be reprojected; fail with a clear message
# rather than an AttributeError from calling .to_epsg() on None
if dam_points.crs is None:
    raise ValueError(
        "dam_points has no CRS defined; cannot reproject to EPSG:4326"
    )
if dam_points.crs.to_epsg() != 4326:
    dam_points = dam_points.to_crs("EPSG:4326")

In [None]:
# build one (x, y) tuple per dam point from the geometry column
point_geometry = dam_points["geometry"]
coord_list = [*zip(point_geometry.x, point_geometry.y)]
# cell output: first coordinate pair, as a quick check
coord_list[0]

In [None]:
# this func will open a climate file and extract the point values
def extract_points_from_raster(raster_path):
    """Sample one climate raster at every dam point and pickle the values.

    Relies on two notebook-level globals: ``coord_list`` (the (x, y) tuples
    to sample) and ``time_step_folder`` (the output directory).

    Args:
        raster_path: Path to a raster file whose name is "_"-separated, with
            the climate type as the third part and the date as the last part
            (followed by ``.nc``).

    Returns:
        The path of the pickle that was written, or ``None`` when the output
        file already existed and the raster was skipped.
    """
    file_name = os.path.basename(raster_path)
    name_parts = file_name.split("_")
    #     get the date from the file name
    date = name_parts[-1].replace(".nc", "")
    #     get the climate type from the file name
    climate_type = name_parts[2]
    export_path = os.path.join(time_step_folder, f"{date}_{climate_type}.pkl")

    #     skip if already done
    if os.path.isfile(export_path):
        return None

    #     open raster; the context manager closes the dataset even if
    #     sampling raises, so workers do not accumulate open file handles
    with rasterio.open(raster_path) as src:
        #     point sample raster using rio, keeping the first value
        #     returned for each coordinate
        raster_vals = [sampled[0] for sampled in src.sample(coord_list)]

    #     place into df (one column, named after the date)
    point_samp_df = pd.DataFrame(raster_vals, columns=[date])
    #     save to disk
    point_samp_df.to_pickle(export_path)
    return export_path

In [None]:
# fan the per-raster extraction out over a process pool, with a progress bar
with Pool() as p:
    results_iter = p.imap(extract_points_from_raster, raster_list)
    progress_bar = tqdm(
        results_iter,
        total=len(raster_list),
        desc="Extracting points from rasters",
    )
    # consuming the bar drives the pool; skipped rasters contribute None
    export_paths = list(progress_bar)