In [None]:
import datetime
import glob
import os
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import xarray as xr
from dotenv import load_dotenv
from shapely.geometry import Point
# Load variables declared in a local .env file (RASTERS_PATH is read from the environment below)
load_dotenv()
# Keep attributes through xarray operations and show array data collapsed in reprs
xr.set_options(
    display_expand_data=False,
    keep_attrs=True,
)

In [15]:
# Directory holding the input *.tif rasters; None when RASTERS_PATH is not defined
rasters_path = os.environ.get('RASTERS_PATH')

In [17]:
# Fail early with a readable message instead of the TypeError that Path(None) would raise
if rasters_path is None:
    raise RuntimeError('RASTERS_PATH is not set; define it in the environment or in the .env file')

raster_files = str(Path(rasters_path) / '*.tif')
# Glob once so that names, paths and dates all come from the same directory listing
file_paths = [os.path.abspath(x) for x in glob.glob(raster_files)]
if not file_paths:
    raise FileNotFoundError(f'No GeoTIFF files match {raster_files}')

# Spanish month abbreviations as they appear in the file names -> month number
months_str_list = ['ene', 'feb', 'mar', 'abr', 'may', 'jun', 'jul', 'ago', 'sep', 'oct', 'nov', 'dic']
months_num_list = range(1, 13)
my_dict = dict(zip(months_str_list, months_num_list))


def _parse_raster_date(file_name):
    """Return the first day of the month encoded in a raster file name.

    The month abbreviation is read from characters 5-7 of the name and the
    four-digit year from the four characters preceding the extension.

    Raises:
        ValueError: if the month abbreviation is unknown or the year is not numeric.
            (A failed lookup must not silently reuse the month of the previous file.)
    """
    month_str = file_name[5:8].lower()
    year_str = file_name[-8:-4]
    if month_str not in my_dict or not year_str.isdigit():
        raise ValueError(f'Cannot parse month/year from raster file name: {file_name!r}')
    return pd.Timestamp(year=int(year_str), month=my_dict[month_str], day=1)


# glob returns files in arbitrary order: sort by date so the time axis is chronological
dated_paths = sorted(
    ((_parse_raster_date(os.path.basename(path)), path) for path in file_paths),
    key=lambda pair: pair[0],
)
dates = [date for date, path in dated_paths]
file_paths = [path for date, path in dated_paths]
file_names = [os.path.basename(path) for path in file_paths]

# Create variable used for time axis
time_var = xr.Variable('time', dates)

# Load in and concatenate all individual GeoTIFFs
geotiffs_da = xr.concat([rioxarray.open_rasterio(i) for i in file_paths], dim=time_var)

# Convert our xarray.DataArray into a xarray.Dataset
geotiffs_ds = geotiffs_da.to_dataset("band")
geotiffs_ds = geotiffs_ds.rename({1: 'precipitacion'})

geotiffs_ds

In [18]:
# Write the dataset as netCDF one level above the directory that holds the rasters
rasters_dir = Path(raster_files).parent  # drop the '*.tif' pattern component
out_file = rasters_dir.parent.joinpath('precipitaciones_arg_PMRA.nc')
geotiffs_ds.to_netcdf(out_file)

In [19]:
# Convert the ds to a long-format dataframe: one row per (time, x, y) cell with a value.
# The precipitation column is truncated to whole numbers and then stored in the smallest
# signed integer dtype that can hold every value. A fixed int8 cast would corrupt any
# value outside -128..127; when all values fit, the result is still int8.
precipitacion_df = (
    geotiffs_ds.to_dataframe()
    .reset_index()
    .loc[:, ['time', 'x', 'y', 'precipitacion']]
    .dropna()
    .assign(
        precipitacion=lambda df: pd.to_numeric(
            df['precipitacion'].astype('int64'), downcast='integer'
        )
    )
)
print(precipitacion_df.shape)
precipitacion_df.head()

(40186290, 4)


Unnamed: 0,time,x,y,precipitacion
8061,2001-05-01,-66.542705,-55.044269,49
8062,2001-05-01,-66.497789,-55.044269,50
8064,2001-05-01,-66.452873,-55.044269,50
8774,2001-05-01,-66.542705,-54.999353,50
8775,2001-05-01,-66.497789,-54.999353,49


In [None]:
# Export dataset to an excel file.
# An Excel worksheet holds at most 1,048,576 rows (header included) and to_excel raises
# ValueError beyond that. The shape printed for precipitacion_df in this notebook is
# (40186290, 4), so fall back to CSV next to the intended .xlsx path when it does not fit.
EXCEL_MAX_ROWS = 1_048_576
out_file = Path(raster_files).parent.parent / 'precipitaciones_arg_PMRA.xlsx'
if len(precipitacion_df) < EXCEL_MAX_ROWS:  # strict: one row is taken by the header
    precipitacion_df.to_excel(out_file)
else:
    out_file = out_file.with_suffix('.csv')
    print(f'{len(precipitacion_df):,} rows exceed the Excel sheet limit; writing {out_file} instead')
    precipitacion_df.to_csv(out_file)

In [20]:
# Convert the df to a GeoDataFrame.
# points_from_xy builds the whole point-geometry column in one vectorised call; a row-wise
# apply creating one shapely Point per row is very slow on tens of millions of rows.
# NOTE(review): no CRS is assigned; x/y look like lon/lat degrees — confirm and pass crs= if so.
gdf = gpd.GeoDataFrame(
    precipitacion_df,
    geometry=gpd.points_from_xy(precipitacion_df['x'], precipitacion_df['y']),
)
gdf.head()

Unnamed: 0,time,x,y,precipitacion,geometry
8061,2001-05-01,-66.542705,-55.044269,49,POINT (-66.54270 -55.04427)
8062,2001-05-01,-66.497789,-55.044269,50,POINT (-66.49779 -55.04427)
8064,2001-05-01,-66.452873,-55.044269,50,POINT (-66.45287 -55.04427)
8774,2001-05-01,-66.542705,-54.999353,50,POINT (-66.54270 -54.99935)
8775,2001-05-01,-66.497789,-54.999353,49,POINT (-66.49779 -54.99935)


In [None]:
# Write the GeoDataFrame as a GeoPackage one level above the directory that holds the rasters
gpkg_name = 'precipitaciones_arg_PMRA.gpkg'
out_file = Path(raster_files).parent.parent.joinpath(gpkg_name)
gdf.to_file(out_file, driver='GPKG')

In [14]:
# ds = xr.open_dataset(out_file)

In [21]:
# ds