# Calculates monthly precipitation from GCM file

In [1]:
import xarray as xr
import rioxarray
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.ops import unary_union
import regionmask
import csv
from shapely.geometry import Point
import calendar
import matplotlib.pyplot as plt
import pyet
import os

In [2]:
# Add precipitation datasets, proj_pr started from 2015, therefore it was combined with a historical dataset to estimaet the 2000 - 2100 period
past_pr = xr.open_dataarray('precipitation_dataset_1850_2015.nc')
proj_pr = xr.open_dataarray('precipitation_dataset_2015_2100.nc')

# Folder containing the basins / sub-basins shapefiles, each basin must be an individual shapefile and have an ID number i.e. 'ID_11.shp'
shapefile_dir = 'basin_shapefile_folder'

# change lon/lat to region of inetrest
past_pr = past_pr.sel(lon=slice(50, 100), lat=slice(50, 10))
proj_pr = proj_pr.sel(lon=slice(50, 100), lat=slice(50, 10))

all_gcm = xr.concat([past_pr, proj_pr], dim='time')

# Output directory for the new NetCDF files
output_dir = 'pr_output_folder'
os.makedirs(output_dir, exist_ok=True)

# Function to calculate average precipitation and save to a new NetCDF file
def process_shapefile(shapefile, all_gcm):

    ## Finding number of seconds in each month
    time = all_gcm['time']
    
    time_index = pd.to_datetime(time.values)
    
    # Calculate the differences between consecutive time steps
    time_diffs = time_index.to_series().diff()
    
    # Convert the differences to seconds
    seconds_per_month = time_diffs.dt.total_seconds().dropna()
    
    # Convert the pandas Series to a numpy array
    seconds_array = seconds_per_month.values
    
    # Create a new DataArray for the seconds
    seconds_da = xr.DataArray(seconds_array, coords=[time_index[1:]], dims=['time'], name='seconds_per_month')
    
    # Add the new variable to the dataset
    all_gcm['seconds_per_month'] = seconds_da
    
    ## Finding number of days in each month
    
    time = all_gcm['time']
    
    time_index = pd.to_datetime(time.values)
    
    # Calculate the differences between consecutive time steps
    time_diffs = time_index.to_series().diff().dropna()
    
    # Convert the differences to seconds
    days_per_month = time_diffs.dt.days
    
    # Convert the pandas Series to a numpy array
    days_array = days_per_month.values
    
    # Create a new DataArray for the seconds
    days_da = xr.DataArray(days_array, coords=[time_index[1:]], dims=['time'], name='days_per_month')
    
    # Add the new variable to the dataset (r_ts = the sum of the entire basin (will need to sum the galciers within each individual basin before this process)
    all_gcm['days_per_month'] = days_da
    
    ## Correcting units
    
    seconds_per_month_coord = all_gcm.coords['seconds_per_month']
    
    days_per_month_coord = all_gcm.coords['days_per_month']
    
    gcm_proj_pr_day = (all_gcm*seconds_per_month_coord) / days_per_month_coord
    
    gcm_proj_pr_day_ts = gcm_proj_pr_day.sel(lon=72.5, lat=35, method='nearest').resample(time='AS').mean()
    
    ## Finding shp centeroids instead of cropping
    
    def read_shapefile(shapefile_path):
        return gpd.read_file(shapefile_path)

    def calculate_centroid(gdf):
        gdf = gdf.to_crs(epsg=32633)  # Reproject to a suitable projected CRS
        centroid = gdf.geometry.centroid
        return centroid

    def get_centroid_coordinates(centroid):
        lon, lat = centroid.x, centroid.y
        return lon, lat

    gdf = read_shapefile(shapefile_path)
    centroid = calculate_centroid(gdf)
    
    # If there are multiple geometries, you might want the centroid of the whole shapefile
    # by averaging the centroids of all geometries:
    if len(centroid) > 1:
        overall_centroid = centroid.unary_union.centroid
    else:
        overall_centroid = centroid.iloc[0]
    
    # Get the coordinates of the centroid
    lon, lat = get_centroid_coordinates(overall_centroid)
    
    ## Using centeroid to find precip at that point
    
    # Step 2: Specify the Point of Interest
    target_lon = lon  # Replace with your longitude
    target_lat = lat   # Replace with your latitude
    
    # Find the nearest grid point in the dataset to the specified lon, lat
    # This assumes the NetCDF file has dimensions 'lon' and 'lat'
    selected_point = gcm_proj_pr_day.sel(lon=target_lon, lat=target_lat, method='nearest')
    
    ## Creating NetCDF
    
    pr_ts_crop_sub = selected_point.sel(time=slice('2015-01-01T00:00:00.000000000', '2100-12-01T00:00:00.000000000'))

    # Extract time and data values
    time_values = pr_ts_crop_sub['time'].values
    data_values = pr_ts_crop_sub.values
    
    # Create a DataFrame
    df = pd.DataFrame({
        'time': time_values,
        'precipitation': data_values
    })
    
    # Define the output CSV file path
    output_csv_file = os.path.join(output_dir, os.path.splitext(os.path.basename(shapefile))[0] + '_avg_precipitation.csv')

    # Save the DataFrame to a CSV file
    df.to_csv(output_csv_file, index=False)

# Iterate over all shapefiles in the directory
for shapefile in os.listdir(shapefile_dir):
    if shapefile.endswith('.shp'):
        shapefile_path = os.path.join(shapefile_dir, shapefile)
        
        # Process each shapefile
        process_shapefile(shapefile_path, all_gcm)


  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_grouper = pd.Grouper(
  index_gr