In [None]:
import os
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Define Paths

In [None]:
# Observed Tarragona data, addressed relative to the current working directory.
data_folder = os.path.join("..", "..", "data", "tarragona")

# Root folder of the downloaded projection files. The original machine-specific
# location is kept as the default; set SAFECREW_PROJECTIONS_DIR to point the
# notebook at a different location without editing this cell.
projections_folder = os.environ.get(
    "SAFECREW_PROJECTIONS_DIR",
    "/Users/massimilianoarca/Documents/PoliMi/Research Grant/SafeCREW/Projections",
)

# Climate projections: 2 m air temperature and precipitation.
climate_projections_folder = os.path.join(projections_folder, 'European_Climate projections')
air_temperature_folder = os.path.join(climate_projections_folder, '2m air temperature')
precipitation_folder = os.path.join(climate_projections_folder, 'Precipitation')

# Hydrology climate-impact indicators: river discharge and water temperature.
climate_impact_indicators_folder = os.path.join(projections_folder, 'European_Hydrology_climate impact indicators')
river_discharge_folder = os.path.join(climate_impact_indicators_folder, 'River discharge')
water_temperature_folder = os.path.join(climate_impact_indicators_folder, 'Water temperature in catchments')

In [None]:
# Sub-folder of the Tarragona data directory that holds the raw files.
raw_data_folder = os.path.join(data_folder, 'raw_data')

## Load raw dataset

In [None]:
# Read the complete raw dataset from the Excel workbook in the raw-data folder.
full_df = pd.read_excel(os.path.join(raw_data_folder, 'raw_full_dataset.xlsx'))

In [None]:
# Display the loaded frame for a visual check.
full_df

In [None]:
# Replace the raw column identifiers with the readable labels used in the plots.
# The result is reassigned instead of using inplace=True, so the cell produces a
# new frame rather than silently mutating the one loaded above.
full_df = full_df.rename(
    columns={
        "flowriver": "Flow River",
        "cumulated_rainfall_24h": "Daily Cumulated Rainfall",
        "environmental_temperature": "Air Temperature",
        'nitrate': 'Nitrate',
        'dissolvedoxygen': 'Dissolved Oxygen',
        'turbidity': 'Turbidity',
        'watertemperature': 'Water Temperature',
        'redoxpotential': 'Redox Potential',
        'ABS254': 'Absorbance 254nm',
    },
)

In [None]:
# Count the missing values in each column.
full_df.isna().sum()

In [None]:
# Drop every row that has a missing value (dropna with its default arguments).
full_df = full_df.dropna()

## Take the Monthly Average

In [None]:
# Add the calendar year and month of each timestamp as grouping keys.
timestamps = full_df['DateTime']
full_df = full_df.assign(Year=timestamps.dt.year, Month=timestamps.dt.month)

In [None]:
# Average every column within each (Year, Month) group, then turn the group keys
# back into ordinary columns.
# NOTE(review): .mean() is applied to all remaining columns, DateTime included;
# later plots use full_df['DateTime'] as the x axis — confirm the averaged
# timestamp is what is intended.
monthly_avg_df = full_df.groupby(['Year', 'Month']).mean().reset_index()

In [None]:
# From here on `full_df` refers to the monthly averages; the pre-aggregation rows
# are no longer reachable under this name.
full_df = monthly_avg_df

In [None]:
# Longitude and latitude used to locate Xerta on the projection grids.
xerta = {'lon': 0.489172, 'lat': 40.9084128}

# Air Temperature

- 30-year monthly average values
- Calculated as the mean monthly values of daily mean temperature, averaged over all Januaries, Februaries, etc. that are part of a 30-year period.

## Load Data

In [None]:
# Gather the NetCDF files in the air-temperature folder and split them by scenario.
air_temperature_files = [name for name in os.listdir(air_temperature_folder) if name.endswith('.nc')]

temp_rcp_45_files = [name for name in air_temperature_files if 'rcp45' in name]
temp_rcp_85_files = [name for name in air_temperature_files if 'rcp85' in name]

# Files belonging to neither scenario are treated as historical; only the first one is used.
temp_scenario_files = temp_rcp_45_files + temp_rcp_85_files
temp_hist_file = [name for name in air_temperature_files if name not in temp_scenario_files][0]

# Build full paths, then open the scenario files together (combined by coordinates)
# and the historical file on its own.
temp_rcp_45_paths = [os.path.join(air_temperature_folder, name) for name in temp_rcp_45_files]
temp_rcp_85_paths = [os.path.join(air_temperature_folder, name) for name in temp_rcp_85_files]
temp_hist_path = os.path.join(air_temperature_folder, temp_hist_file)

temp_rcp_45_ds = xr.open_mfdataset(temp_rcp_45_paths, combine='by_coords', engine='netcdf4')
temp_rcp_85_ds = xr.open_mfdataset(temp_rcp_85_paths, combine='by_coords', engine='netcdf4')
temp_hist_ds = xr.open_dataset(temp_hist_path, engine='netcdf4')

## Inspect Data

In [None]:
# Print a summary of the RCP 4.5 air-temperature dataset.
temp_rcp_45_ds.info()

## Extract Xerta Data

In [None]:
# Xerta longitude/latitude (same values as the earlier definition in this notebook).
xerta = {'lon': 0.489172, 'lat': 40.9084128}

### Historical Data

In [None]:
# Convert the historical air-temperature dataset into a DataFrame.
hist_df = temp_hist_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so they can be filtered on below.
hist_df = hist_df.reset_index()

In [None]:
# Display the flattened historical frame.
hist_df

In [None]:
# Pick the row whose (lat, lon) pair is closest to Xerta, measured by the sum of
# squared coordinate differences.
lat_offset = hist_df['lat'] - xerta['lat']
lon_offset = hist_df['lon'] - xerta['lon']
squared_distance = lat_offset**2 + lon_offset**2
xerta_info = hist_df.loc[squared_distance.idxmin()]

In [None]:
# Grid coordinates (y, x) of the row selected as nearest to Xerta.
y_coord, x_coord = xerta_info.loc['y'], xerta_info.loc['x']

In [None]:
# Show the selected grid coordinates.
y_coord, x_coord

In [None]:
# Keep only the historical air-temperature rows for the grid cell nearest to Xerta.
# .copy() makes the selection an independent frame, so adding a column to it
# afterwards does not trigger pandas' SettingWithCopyWarning.
xerta_df = hist_df[(hist_df['x'] == x_coord) & (hist_df['y'] == y_coord)].copy()

In [None]:
# Tag these rows as the historical series; the plots colour by this label.
xerta_df['label'] = 'historical'

### RCP 4.5 Data

In [None]:
# Convert the RCP 4.5 air-temperature dataset into a DataFrame.
rcp_45_df = temp_rcp_45_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so x / y can be filtered on.
rcp_45_df = rcp_45_df.reset_index()

In [None]:
# Select the RCP 4.5 rows for the Xerta grid cell and tag them with the scenario.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (rcp_45_df['x'] == x_coord) & (rcp_45_df['y'] == y_coord)
temp_df = rcp_45_df[at_xerta].assign(label='rcp45')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

### RCP 8.5 Data

In [None]:
# Convert the RCP 8.5 air-temperature dataset into a DataFrame.
rcp_85_df = temp_rcp_85_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so x / y can be filtered on.
rcp_85_df = rcp_85_df.reset_index()

In [None]:
# Select the RCP 8.5 rows for the Xerta grid cell and tag them with the scenario.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (rcp_85_df['x'] == x_coord) & (rcp_85_df['y'] == y_coord)
temp_df = rcp_85_df[at_xerta].assign(label='rcp85')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

In [None]:
# Display the combined historical / RCP 4.5 / RCP 8.5 rows selected for Xerta.
xerta_df

## Some Plots

In [None]:
# Map of tas_ymonmean (RCP 4.5) at the first time step, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 7.5))
temp_rcp_45_ds['tas_ymonmean'].isel(time=0).plot(ax=ax)

# Mark the selected grid cell (x_coord, y_coord) with a red dot.
ax.plot(x_coord, y_coord, 'ro')

plt.show()

In [None]:
# Quadmesh map of the RCP 4.5 air-temperature field on lon/lat axes.
# NOTE(review): the `.hvplot` accessor is normally registered by importing
# `hvplot.xarray`; no such import is visible in this notebook — confirm it is
# available before this cell runs.
temp_rcp_45_ds['tas_ymonmean'].hvplot.quadmesh(x='lon', y='lat', rasterize=True, cmap='inferno', project=True, coastline=True, width=800, height=500)

In [None]:
# Compare the historical and projected air temperature at Xerta with the observations.

colors = {
    'historical': 'blue',
    'rcp45': 'green',
    'rcp85': 'red',
}

plt.figure(figsize=(30, 7.5))
years = xerta_df['time'].dt.year
for label in xerta_df['label'].unique():
    label_rows = xerta_df['label'] == label
    # One line per (scenario, year) pair, all drawn in the scenario colour.
    for year in years.unique():
        temp_df = xerta_df[label_rows & (years == year)]
        sns.lineplot(x=temp_df['time'], y=temp_df['tas_ymonmean'], color=colors[label], marker='o')

    # Empty line whose only purpose is a single legend entry per scenario.
    plt.plot([], [], color=colors[label], label=label)

# Observed monthly averages in black.
sns.lineplot(x=full_df['DateTime'], y=full_df['Air Temperature'], color='black', marker='o', label='Observed')

plt.xlabel('Time')
plt.ylabel('Temperature (°C)')
plt.title('Air Temperature at Xerta')
plt.legend()
plt.show()


# Precipitation

- 30-year monthly average values
- Calculated as the mean monthly values of daily precipitation, averaged over all Januaries, Februaries, etc. that are part of a 30-year period.

## Load Data

In [None]:
# Gather the NetCDF files in the precipitation folder and split them by scenario.
precipitation_files = [name for name in os.listdir(precipitation_folder) if name.endswith('.nc')]

rain_rcp_45_files = [name for name in precipitation_files if 'rcp45' in name]
rain_rcp_85_files = [name for name in precipitation_files if 'rcp85' in name]

# Files belonging to neither scenario are treated as historical; only the first one is used.
rain_scenario_files = rain_rcp_45_files + rain_rcp_85_files
rain_hist_file = [name for name in precipitation_files if name not in rain_scenario_files][0]

# Build full paths, then open the scenario files together (combined by coordinates)
# and the historical file on its own.
rain_rcp_45_paths = [os.path.join(precipitation_folder, name) for name in rain_rcp_45_files]
rain_rcp_85_paths = [os.path.join(precipitation_folder, name) for name in rain_rcp_85_files]
rain_hist_path = os.path.join(precipitation_folder, rain_hist_file)

rain_rcp_45_ds = xr.open_mfdataset(rain_rcp_45_paths, combine='by_coords', engine='netcdf4')
rain_rcp_85_ds = xr.open_mfdataset(rain_rcp_85_paths, combine='by_coords', engine='netcdf4')
rain_hist_ds = xr.open_dataset(rain_hist_path, engine='netcdf4')

## Inspect Data

In [None]:
# Print a summary of the RCP 4.5 precipitation dataset.
rain_rcp_45_ds.info()

## Extract Xerta Data

In [None]:
# Xerta longitude/latitude (same values as the earlier definitions in this notebook).
xerta = {'lon': 0.489172, 'lat': 40.9084128}

### Historical Data

In [None]:
# Convert the historical precipitation dataset into a DataFrame
# (this replaces the air-temperature `hist_df` from the previous section).
hist_df = rain_hist_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so they can be filtered on below.
hist_df = hist_df.reset_index()

In [None]:
# Display the flattened historical precipitation frame.
hist_df

In [None]:
# Pick the row whose (lat, lon) pair is closest to Xerta, measured by the sum of
# squared coordinate differences.
lat_offset = hist_df['lat'] - xerta['lat']
lon_offset = hist_df['lon'] - xerta['lon']
squared_distance = lat_offset**2 + lon_offset**2
xerta_info = hist_df.loc[squared_distance.idxmin()]

In [None]:
# Grid coordinates (y, x) of the row selected as nearest to Xerta.
y_coord, x_coord = xerta_info.loc['y'], xerta_info.loc['x']

In [None]:
# Keep only the historical precipitation rows for the grid cell nearest to Xerta.
# .copy() makes the selection an independent frame, so adding a column to it
# afterwards does not trigger pandas' SettingWithCopyWarning.
xerta_df = hist_df[(hist_df['x'] == x_coord) & (hist_df['y'] == y_coord)].copy()

In [None]:
# Tag these rows as the historical series; the plots colour by this label.
xerta_df['label'] = 'historical'

### RCP 4.5 Data

In [None]:
# Convert the RCP 4.5 precipitation dataset into a DataFrame.
rcp_45_df = rain_rcp_45_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so x / y can be filtered on.
rcp_45_df = rcp_45_df.reset_index()

In [None]:
# Select the RCP 4.5 precipitation rows for the Xerta grid cell and tag them.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (rcp_45_df['x'] == x_coord) & (rcp_45_df['y'] == y_coord)
temp_df = rcp_45_df[at_xerta].assign(label='rcp45')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

### RCP 8.5 Data

In [None]:
# Convert the RCP 8.5 precipitation dataset into a DataFrame.
rcp_85_df = rain_rcp_85_ds.to_dataframe()

In [None]:
# Move the index levels into ordinary columns so x / y can be filtered on.
rcp_85_df = rcp_85_df.reset_index()

In [None]:
# Select the RCP 8.5 precipitation rows for the Xerta grid cell and tag them.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (rcp_85_df['x'] == x_coord) & (rcp_85_df['y'] == y_coord)
temp_df = rcp_85_df[at_xerta].assign(label='rcp85')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

In [None]:
# Display the combined historical / RCP 4.5 / RCP 8.5 precipitation rows for Xerta.
xerta_df

## Some Plots

In [None]:
# Map of pr_ymonmean (RCP 4.5) at the first time step, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 7.5))
rain_rcp_45_ds['pr_ymonmean'].isel(time=0).plot(ax=ax)

# Mark the selected grid cell (x_coord, y_coord) with a red dot.
ax.plot(x_coord, y_coord, 'ro')

plt.show()

In [None]:
# Quadmesh map of the RCP 4.5 precipitation field on lon/lat axes.
# NOTE(review): the `.hvplot` accessor is normally registered by importing
# `hvplot.xarray`; no such import is visible in this notebook — confirm it is
# available before this cell runs.
rain_rcp_45_ds['pr_ymonmean'].hvplot.quadmesh(x='lon', y='lat', rasterize=True, cmap='inferno', project=True, coastline=True, width=800, height=500)

In [None]:
# Compare the historical and projected precipitation at Xerta with the observations.

colors = {
    'historical': 'blue',
    'rcp45': 'green',
    'rcp85': 'red',
}

plt.figure(figsize=(30, 7.5))
for label in xerta_df['label'].unique():
    # One line per (scenario, year) pair, all drawn in the scenario colour.
    for year in xerta_df['time'].dt.year.unique():
        temp_df = xerta_df[(xerta_df['label'] == label) & (xerta_df['time'].dt.year == year)]
        sns.lineplot(x=temp_df['time'], y=temp_df['pr_ymonmean'], color=colors[label], marker='o')

    # Empty line whose only purpose is a single legend entry per scenario.
    plt.plot([], [], color=colors[label], label=label)

# Observed monthly averages; rows with a value of 3 or more are left out of the plot.
# Both axes are taken from the same filtered frame so x and y always refer to the
# same rows (previously x was unfiltered while y was filtered).
observed_df = full_df[full_df['Daily Cumulated Rainfall'] < 3]
sns.lineplot(x=observed_df['DateTime'], y=observed_df['Daily Cumulated Rainfall'], color='black', marker='o', label='Observed')

plt.xlabel('Time')
plt.ylabel('Daily Cumulated Rainfall (mm)')

plt.title('Daily Cumulated Rainfall at Xerta')

plt.legend()
plt.show()


# River Discharge

## Load Data

In [None]:
# List the NetCDF files in the river-discharge folder.
river_discharge_files = [name for name in os.listdir(river_discharge_folder) if name.endswith('.nc')]


def _is_large_river_file(name):
    """Return True when the named river-discharge file is larger than 10 MiB."""
    return os.path.getsize(os.path.join(river_discharge_folder, name)) / 1024 / 1024 > 10


# Scenario files: matched by name, then restricted to the large files.
river_rcp_45_files = [name for name in river_discharge_files if 'rcp45' in name and _is_large_river_file(name)]
river_rcp_85_files = [name for name in river_discharge_files if 'rcp85' in name and _is_large_river_file(name)]

# Historical files: large files that ended up in neither scenario list.
river_hist_files = [
    name
    for name in river_discharge_files
    if name not in river_rcp_45_files and name not in river_rcp_85_files and _is_large_river_file(name)
]

In [None]:
# Open all historical river-discharge files as one dataset, combined by coordinates.
river_hist_ds = xr.open_mfdataset([os.path.join(river_discharge_folder, f) for f in river_hist_files], combine='by_coords', engine='netcdf4')

In [None]:
# Open all RCP 4.5 river-discharge files as one dataset, combined by coordinates.
river_rcp_45_ds = xr.open_mfdataset([os.path.join(river_discharge_folder, f) for f in river_rcp_45_files], combine='by_coords', engine='netcdf4')

In [None]:
# Open all RCP 8.5 river-discharge files as one dataset, combined by coordinates.
river_rcp_85_ds = xr.open_mfdataset([os.path.join(river_discharge_folder, f) for f in river_rcp_85_files], combine='by_coords', engine='netcdf4')

### Historical Data

In [None]:
# Flatten the historical river-discharge dataset into a DataFrame with the index
# levels exposed as ordinary columns.
hist_df = river_hist_ds.to_dataframe().reset_index()

In [None]:
# Keep only the historical river-discharge rows for the Xerta grid cell.
# x_coord / y_coord are not recomputed in this section, so the values from the
# precipitation section are reused — this assumes the river-discharge grid shares
# the same x/y coordinates (TODO confirm).
# .copy() makes the selection an independent frame, so adding a column to it
# afterwards does not trigger pandas' SettingWithCopyWarning.
xerta_df = hist_df[(hist_df['x'] == x_coord) & (hist_df['y'] == y_coord)].copy()

In [None]:
# Tag these rows as the historical series; the plots colour by this label.
xerta_df['label'] = 'historical'

### RCP 4.5 Data

In [None]:
# Flatten the RCP 4.5 river-discharge dataset into a DataFrame with the index
# levels exposed as ordinary columns.
river_rcp_45_df = river_rcp_45_ds.to_dataframe().reset_index()

In [None]:
# Select the RCP 4.5 river-discharge rows for the Xerta grid cell and tag them.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (river_rcp_45_df['x'] == x_coord) & (river_rcp_45_df['y'] == y_coord)
temp_df = river_rcp_45_df[at_xerta].assign(label='rcp45')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

### RCP 8.5 Data

In [None]:
# Flatten the RCP 8.5 river-discharge dataset into a DataFrame with the index
# levels exposed as ordinary columns.
river_rcp_85_df = river_rcp_85_ds.to_dataframe().reset_index()

In [None]:
# Select the RCP 8.5 river-discharge rows for the Xerta grid cell and tag them.
# .assign() returns a new frame, which avoids the SettingWithCopyWarning raised
# when a column is added directly to a filtered slice.
at_xerta = (river_rcp_85_df['x'] == x_coord) & (river_rcp_85_df['y'] == y_coord)
temp_df = river_rcp_85_df[at_xerta].assign(label='rcp85')

# Append to the combined Xerta frame (re-running this cell appends the rows again).
xerta_df = pd.concat([xerta_df, temp_df])

In [None]:
# Display the combined historical / RCP 4.5 / RCP 8.5 river-discharge rows for Xerta.
xerta_df

## Some Plots

In [None]:
# Map of rdis_ymonmean (RCP 4.5) at the first time step, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 7.5))
river_rcp_45_ds['rdis_ymonmean'].isel(time=0).plot(ax=ax)

# Mark the selected grid cell (x_coord, y_coord) with a red dot.
ax.plot(x_coord, y_coord, 'ro')

plt.show()

In [None]:
# Inspect the latitude variable of the RCP 4.5 river-discharge dataset.
river_rcp_45_ds.lat

In [None]:
# NOTE(review): this assigns `lat` back onto itself through attribute syntax, so it
# cannot change the data. xarray is expected to reject attribute-style assignment
# on a Dataset — confirm whether this cell runs at all or can be dropped.
river_rcp_45_ds.lat = river_rcp_45_ds.lat

In [None]:
# Re-attach `lat` as a coordinate of the dataset, using its current values.
river_rcp_45_ds = river_rcp_45_ds.assign_coords(lat=river_rcp_45_ds['lat'])

# Reshape the latitude array to (-1, n, m), where n and m are the sizes of its last
# two dimensions, and write the result back into the coordinate.
# NOTE(review): if `lat` is 2-D this yields a 3-D array whose shape differs from the
# coordinate's own shape; presumably the assignment then fails — verify against the
# actual data and clarify what this step is meant to achieve.
river_rcp_45_ds.coords['lat'].values = river_rcp_45_ds.coords['lat'].values.reshape(-1, river_rcp_45_ds.coords['lat'].values.shape[-2], river_rcp_45_ds.coords['lat'].values.shape[-1])

In [None]:
# Quadmesh map of the RCP 4.5 river-discharge field on lon/lat axes.
# NOTE(review): the `.hvplot` accessor is normally registered by importing
# `hvplot.xarray`; no such import is visible in this notebook — confirm it is
# available before this cell runs.
river_rcp_45_ds['rdis_ymonmean'].hvplot.quadmesh(x='lon', y='lat', rasterize=True, cmap='inferno', project=True, coastline=True, width=800, height=500)

In [None]:
# Compare the historical and projected river discharge at Xerta with the observed flow.

colors = {
    'historical': 'blue',
    'rcp45': 'green',
    'rcp85': 'red',
}

plt.figure(figsize=(30, 7.5))
years = xerta_df['time'].dt.year
for label in xerta_df['label'].unique():
    label_rows = xerta_df['label'] == label
    # One line per (scenario, year) pair, all drawn in the scenario colour.
    for year in years.unique():
        temp_df = xerta_df[label_rows & (years == year)]
        sns.lineplot(x=temp_df['time'], y=temp_df['rdis_ymonmean'], color=colors[label], marker='o')

    # Empty line whose only purpose is a single legend entry per scenario.
    plt.plot([], [], color=colors[label], label=label)

# Observed monthly averages; rows with a flow of 700 or more are left out of the plot.
observed_flow = full_df[full_df['Flow River'] < 700]
sns.lineplot(x=observed_flow['DateTime'], y=observed_flow['Flow River'], color='black', marker='o', label='Observed')

plt.xlabel('Time')
plt.ylabel('Flow River (m³/s)')
plt.title('Flow River at Xerta')
plt.legend()
plt.show()


# Water Temperature

# PROBLEM

The data has a strange format: the coordinates are (id, time) and there are no x, y coordinates. I searched for the ID coordinate system, but no results showed up.

In [None]:
# List the NetCDF files in the water-temperature folder and split them by scenario.
water_temperature_files = [name for name in os.listdir(water_temperature_folder) if name.endswith('.nc')]

water_rcp_45_files = [name for name in water_temperature_files if 'rcp45' in name]
water_rcp_85_files = [name for name in water_temperature_files if 'rcp85' in name]

# Everything that belongs to neither scenario counts as historical.
water_scenario_files = water_rcp_45_files + water_rcp_85_files
water_hist_files = [name for name in water_temperature_files if name not in water_scenario_files]
# water_hist_files = [f for f in water_hist_files if os.path.getsize(os.path.join(water_temperature_folder, f)) / 1024 / 1024 > 10]

In [None]:
# Open all RCP 4.5 water-temperature files as one dataset. Unlike the other
# open_mfdataset calls in this notebook, no `combine` argument is passed here,
# so xarray's default applies.
water_rcp_45_ds = xr.open_mfdataset([os.path.join(water_temperature_folder, f) for f in water_rcp_45_files], engine='netcdf4')

In [None]:
# Open all RCP 8.5 water-temperature files as one dataset, combined by coordinates.
water_rcp_85_ds = xr.open_mfdataset([os.path.join(water_temperature_folder, f) for f in water_rcp_85_files], combine='by_coords', engine='netcdf4')

In [None]:
# Open the historical water-temperature files with a nested combine, overriding
# compatibility checks (compat='override') and keeping coordinates minimal.
# NOTE(review): these options differ from the other loaders in this notebook —
# confirm the merged result is what is expected for these files.
water_hist_ds = xr.open_mfdataset([os.path.join(water_temperature_folder, f) for f in water_hist_files], combine='nested', engine='netcdf4', compat='override', coords='minimal')

In [None]:
# Print a summary of the RCP 4.5 water-temperature dataset.
water_rcp_45_ds.info()

### Historical Data

In [None]:
# Flatten the historical water-temperature dataset into a DataFrame with the index
# levels exposed as ordinary columns.
hist_df = water_hist_ds.to_dataframe().reset_index()

In [None]:
# Attempt to select the historical water-temperature rows for the Xerta grid cell.
# NOTE(review): the notes at the top of this section say the water-temperature data
# is indexed by (id, time) and has no x / y coordinates, so this filter presumably
# fails with a KeyError — confirm and replace once the id mapping is known.
xerta_df = hist_df[(hist_df['x'] == x_coord) & (hist_df['y'] == y_coord)]

In [None]:
# Tag these rows as the historical series; the plots colour by this label.
xerta_df['label'] = 'historical'

### RCP 4.5 Data

In [None]:
# Start again from an empty frame; any historical rows selected just above are
# discarded by this reassignment.
xerta_df = pd.DataFrame()

In [None]:
# Flatten the RCP 4.5 water-temperature dataset into a DataFrame with the index
# levels exposed as ordinary columns.
water_rcp_45_df = water_rcp_45_ds.to_dataframe().reset_index()

In [None]:
# Display the flattened RCP 4.5 water-temperature frame.
water_rcp_45_df

In [None]:
# NOTE(review): although this is the Water Temperature section, the rows are taken
# from `river_rcp_45_df` (river discharge), not from `water_rcp_45_df` built just
# above. This looks like a copy-paste leftover; switching frames would require x / y
# columns, which the section notes say the water data lacks — resolve together.
temp_df = river_rcp_45_df[(river_rcp_45_df['x'] == x_coord) & (river_rcp_45_df['y'] == y_coord)]
temp_df['label'] = 'rcp45'

# Append the tagged rows to the (freshly emptied) Xerta frame.
xerta_df = pd.concat([xerta_df, temp_df])

### RCP 8.5 Data

In [None]:
# NOTE(review): this rebuilds the river-discharge RCP 8.5 frame inside the Water
# Temperature section; `water_rcp_85_ds` is never converted — presumably a
# copy-paste leftover, confirm.
river_rcp_85_df = river_rcp_85_ds.to_dataframe()
river_rcp_85_df = river_rcp_85_df.reset_index()

In [None]:
# NOTE(review): as in the RCP 4.5 cell of this section, the rows come from the
# river-discharge frame rather than from water-temperature data — confirm.
temp_df = river_rcp_85_df[(river_rcp_85_df['x'] == x_coord) & (river_rcp_85_df['y'] == y_coord)]
temp_df['label'] = 'rcp85'

# Append the tagged rows to the combined Xerta frame.
xerta_df = pd.concat([xerta_df, temp_df])

In [None]:
# Display the combined frame assembled in this section.
xerta_df

## Some Plots

In [None]:
# Plot wtemp_ymonmean (RCP 4.5) at the first time step on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 7.5))
water_rcp_45_ds['wtemp_ymonmean'].isel(time=0).plot(ax=ax)

# Red marker at (x_coord, y_coord).
# NOTE(review): the section notes say this dataset has no x / y coordinates, so the
# marker may not share the plot's axes — confirm.
ax.plot(x_coord, y_coord, 'ro')

plt.show()

In [None]:
# Inspect the latitude variable of the RCP 4.5 river-discharge dataset
# (the same inspection already appears in the River Discharge section).
river_rcp_45_ds.lat

In [None]:
# NOTE(review): self-assignment of `lat` through attribute syntax on the river
# dataset, repeated from the River Discharge section. It cannot change the data and
# xarray is expected to reject attribute-style assignment — confirm it can be dropped.
river_rcp_45_ds.lat = river_rcp_45_ds.lat

In [None]:
# Re-attach `lat` as a coordinate of the river dataset, using its current values
# (repeated from the River Discharge section).
river_rcp_45_ds = river_rcp_45_ds.assign_coords(lat=river_rcp_45_ds['lat'])

# Reshape the latitude array to (-1, n, m), where n and m are the sizes of its last
# two dimensions, and write the result back into the coordinate.
# NOTE(review): if `lat` is 2-D the reshaped array no longer matches the coordinate's
# shape; presumably the assignment then fails — verify against the actual data.
river_rcp_45_ds.coords['lat'].values = river_rcp_45_ds.coords['lat'].values.reshape(-1, river_rcp_45_ds.coords['lat'].values.shape[-2], river_rcp_45_ds.coords['lat'].values.shape[-1])

In [None]:
# Quadmesh map of the RCP 4.5 river-discharge field on lon/lat axes.
# NOTE(review): this plots river discharge inside the Water Temperature section, and
# the `.hvplot` accessor normally requires importing `hvplot.xarray`, which is not
# visible in this notebook — confirm both.
river_rcp_45_ds['rdis_ymonmean'].hvplot.quadmesh(x='lon', y='lat', rasterize=True, cmap='inferno', project=True, coastline=True, width=800, height=500)

In [None]:
# plot the raw data together with the projections and the historical data

colors = {
    'historical': 'blue',
    'rcp45': 'green',
    'rcp85': 'red',
}

plt.figure(figsize=(30, 7.5))
for label in xerta_df['label'].unique():
    for year in xerta_df['time'].dt.year.unique():
        temp_df = xerta_df[(xerta_df['label'] == label) & (xerta_df['time'].dt.year == year)]
        sns.lineplot(x=temp_df['time'], y=temp_df['rdis_ymonmean'], color=colors[label], marker='o')
        
    # set label
    plt.plot([], [], color=colors[label], label=label)

sns.lineplot(x=full_df[full_df['Flow River'] < 700]['DateTime'], y=full_df[full_df['Flow River'] < 700]['Flow River'], color='black', marker='o', label='Observed')

plt.xlabel('Time')
plt.ylabel('Flow River (m³/s)')

plt.title('Flow River at Xerta')

plt.legend()
plt.show()
