In [74]:
import pandas as pd
import numpy as np
import os
import re
import shutil
import xarray as xr
from IPython.display import display, Markdown
from scipy.spatial import cKDTree

## Hazard Standard Deviation extraction for Cordex Datasets

## Extreme Temperature Hazards STD

In [55]:
# Temperature indicators folders for the extreme temperature hazards
temp_daysAbove_folder = '/work/cmcc/dg07124/climax/indicators/cordex2/tempdays/tempDaysAbove/std_ensembles'
temp_percentiles_folder = '/work/cmcc/dg07124/climax/indicators/cordex2/tempPercentiles/std_ensembles'

temp_std_ensembles_folder = '/work/cmcc/dg07124/climax/indicators/cordex2/temp_std_ensembles'

In [56]:
# Load the coordinates of the airports from the Excel file
csv_file = '/users_home/cmcc/dg07124/climax/airports_coordinates.csv'
airports_df = pd.read_csv(csv_file)
airports_df

Unnamed: 0,Airports,Lat,Lon
0,Milano Malpensa,45.63,8.73
1,Bergamo Orio al Serio,45.67,9.71
2,Milano Linate,45.45,9.28
3,Roma Fiumicino,41.8,12.25
4,Roma Ciampino,41.8,12.59
5,Napoli Capodichino,40.88,14.29
6,Palermo Punta Raisi,38.18,13.1
7,Catania Fontanarossa,37.47,15.07
8,Cagliari Elmas,39.25,9.06


In [57]:
if not os.path.exists(temp_std_ensembles_folder):
    os.makedirs(temp_std_ensembles_folder)

# Step 2: Copy files from temp_daysAbove_folder and temp_percentiles_folder to temp_avg_ensembles_folder
def copy_files_to_new_folder(source_folder, destination_folder):
    for file_name in os.listdir(source_folder):
        source_file = os.path.join(source_folder, file_name)
        destination_file = os.path.join(destination_folder, file_name)
        
        if os.path.isfile(source_file):
            shutil.copy(source_file, destination_file)  # Copy file

# Copy files from both folders
copy_files_to_new_folder(temp_daysAbove_folder, temp_std_ensembles_folder)
copy_files_to_new_folder(temp_percentiles_folder, temp_std_ensembles_folder)

In [58]:
# Define time periods and RCP scenarios
time_periods = ['2021-2050', '2041-2070', '2071-2100']
rcp_scenarios = ['rcp26', 'rcp45', 'rcp85']

In [59]:
# Create the KDTree for fast nearest-neighbor search
def create_kd_tree(latitudes, longitudes):
    coords = np.vstack((latitudes, longitudes)).T  # Shape (n, 2)
    tree = cKDTree(coords)
    return tree

In [60]:
# Function to process the netCDF files for each time period and RCP scenario
def process_netCDF_files(netcdf_folder, time_period, rcp_scenario, airports_df, tree):
    # Initialize the list to hold the extracted values (one row for each airport)
    extracted_values = []

    # Define columns for the indicators (fixed set of columns)
    columns = ['Airports', 'p95', 'p999', 'tempabove35', 'tempabove40', 'tempabove45']

    # Initialize the dictionary to hold values for each airport
    for _, row in airports_df.iterrows():
        extracted_values_airports = {'Airports': row['Airports']}
        for col in columns[1:]:
            extracted_values_airports[col] = np.nan  # Initialize all indicators to NaN for this airport

        # Loop through each netCDF file and check if it belongs to the given time period and RCP scenario
        for file in os.listdir(netcdf_folder):
            if file.endswith('.nc') and time_period in file and rcp_scenario in file:
                # print(f"Processingnetcdf_folderfile: {netcdf_folder}")
                # print(f"Processing file: {file}")

                # Open the netCDF file using xarray
                nc_file_path = os.path.join(netcdf_folder, file)
                ds = xr.open_dataset(nc_file_path)

                # Extract the lat/lon from the netCDF file
                lat = ds['lat'].values
                lon = ds['lon'].values

                # Flatten the 2D arrays to 1D for KDTree usage
                lat_flat = lat.flatten()
                lon_flat = lon.flatten()

                # Create a 2D array of lat/lon coordinates
                coords_flat = np.vstack((lat_flat, lon_flat)).T  # Shape: (n, 2)

                # Create a KDTree for fast nearest-neighbor search
                tree = cKDTree(coords_flat)

                # Extract the coordinates for each airport
                airport_coords = (row['Lat'], row['Lon'])

                # Find the closest coordinates from the netCDF lat/lon grid for the current airport
                closest_idx = tree.query([airport_coords], k=1)[1]  # Get index of closest point
                
                # Convert the flattened index to (y, x) coordinates
                closest_y, closest_x = np.unravel_index(closest_idx, lat.shape)

                # Extract the relevant variable value for each indicator
                extracted_value = ds['tasmax'].isel(y=closest_y, x=closest_x).values
                # print(f'extracted_value: {extracted_value.flatten()[0]}')

                # Check the indicator based on the filename and update the corresponding column
                if 'stdabove35' in file:
                    extracted_values_airports['tempabove35'] = extracted_value.flatten()[0]
                elif 'stdabove40' in file:
                    extracted_values_airports['tempabove40'] = extracted_value.flatten()[0]
                elif 'stdabove45' in file:
                    extracted_values_airports['tempabove45'] = extracted_value.flatten()[0]
                elif 'std95' in file:
                    extracted_values_airports['p95'] = extracted_value.flatten()[0]
                elif 'std999' in file:
                    extracted_values_airports['p999'] = extracted_value.flatten()[0]
                    

        # Add the extracted values for this airport to the list
        extracted_values.append(extracted_values_airports)

    # Convert the extracted values to a DataFrame
    df = pd.DataFrame(extracted_values, columns=columns)
    return df

In [61]:
# Create a KDTree using airport coordinates
tree = create_kd_tree(airports_df['Lat'], airports_df['Lon'])

# Process the files for each time period and RCP scenario and create separate DataFrames
dfs = {}  # Dictionary to store DataFrames for each time period + RCP combination

for time_period in time_periods:
    for rcp_scenario in rcp_scenarios:
        # Process the files for the given time period and RCP scenario
        df = process_netCDF_files(temp_std_ensembles_folder, time_period, rcp_scenario, airports_df, tree)
        if df is not None:
            dfs[f'{rcp_scenario}_{time_period}'] = df

# Display the DataFrames for each time period and RCP scenario
for key, df in dfs.items():
    print(f"Data for {key}:")
    df = df.set_index('Airports')
    # df['average'] = df.mean(axis=1)
    print(df)
    # save to CSV
    df.to_csv(f'/work/cmcc/dg07124/climax/indicators/cordex2/temp_std_ensembles/temp_std_{key}_data.csv', index=True)

Data for rcp26_2021-2050:
                            p95      p999  tempabove35  tempabove40  \
Airports                                                              
Milano Malpensa        0.681519  1.173082     3.318777     0.787577   
Bergamo Orio al Serio  0.657671  1.149640     3.564539     0.249772   
Milano Linate          0.612470  1.268595     4.648652     0.804932   
Roma Fiumicino         0.524879  0.825922     4.588714     0.216768   
Roma Ciampino          0.584376  0.827377     5.180357     0.228932   
Napoli Capodichino     0.424670  0.845480     3.800186     0.465685   
Palermo Punta Raisi    0.302887  0.615009     1.390151     0.232121   
Catania Fontanarossa   0.351584  0.753102     6.414966     1.084037   
Cagliari Elmas         0.476794  0.893550     7.222046     0.619808   

                       tempabove45  
Airports                            
Milano Malpensa           0.001843  
Bergamo Orio al Serio     0.000000  
Milano Linate             0.053435  
Roma Fi

## Example of the output for the hazard of extrem temperature for the scenario RCP 2.6 and time period 2021-2050

In [62]:
df = pd.read_csv('/work/cmcc/dg07124/climax/indicators/cordex2/temp_std_ensembles/temp_std_rcp26_2021-2050_data.csv', index_col='Airports')
df

Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.681519,1.173082,3.318777,0.787577,0.001843
Bergamo Orio al Serio,0.657671,1.14964,3.564539,0.249772,0.0
Milano Linate,0.61247,1.268595,4.648652,0.804932,0.053435
Roma Fiumicino,0.524879,0.825922,4.588714,0.216768,0.0
Roma Ciampino,0.584376,0.827377,5.180357,0.228932,0.0
Napoli Capodichino,0.42467,0.84548,3.800186,0.465685,0.07157
Palermo Punta Raisi,0.302887,0.615009,1.390151,0.232121,0.0
Catania Fontanarossa,0.351584,0.753102,6.414966,1.084037,0.143384
Cagliari Elmas,0.476794,0.89355,7.222046,0.619808,0.018426


In [63]:
# Define the folder path where your CSV files are stored
csv_folder_path = '/work/cmcc/dg07124/climax/indicators/cordex2/temp_std_ensembles'

# List all the CSV files in the folder
csv_files = [f for f in os.listdir(csv_folder_path) if f.endswith('.csv')]
csv_files

['temp_std_rcp85_2041-2070_data.csv',
 'temp_std_rcp45_2041-2070_data.csv',
 'temp_std_rcp45_2071-2100_data.csv',
 'temp_std_rcp26_2071-2100_data.csv',
 'temp_std_rcp45_2021-2050_data.csv',
 'temp_std_rcp26_2021-2050_data.csv',
 'temp_std_rcp85_2021-2050_data.csv',
 'temp_std_rcp85_2071-2100_data.csv',
 'temp_std_rcp26_2041-2070_data.csv']

In [78]:
# Open each CSV (Standard Deviation for the Hazarsd for Extreme Temperature) as a DataFrame
for csv_file in csv_files:
    file_path = os.path.join(csv_folder_path, csv_file)
    df = pd.read_csv(file_path, index_col='Airports')
    
    print(f"\n\033[1mStandard Deviation Hazard for: {csv_file}\033[0m\n")
    display(df)


[1mStandard Deviation Hazard for: temp_std_rcp85_2041-2070_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.731262,0.702369,7.608554,2.444121,0.135919
Bergamo Orio al Serio,0.698533,0.95389,7.83879,1.785591,0.012423
Milano Linate,0.702486,0.904333,8.972479,3.594999,0.165114
Roma Fiumicino,0.56179,0.751451,9.882092,1.204068,0.0
Roma Ciampino,0.592269,0.673416,10.515683,1.329396,0.012423
Napoli Capodichino,0.509219,0.92729,8.358044,1.045679,0.194976
Palermo Punta Raisi,0.25916,0.795024,2.879451,0.71516,0.018426
Catania Fontanarossa,0.331662,0.709035,12.205804,3.459762,0.526887
Cagliari Elmas,0.438389,0.798211,12.976156,2.727733,0.186153



[1mStandard Deviation Hazard for: temp_std_rcp45_2041-2070_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.617368,0.952694,5.178634,1.453482,0.062202
Bergamo Orio al Serio,0.618357,0.922267,5.046188,1.02914,0.009213
Milano Linate,0.601042,0.97941,6.309758,2.049859,0.099087
Roma Fiumicino,0.459624,0.913802,7.035359,0.612182,0.028734
Roma Ciampino,0.468233,0.917239,6.41225,0.742404,0.009213
Napoli Capodichino,0.379854,0.941929,6.335132,0.674627,0.111736
Palermo Punta Raisi,0.379039,0.766979,2.219101,0.325286,0.0
Catania Fontanarossa,0.358439,0.817603,8.728038,1.987495,0.233092
Cagliari Elmas,0.406472,1.066539,9.642676,1.84048,0.098914



[1mStandard Deviation Hazard for: temp_std_rcp45_2071-2100_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.733141,1.106038,6.808542,2.329334,0.178953
Bergamo Orio al Serio,0.68646,1.131841,7.188333,1.675326,0.021403
Milano Linate,0.686166,1.11673,8.451864,3.238049,0.206574
Roma Fiumicino,0.559556,0.911627,8.887,1.043347,0.038446
Roma Ciampino,0.634023,0.996063,9.183876,1.150653,0.032889
Napoli Capodichino,0.574716,0.996843,8.523521,1.098984,0.190023
Palermo Punta Raisi,0.513738,0.819404,3.46353,0.74123,0.009531
Catania Fontanarossa,0.523745,0.922735,12.53139,3.078992,0.349276
Cagliari Elmas,0.617601,1.385469,13.637557,2.652229,0.13364



[1mStandard Deviation Hazard for: temp_std_rcp26_2071-2100_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.623893,1.057265,2.787464,0.705,0.036918
Bergamo Orio al Serio,0.614063,0.844754,2.986865,0.41714,0.009213
Milano Linate,0.605729,0.893574,3.239592,0.930525,0.046128
Roma Fiumicino,0.604547,0.889843,3.809839,0.291942,0.0
Roma Ciampino,0.686293,0.850379,3.906746,0.303159,0.0
Napoli Capodichino,0.567988,1.046285,4.070541,0.310695,0.045851
Palermo Punta Raisi,0.37527,0.635072,1.273731,0.259678,0.0
Catania Fontanarossa,0.338311,0.815669,5.564975,1.238261,0.243473
Cagliari Elmas,0.463306,0.997664,5.794419,0.796488,0.067268



[1mStandard Deviation Hazard for: temp_std_rcp45_2021-2050_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.446618,1.010088,3.333919,0.809989,0.016583
Bergamo Orio al Serio,0.441997,0.935308,3.396257,0.533834,0.0
Milano Linate,0.423465,0.9073,4.25284,1.307682,0.03805
Roma Fiumicino,0.387599,0.745171,4.653045,0.345611,0.009213
Roma Ciampino,0.400369,0.590373,4.664936,0.335586,0.0
Napoli Capodichino,0.3598,0.858633,3.943516,0.550707,0.082207
Palermo Punta Raisi,0.308694,0.471801,2.051202,0.342317,0.0
Catania Fontanarossa,0.233058,0.670039,5.359851,1.529307,0.271141
Cagliari Elmas,0.361362,0.767072,6.100119,1.202726,0.025459



[1mStandard Deviation Hazard for: temp_std_rcp26_2021-2050_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.681519,1.173082,3.318777,0.787577,0.001843
Bergamo Orio al Serio,0.657671,1.14964,3.564539,0.249772,0.0
Milano Linate,0.61247,1.268595,4.648652,0.804932,0.053435
Roma Fiumicino,0.524879,0.825922,4.588714,0.216768,0.0
Roma Ciampino,0.584376,0.827377,5.180357,0.228932,0.0
Napoli Capodichino,0.42467,0.84548,3.800186,0.465685,0.07157
Palermo Punta Raisi,0.302887,0.615009,1.390151,0.232121,0.0
Catania Fontanarossa,0.351584,0.753102,6.414966,1.084037,0.143384
Cagliari Elmas,0.476794,0.89355,7.222046,0.619808,0.018426



[1mStandard Deviation Hazard for: temp_std_rcp85_2021-2050_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.483706,1.226128,3.741642,1.194269,0.054916
Bergamo Orio al Serio,0.505754,1.37264,4.033515,0.815977,0.0
Milano Linate,0.466678,1.423913,4.64217,1.776248,0.148881
Roma Fiumicino,0.449555,1.080606,5.783633,0.533596,0.009213
Roma Ciampino,0.469419,1.009794,5.910766,0.54764,0.0
Napoli Capodichino,0.406028,1.018716,4.523608,0.626797,0.119949
Palermo Punta Raisi,0.271204,0.714727,1.822134,0.343559,0.0
Catania Fontanarossa,0.311886,0.899216,7.42297,2.053937,0.253224
Cagliari Elmas,0.389761,1.21005,7.756357,1.100951,0.045134



[1mStandard Deviation Hazard for: temp_std_rcp85_2071-2100_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,1.05989,1.376907,15.382883,6.429328,1.048282
Bergamo Orio al Serio,1.0816,1.321048,15.864742,5.901763,0.375436
Milano Linate,1.003152,1.452095,17.512472,8.70378,1.448174
Roma Fiumicino,0.770016,0.997169,21.14393,3.943505,0.162805
Roma Ciampino,0.770391,1.041001,19.446971,4.614446,0.174963
Napoli Capodichino,0.799083,1.102807,20.606998,3.783762,0.416115
Palermo Punta Raisi,0.571223,1.017339,9.178618,1.855474,0.12944
Catania Fontanarossa,0.564423,0.87127,18.413071,8.445586,1.193348
Cagliari Elmas,0.63752,1.222879,20.648634,7.861219,0.643428



[1mStandard Deviation Hazard for: temp_std_rcp26_2041-2070_data.csv[0m



Unnamed: 0_level_0,p95,p999,tempabove35,tempabove40,tempabove45
Airports,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Milano Malpensa,0.613934,1.173375,2.581564,0.62935,0.001843
Bergamo Orio al Serio,0.592966,1.119159,2.953666,0.240175,0.0
Milano Linate,0.590273,1.14047,3.677688,0.623363,0.035009
Roma Fiumicino,0.477681,0.889042,3.516063,0.184398,0.0
Roma Ciampino,0.548494,0.641125,4.420334,0.23839,0.0
Napoli Capodichino,0.408734,0.517252,3.357908,0.284941,0.030912
Palermo Punta Raisi,0.429262,0.615496,1.501407,0.219839,0.0
Catania Fontanarossa,0.384379,0.903022,6.785546,1.309972,0.137948
Cagliari Elmas,0.504563,0.865971,6.575292,0.785529,0.04479
