In [1]:
# import statements
import numpy as np
import xarray as xr
import glob
import random
import cartopy.io.shapereader as shpreader
import shapely.geometry as sgeom
from shapely.ops import unary_union
from shapely.prepared import prep
import sys
import matplotlib.pyplot as plt
import PolarTestingTrainingSplit_CV
from matplotlib.colors import LinearSegmentedColormap

# Get names of models in which we are testing on
path_to_data = '/home/disk/pna2/aodhan/SurfaceTrendLearning/PoChedleyEtAl2022/TASmaps/*_TrendMaps.nc'
ModelNames = [i[70:-16] for i in glob.glob(path_to_data)]

# Get observational data
observational_trends = glob.glob('/home/disk/p/aodhan/SurfaceTrendLearing/PolarApplication/TASObsTrends/*.npy')
observational_trend_maps = [np.load(observational_trends[i]) for i in range(0,len(observational_trends))]
latitudes = np.linspace(-88.75,88.75,72)
weights = np.cos(np.deg2rad(latitudes)) # these will be used to weight predictors
observational_trend_maps_weighted = np.multiply(observational_trend_maps, weights[np.newaxis,:,np.newaxis])
observational_trend_maps_reshaped = np.reshape(observational_trend_maps_weighted, (3, 72*144))

# Create Custom Color Map
colors = np.array([(22,98,248), (48,141,250), (71,172,251), (100,201,252), (129,222,253),(162,240,254), (215,249,253), (255,255,255), 
                   (255,255,255), (255,249,217), (247,236,155), (254,221,128), (254,202,100), (255,173,71), (252,142,42), (255,101,15)])/255
custom_cmap = LinearSegmentedColormap.from_list('cmap', colors)

# Do CV train-test-split 
TrainingPredictorData, TrainingTargetData, TestingPredictorData, TestingTargetData, TestingTotalTrend = PolarTestingTrainingSplit_CV.training_testing_split(path_to_data)
test_model_data = TestingPredictorData[0][6]

In [10]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Ridge
from scipy import stats
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor


layer_sizes = [[10,10], [20,10], [10,20], [5,10], [10,5],[10,10,20], [10,20,10], [20,10,10], [10,20,20], [20,20,20], [20,20,10], [20,10,20], [10,10,10], [10,10,10,10], [10,20,20,10], [20,10,10,10], [10,10,10,20]]
for layer_size in layer_sizes:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        if len(layer_size) == 2:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 3:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 4:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2], layer_size[3]), activation='relu', solver='adam', random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(layer_size, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


[10, 10] r = 0.45 sqrt(MSE) = 0.038
[20, 10] r = 0.64 sqrt(MSE) = 0.034
[10, 20] r = 0.49 sqrt(MSE) = 0.047
[5, 10] r = 0.57 sqrt(MSE) = 0.036
[10, 5] r = 0.37 sqrt(MSE) = 0.087
[10, 10, 20] r = 0.56 sqrt(MSE) = 0.040
[10, 20, 10] r = 0.53 sqrt(MSE) = 0.037
[20, 10, 10] r = 0.66 sqrt(MSE) = 0.027
[10, 20, 20] r = 0.60 sqrt(MSE) = 0.032
[20, 20, 20] r = 0.63 sqrt(MSE) = 0.033
[20, 20, 10] r = 0.49 sqrt(MSE) = 0.043
[20, 10, 20] r = 0.59 sqrt(MSE) = 0.035
[10, 10, 10] r = 0.67 sqrt(MSE) = 0.029
[10, 10, 10, 10] r = 0.65 sqrt(MSE) = 0.027
[10, 20, 20, 10] r = 0.73 sqrt(MSE) = 0.024
[20, 10, 10, 10] r = 0.66 sqrt(MSE) = 0.029
[10, 10, 10, 20] r = 0.66 sqrt(MSE) = 0.030


In [11]:
layer_sizes = [[100,10], [20,10,5],  [10,20,20,10], [30,20,10]]
for layer_size in layer_sizes:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        if len(layer_size) == 2:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 3:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 4:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2], layer_size[3]), activation='relu', solver='adam', random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(layer_size, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


[100, 10] r = 0.21 sqrt(MSE) = 0.075
[20, 10, 5] r = 0.41 sqrt(MSE) = 0.047
[10, 20, 20, 10] r = 0.73 sqrt(MSE) = 0.024
[30, 20, 10] r = 0.54 sqrt(MSE) = 0.043


In [13]:
alphas = [0.0001, 0.001, 0.01, 0.1, 1,10]
for alpha_ in alphas:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        MLmodel =  MLPRegressor(hidden_layer_sizes=(10,20,20,10), activation='relu', solver='adam', alpha=alpha_, random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(alpha_, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


[30, 20, 10] r = 0.73 sqrt(MSE) = 0.024
[30, 20, 10] r = 0.69 sqrt(MSE) = 0.027
[30, 20, 10] r = 0.59 sqrt(MSE) = 0.031
[30, 20, 10] r = 0.64 sqrt(MSE) = 0.030
[30, 20, 10] r = 2.62 sqrt(MSE) = 0.032




[30, 20, 10] r = 0.00 sqrt(MSE) = 0.032
[30, 20, 10] r = 3.97 sqrt(MSE) = 0.032
[30, 20, 10] r = -1.5 sqrt(MSE) = 0.032


In [4]:
from sklearn.neural_network import MLPRegressor
from scipy import stats

alphas = [0.0001, 0.001, 0.01, 0.1, 1,10]
for alpha_ in alphas:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        MLmodel =  MLPRegressor(hidden_layer_sizes=(10,10,10), activation='relu', solver='adam', alpha=alpha_, random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(alpha_, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


0.0001 r = 0.67 sqrt(MSE) = 0.029
0.001 r = 0.54 sqrt(MSE) = 0.041
0.01 r = 0.63 sqrt(MSE) = 0.035


KeyboardInterrupt: 

In [30]:
from sklearn.cross_decomposition import PLSRegression
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import Ridge
from scipy import stats
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor


layer_base = [10,20,20,10]
layer_sizes = []
for idx in range(4):
    for add in [-8,-4,4,8]:
        adjusted_layer = layer_base.copy()
        adjusted_layer_value = layer_base[idx] + add
        adjusted_layer[idx] = adjusted_layer_value
        #print(adjusted_layer)
        layer_sizes.append(adjusted_layer)
for layer_size in layer_sizes:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        if len(layer_size) == 2:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 3:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2]), activation='relu', solver='adam', random_state=42)
        elif len(layer_size) == 4:
            MLmodel =  MLPRegressor(hidden_layer_sizes=(layer_size[0], layer_size[1], layer_size[2], layer_size[3]), activation='relu', solver='adam', random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(layer_size, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


[2, 20, 20, 10] r = 0.60 sqrt(MSE) = 0.033
[6, 20, 20, 10] r = 0.64 sqrt(MSE) = 0.032
[14, 20, 20, 10] r = 0.63 sqrt(MSE) = 0.033
[18, 20, 20, 10] r = 0.67 sqrt(MSE) = 0.028
[10, 12, 20, 10] r = 0.59 sqrt(MSE) = 0.030
[10, 16, 20, 10] r = 0.63 sqrt(MSE) = 0.030
[10, 24, 20, 10] r = 0.58 sqrt(MSE) = 0.038
[10, 28, 20, 10] r = 0.60 sqrt(MSE) = 0.034
[10, 20, 12, 10] r = 0.65 sqrt(MSE) = 0.029
[10, 20, 16, 10] r = 0.64 sqrt(MSE) = 0.030
[10, 20, 24, 10] r = 0.65 sqrt(MSE) = 0.032
[10, 20, 28, 10] r = 0.60 sqrt(MSE) = 0.034
[10, 20, 20, 2] r = 0.53 sqrt(MSE) = 0.039
[10, 20, 20, 6] r = 0.63 sqrt(MSE) = 0.026
[10, 20, 20, 14] r = 0.66 sqrt(MSE) = 0.029
[10, 20, 20, 18] r = 0.66 sqrt(MSE) = 0.029


In [31]:
from sklearn.neural_network import MLPRegressor
from scipy import stats

alphas = [0.00005, 0.000075, 0.0001, 0.000125, 0.00015, 0.00025,.0005]
for alpha_ in alphas:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        MLmodel =  MLPRegressor(hidden_layer_sizes=(10,20,20,10), activation='relu', solver='adam', alpha=alpha_, random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(alpha_, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


5e-05 r = 0.72 sqrt(MSE) = 0.026
7.5e-05 r = 0.72 sqrt(MSE) = 0.025
0.0001 r = 0.73 sqrt(MSE) = 0.024
0.000125 r = 0.69 sqrt(MSE) = 0.027
0.00015 r = 0.65 sqrt(MSE) = 0.029
0.00025 r = 0.68 sqrt(MSE) = 0.027
0.0005 r = 0.71 sqrt(MSE) = 0.026


In [32]:
from sklearn.neural_network import MLPRegressor
from scipy import stats

alphas = [0.000085, 0.000095, 0.000105, 0.000115]
for alpha_ in alphas:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        MLmodel =  MLPRegressor(hidden_layer_sizes=(10,20,20,10), activation='relu', solver='adam', alpha=alpha_, random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(alpha_, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


8.5e-05 r = 0.70 sqrt(MSE) = 0.027
9.5e-05 r = 0.66 sqrt(MSE) = 0.027
0.000105 r = 0.68 sqrt(MSE) = 0.026
0.000115 r = 0.67 sqrt(MSE) = 0.027


In [33]:
from sklearn.neural_network import MLPRegressor
from scipy import stats

alphas = [0.00025,.00035, .00045, .00055, .00065, .00075]
for alpha_ in alphas:
    # iterate over all CV folds (there should be eqaul number of CV folds as models)
    predictions = []
    validations = []
    gistemp_predictions = []
    era5_predictions = []
    hadcrut_predictions = []

    for model_idx in range(len(ModelNames)):
        
        # Reshape target and predictor data for model
        TrainingTargetDataShape = np.shape(TrainingTargetData[model_idx])
        TestinTargetDataShape = np.shape(TestingTargetData[model_idx])
        TrainingTargetDataReshaped = np.reshape(TrainingTargetData[model_idx], (TrainingTargetDataShape[0], TrainingTargetDataShape[1]*TrainingTargetDataShape[2]))
        TestingTargetDataReshaped = np.reshape(TestingTargetData[model_idx], (TestinTargetDataShape[0], TestinTargetDataShape[1]*TestinTargetDataShape[2]))

        # Model Design
        MLmodel =  MLPRegressor(hidden_layer_sizes=(10,20,20,10), activation='relu', solver='adam', alpha=alpha_, random_state=42)
        
        # Train model
        TrainingTargetDataReshaped = TrainingTargetDataReshaped[:,0]
        pls_model = MLmodel.fit(TrainingPredictorData[model_idx], TrainingTargetDataReshaped)

        # Predict using trained model
        Y_pred = MLmodel.predict(TestingPredictorData[model_idx])
        
        # Apply trained model to observations
        Y_pred_Gistemp = MLmodel.predict(observational_trend_maps_reshaped[0].reshape(1, -1))
        Y_pred_ERA5 = MLmodel.predict(observational_trend_maps_reshaped[1].reshape(1, -1))
        Y_pred_HadCrut = MLmodel.predict(observational_trend_maps_reshaped[2].reshape(1, -1))
        
        # Save output for plotting
        gistemp_predictions.append(Y_pred_Gistemp)
        era5_predictions.append(Y_pred_ERA5)
        hadcrut_predictions.append(Y_pred_HadCrut)

        validations.append(TestingTargetDataReshaped[:,0])
        predictions.append(Y_pred)
    
    vals = list(np.concatenate(validations).flat)
    preds = list(np.concatenate(predictions).flat)
    allsimulation_r = stats.pearsonr(vals, preds)[0]
    sqrt_mse = np.sqrt(np.nanmean((np.array(vals) - np.array(preds))**2))

    print(alpha_, 'r = ' + str(allsimulation_r)[:4] + ' sqrt(MSE) = ' + str(sqrt_mse)[:5])


0.00025 r = 0.68 sqrt(MSE) = 0.027
0.00035 r = 0.66 sqrt(MSE) = 0.027
0.00045 r = 0.69 sqrt(MSE) = 0.027
0.00055 r = 0.62 sqrt(MSE) = 0.030
0.00065 r = 0.65 sqrt(MSE) = 0.029
0.00075 r = 0.72 sqrt(MSE) = 0.025
