# Fit linear trendline to AARs, PDDs, and snowfall for all sites

In [None]:
import os
import glob
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import sys
from sklearn.linear_model import LinearRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.isotonic import IsotonicRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
import numpy as np
import seaborn as sns
from scipy.stats import iqr
# Suppress warnings to prevent kernel crashing (future warning from pandas)
import warnings
warnings.filterwarnings("ignore")

## Define paths in directory

In [None]:
# Machine-specific base directory; the `functions` and `figures` folders used below live inside it
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping-application/'
figures_out_path = os.path.join(base_path, 'figures')

# Machine-specific directory that the later cells read data from and write results to
# scm_path = '/Volumes/LaCie/raineyaberle/Research/PhD/snow_cover_mapping/'
scm_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/'

# Put the project's helper functions on the import path, then load them
sys.path.append(os.path.join(base_path, 'functions'))
import model_analyze_utils as f

## Load compiled glacier boundaries and snowlines

In [None]:
# -----Glacier boundaries (AOIs) with climate clusters
aois_fn = os.path.join(scm_path, 'compiled_data', 'all_aois_climate_cluster.shp')
# read the shapefile and store both region code columns as integers
aois = gpd.read_file(aois_fn).astype({'O1Region': int, 'O2Region': int})
print('All AOIs with climate clusters loaded from file.')

# -----ERA data
era_fn = os.path.join(scm_path, 'compiled_data', 'all_era_data.csv')
# parse the Date column into datetimes right after reading
era = pd.read_csv(era_fn).assign(Date=lambda table: pd.to_datetime(table['Date']))
print('All ERA data loaded from file.')

# -----Compiled snowlines
snowlines_fn = os.path.join(scm_path, 'compiled_data', 'all_snowlines.csv')
# format='mixed' lets each datetime string be parsed with its own format
snowlines = pd.read_csv(snowlines_fn).assign(
    datetime=lambda table: pd.to_datetime(table['datetime'], format='mixed')
)
print('All snowlines loaded from file.')
# snowlines

## Filter snowlines to before September, merge snowlines and ERA data

In [None]:
# Columns copied from the ERA table onto each snowline observation
era_cols = ['Cumulative_Snowfall_mwe', 'Cumulative_Positive_Degree_Days']

# Add Month column to snowlines
snowlines['Month'] = pd.DatetimeIndex(snowlines['datetime']).month.values
# Remove observations after August (copy so the new columns are not written to a view)
snowlines_filt = snowlines.loc[snowlines['Month'] <= 8].copy()
# Start with NaN so the columns stay numeric and unmatched rows can be dropped afterwards
snowlines_filt[era_cols] = np.nan

# Grab ERA data on dates: for each observation, take the most recent ERA row
# at or before the observation datetime for the same site
for site_name in tqdm(snowlines_filt['site_name'].drop_duplicates().values):
    # merge_asof requires both keys to be sorted
    era_site = era.loc[era['site_name'] == site_name, ['Date'] + era_cols].sort_values(by='Date')
    if era_site.empty:
        continue
    obs = snowlines_filt.loc[snowlines_filt['site_name'] == site_name, ['datetime']].sort_values(by='datetime')
    merged = pd.merge_asof(obs.reset_index(drop=True), era_site, left_on='datetime', right_on='Date')
    # merge_asof keeps the left rows in order but returns a fresh 0..n-1 index,
    # so write the values back through the original row labels instead of
    # relying on index alignment
    snowlines_filt.loc[obs.index, era_cols] = merged[era_cols].values

# Drop observations without matching ERA data
snowlines_filt = snowlines_filt.dropna(subset=era_cols).reset_index(drop=True)
snowlines_filt

## Fit linear and non-parametric models to PDDs and Snowfall vs. AARs for each subregion

In [None]:
# Non-parametric fit: support vector regression with default settings
def svr_fit(X, y):
    """Fit a default ``SVR`` to (X, y).

    Returns the fitted regressor and its ``score(X, y)``, evaluated on the
    same data that was used for fitting.
    """
    regressor = SVR()
    regressor.fit(X, y)
    return regressor, regressor.score(X, y)

# Linear fit: ordinary least squares with default settings
def linear_fit(X, y):
    """Fit a default ``LinearRegression`` to (X, y).

    Returns the fitted regressor and its ``score(X, y)``, evaluated on the
    same data that was used for fitting.
    """
    regressor = LinearRegression()
    regressor.fit(X, y)
    return regressor, regressor.score(X, y)

In [None]:
# Collect one result row per subregion; the table is built once after the loop
fit_rows = []
feature_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']

# NOTE: this cell expects the two-argument linear_fit / svr_fit from the cell
# above, which return (model, score). Later cells redefine linear_fit with a
# different return value, so re-run the cell above before re-running this one.

# Iterate over subregions
for o1, o2 in tqdm(aois[['O1Region', 'O2Region']].drop_duplicates().values):
    # Grab subregion name from the region codes
    subregion_name, color = f.determine_subregion_name_color(o1, o2)
    # Subset snowlines to subregion
    site_names = aois.loc[(aois['O1Region']==o1) & (aois['O2Region']==o2), 'RGIId'].drop_duplicates().values
    snowlines_subregion = snowlines_filt[snowlines_filt['site_name'].isin(site_names)]
    # Keep only rows where both predictors and the AAR are valid numbers
    # (empty-string placeholders and NaNs are coerced and dropped)
    fit_data = (snowlines_subregion[feature_cols + ['AAR']]
                .apply(pd.to_numeric, errors='coerce')
                .dropna())
    X = fit_data[feature_cols].values
    y = fit_data['AAR'].values
    if len(y) == 0:
        # nothing to fit for this subregion
        coef_linear, score_linear, score_svr = np.nan, np.nan, np.nan
    else:
        # Fit linear and SVR models to data
        model_linear, score_linear = linear_fit(X, y)
        model_svr, score_svr = svr_fit(X, y)
        coef_linear = model_linear.coef_[0]
        # plot observations and both model predictions against cumulative PDDs
        plt.figure(figsize=(8,4))
        plt.plot(X[:,0], y, '.')
        plt.plot(X[:,0], model_linear.predict(X), '.b', label='Linear')
        plt.plot(X[:,0], model_svr.predict(X), '.m', label='SVR')
        plt.legend(loc='upper right')
        plt.xlabel(r'$\Sigma$PDDs')
        plt.ylabel('AAR')
        plt.ylim(0,1)
        plt.title(subregion_name 
                  + '\nLinear score = ' + str(np.round(score_linear, 4)) 
                  + '\nSVR score = ' + str(np.round(score_svr, 4)))
        plt.show()
    # Save results for this subregion (coef_linear is the first coefficient, i.e. the PDD term)
    fit_rows.append({'Subregion': subregion_name,
                     'coef_linear': coef_linear,
                     'score_linear': score_linear,
                     'score_svr': score_svr,
                     'N': len(y)})

# Build the full results table
fit_df = pd.DataFrame(fit_rows, columns=['Subregion', 'coef_linear', 'score_linear', 'score_svr', 'N'])
    
fit_df

In [None]:
# Display the snowlines subset left over from the last subregion handled by the loop above
snowlines_subregion

## Fit linear and non-parametric models to PDDs and Snowfall vs. AARs for each climate cluster

## Fit a linear trend to PDDs and Snowfall vs. AARs for each site separately

In [None]:
# Silence all warnings; a pandas FutureWarning was crashing the kernel
import warnings
warnings.filterwarnings("ignore")

# Switch: True fits the trendlines with Monte Carlo bootstrapping, False does a single fit
run_MC = False

# Collect the model training data files, skipping the '_scaled' variants
training_data_fns = [
    fn
    for fn in sorted(glob.glob(os.path.join(scm_path, 'machine_learning', 'training_data_*.csv')))
    if '_scaled' not in fn
]

# Empty table that the results are accumulated into
aar_pdd_linear_df = pd.DataFrame()

# Define linear fit function
def linear_fit(X, y, plot=False):
    """Fit a default ``LinearRegression`` to (X, y) and return its parameters.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors. The optional plot reads columns 0 and 1 and labels them
        as cumulative PDDs and cumulative snowfall, so it needs at least two
        columns and positional (NumPy-style) indexing.
    y : array-like of length n_samples
        Response; labelled 'AAR' in the optional plot.
    plot : bool, default False
        If True, show a 3D plot of the observations and the model predictions,
        titled with the score.

    Returns
    -------
    coef : ndarray
        ``model.coef_``, one coefficient per column of X.
    intercept : float
        ``model.intercept_``.
    score : float
        ``model.score(X, y)``, evaluated on the same data used for fitting.
    """
    # fit model to data
    model = LinearRegression().fit(X, y)
    # save stats
    coef = model.coef_
    intercept = model.intercept_
    score = model.score(X, y)
    # plot
    if plot:
        fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
        ax.plot(X[:,0], X[:,1], y, '.k', markersize=1)
        ax.plot(X[:,0], X[:,1], model.predict(X), '-k')
        # raw strings: '\S' is not a valid escape sequence in a normal string literal
        ax.set_xlabel(r'$\Sigma$PDDs')
        ax.set_ylabel(r'$\Sigma$Snowfall [m.w.e.]')
        ax.set_zlabel('AAR')
        ax.grid()
        ax.set_title(score)
        plt.show()
    
    return coef, intercept, score

# Define Monte Carlo simulation function
def monte_carlo_linear_fit(X, y, n=100, ptest=0.8, per_feature=False):
    """Repeatedly fit a linear model on random subsets of (X, y) and summarize the fits.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors.
    y : array-like of length n_samples
        Response.
    n : int, default 100
        Number of random train/test splits to fit.
    ptest : float, default 0.8
        Passed to ``train_test_split`` as ``test_size``; the model is fit on
        the remaining part of the data.
    per_feature : bool, default False
        If False (original behaviour), the coefficient median and IQR are
        taken over ALL collected coefficients pooled together, so with more
        than one feature the different coefficients are mixed into a single
        number. If True, the median and IQR are computed separately for each
        feature and returned as arrays of length n_features.

    Returns
    -------
    coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr
        Median and interquartile range of the coefficients, intercepts, and
        scores over the ``n`` fits.
    """
    # initialize metrics
    coefs = []  # linear fit coefficients
    intercepts = []  # linear fit intercepts
    scores = []  # scores (R^2)
    # iterate over MC simulations
    for _ in range(n):
        # split into training and testing sets (only the training part is used)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=ptest)
        # fit model to data
        model = LinearRegression().fit(X_train, y_train)
        # grab parameters from model
        coefs.append(model.coef_)
        intercepts.append(model.intercept_)
        # NOTE: the score is evaluated on the full dataset (training rows
        # included), not on the held-out part of the split
        scores.append(model.score(X, y))
    # save stats for each
    coefs = np.asarray(coefs)
    coef_axis = 0 if per_feature else None  # None pools every coefficient, as before
    coef_median, coef_iqr = np.nanmedian(coefs, axis=coef_axis), iqr(coefs, axis=coef_axis)
    intercept_median, intercept_iqr = np.nanmedian(intercepts), iqr(intercepts)
    score_median, score_iqr = np.nanmedian(scores), iqr(scores)
    
    return coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr

# Iterate over subregion file names, collecting one result row per site.
# Rows are gathered in a list and turned into a table once, instead of
# concatenating onto an empty DataFrame inside the loop.
site_records = []
for training_data_fn in tqdm(training_data_fns):
    # Load training data
    training_data = pd.read_csv(training_data_fn)
    # Grab subregion name from file name
    subregion_name = os.path.basename(training_data_fn).split('training_data_')[1].split('.csv')[0]
    # Iterate over sites
    for site_name in training_data['site_name'].drop_duplicates().values:
        # Subset training data
        training_data_site = training_data.loc[training_data['site_name']==site_name]
        # Predictors: cumulative PDDs and cumulative snowfall; response: AAR
        X = training_data_site[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].values
        y = training_data_site['AAR']
        # Skip the fit when every predictor value is zero
        all_zero = (X == 0).all()
        if run_MC:
            if all_zero:
                coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = (np.nan,) * 6
            else:
                # NOTE: coef_median / coef_iqr pool the PDD and snowfall coefficients together
                coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = monte_carlo_linear_fit(X, y)
            # Save results for this site
            site_records.append({'site_name': site_name,
                                 'Subregion': subregion_name,
                                 'coef_median': coef_median,
                                 'coef_iqr': coef_iqr,
                                 'intercept_median': intercept_median,
                                 'intercept_iqr': intercept_iqr,
                                 'score_median': score_median,
                                 'score_iqr': score_iqr,
                                 'N': len(training_data_site)})
        else:
            if all_zero:
                # one NaN per coefficient so coef[0] / coef[1] below still work
                coef, intercept, score = (np.nan, np.nan), np.nan, np.nan
            else:
                coef, intercept, score = linear_fit(X, y)
            # Save results for this site
            site_records.append({'site_name': site_name,
                                 'Subregion': subregion_name,
                                 'coef_PDD': coef[0],
                                 'coef_snowfall': coef[1],
                                 'intercept': intercept,
                                 'score': score,
                                 'N': len(training_data_site)})

# Build the full results table
aar_pdd_linear_df = pd.DataFrame(site_records)

# Save results
if run_MC:
    aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit_MC.csv')
else:
    aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit.csv')
aar_pdd_linear_df.to_csv(aar_pdd_linear_fn, index=False)
print('Data table saved to file:', aar_pdd_linear_fn)
aar_pdd_linear_df

## Fit a linear trend to max. PDDs and max. Snowfall vs. min. AARs for each site separately

In [None]:
# Silence all warnings; a pandas FutureWarning was crashing the kernel
import warnings
warnings.filterwarnings("ignore")

# Switch: True fits the trendlines with Monte Carlo bootstrapping, False does a single fit
run_MC = False

# Collect the model training data files, skipping the '_scaled' variants
training_data_fns = [
    fn
    for fn in sorted(glob.glob(os.path.join(scm_path, 'machine_learning', 'training_data_*.csv')))
    if '_scaled' not in fn
]

# Empty table that the results are accumulated into
aar_pdd_linear_df = pd.DataFrame()

# Define linear fit function
def linear_fit(X, y, plot=False):
    """Fit a default ``LinearRegression`` to (X, y) and return its parameters.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors. The optional plot reads columns 0 and 1 and labels them
        as cumulative PDDs and cumulative snowfall, so it needs at least two
        columns and positional (NumPy-style) indexing.
    y : array-like of length n_samples
        Response; labelled 'AAR' in the optional plot.
    plot : bool, default False
        If True, show a 3D plot of the observations and the model predictions,
        titled with the score.

    Returns
    -------
    coef : ndarray
        ``model.coef_``, one coefficient per column of X.
    intercept : float
        ``model.intercept_``.
    score : float
        ``model.score(X, y)``, evaluated on the same data used for fitting.
    """
    # fit model to data
    model = LinearRegression().fit(X, y)
    # save stats
    coef = model.coef_
    intercept = model.intercept_
    score = model.score(X, y)
    # plot
    if plot:
        fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
        ax.plot(X[:,0], X[:,1], y, '.k', markersize=1)
        ax.plot(X[:,0], X[:,1], model.predict(X), '-k')
        # raw strings: '\S' is not a valid escape sequence in a normal string literal
        ax.set_xlabel(r'$\Sigma$PDDs')
        ax.set_ylabel(r'$\Sigma$Snowfall [m.w.e.]')
        ax.set_zlabel('AAR')
        ax.grid()
        ax.set_title(score)
        plt.show()
    
    return coef, intercept, score

# Define Monte Carlo simulation function
def monte_carlo_linear_fit(X, y, n=100, ptest=0.8, per_feature=False):
    """Repeatedly fit a linear model on random subsets of (X, y) and summarize the fits.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors.
    y : array-like of length n_samples
        Response.
    n : int, default 100
        Number of random train/test splits to fit.
    ptest : float, default 0.8
        Passed to ``train_test_split`` as ``test_size``; the model is fit on
        the remaining part of the data.
    per_feature : bool, default False
        If False (original behaviour), the coefficient median and IQR are
        taken over ALL collected coefficients pooled together, so with more
        than one feature the different coefficients are mixed into a single
        number. If True, the median and IQR are computed separately for each
        feature and returned as arrays of length n_features.

    Returns
    -------
    coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr
        Median and interquartile range of the coefficients, intercepts, and
        scores over the ``n`` fits.
    """
    # initialize metrics
    coefs = []  # linear fit coefficients
    intercepts = []  # linear fit intercepts
    scores = []  # scores (R^2)
    # iterate over MC simulations
    for _ in range(n):
        # split into training and testing sets (only the training part is used)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=ptest)
        # fit model to data
        model = LinearRegression().fit(X_train, y_train)
        # grab parameters from model
        coefs.append(model.coef_)
        intercepts.append(model.intercept_)
        # NOTE: the score is evaluated on the full dataset (training rows
        # included), not on the held-out part of the split
        scores.append(model.score(X, y))
    # save stats for each
    coefs = np.asarray(coefs)
    coef_axis = 0 if per_feature else None  # None pools every coefficient, as before
    coef_median, coef_iqr = np.nanmedian(coefs, axis=coef_axis), iqr(coefs, axis=coef_axis)
    intercept_median, intercept_iqr = np.nanmedian(intercepts), iqr(intercepts)
    score_median, score_iqr = np.nanmedian(scores), iqr(scores)
    
    return coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr

# Iterate over subregion file names, collecting one result row per site.
# Rows are gathered in a list and turned into a table once, instead of
# concatenating onto an empty DataFrame inside the loop.
site_records = []
for training_data_fn in tqdm(training_data_fns):
    # Load training data
    training_data = pd.read_csv(training_data_fn)
    # Grab subregion name from file name
    subregion_name = os.path.basename(training_data_fn).split('training_data_')[1].split('.csv')[0]
    # Iterate over sites
    for site_name in training_data['site_name'].drop_duplicates().values:
        # Subset training data (copy so the new columns are not written to a view)
        training_data_site = training_data.loc[training_data['site_name']==site_name].copy()
        # Add year and WOY columns
        training_data_site['Date'] = pd.to_datetime(training_data_site['Date'])
        training_data_site['Year'] = training_data_site['Date'].dt.isocalendar().year
        training_data_site['WOY'] = training_data_site['Date'].dt.isocalendar().week
        # One point per year: max. cumulative PDDs and max. cumulative snowfall vs. min. AAR
        yearly = training_data_site.groupby('Year')
        X = yearly[['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']].max().values
        y = yearly['AAR'].min().values
        # Skip the fit when every predictor value is zero
        all_zero = (X == 0).all()
        if run_MC:
            if all_zero:
                coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = (np.nan,) * 6
            else:
                # NOTE: coef_median / coef_iqr pool the PDD and snowfall coefficients together
                coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = monte_carlo_linear_fit(X, y)
            # Save results for this site
            site_records.append({'site_name': site_name,
                                 'Subregion': subregion_name,
                                 'coef_median': coef_median,
                                 'coef_iqr': coef_iqr,
                                 'intercept_median': intercept_median,
                                 'intercept_iqr': intercept_iqr,
                                 'score_median': score_median,
                                 'score_iqr': score_iqr,
                                 'N': len(y)})
        else:
            if all_zero:
                # one NaN per coefficient so coef[0] / coef[1] below still work
                coef, intercept, score = (np.nan, np.nan), np.nan, np.nan
            else:
                coef, intercept, score = linear_fit(X, y)
            # Save results for this site; N is the number of annual points
            # used in the fit, matching the Monte Carlo branch
            site_records.append({'site_name': site_name,
                                 'Subregion': subregion_name,
                                 'coef_PDD': coef[0],
                                 'coef_snowfall': coef[1],
                                 'intercept': intercept,
                                 'score': score,
                                 'N': len(y)})

# Build the full results table
aar_pdd_linear_df = pd.DataFrame(site_records)

# # Save results
# if run_MC:
#     aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit_MC.csv')
# else:
#     aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit.csv')
# aar_pdd_linear_df.to_csv(aar_pdd_linear_fn, index=False)
# print('Data table saved to file:', aar_pdd_linear_fn)
# aar_pdd_linear_df

In [None]:
# Display the per-site fit results assembled in the previous cell
aar_pdd_linear_df

## Fit a linear trend to max. PDDs and max. Snowfall vs. min. AARs for each subregion

In [None]:
# Silence all warnings; a pandas FutureWarning was crashing the kernel
import warnings
warnings.filterwarnings("ignore")

# Switch: True fits the trendlines with Monte Carlo bootstrapping, False does a single fit
run_MC = False

# Collect the model training data files, skipping the '_scaled' variants
training_data_fns = [
    fn
    for fn in sorted(glob.glob(os.path.join(scm_path, 'machine_learning', 'training_data_*.csv')))
    if '_scaled' not in fn
]

# Empty table that the results are accumulated into
aar_pdd_linear_df = pd.DataFrame()

# Define linear fit function
def linear_fit(X, y, plot=False):
    """Fit a default ``LinearRegression`` to (X, y) and return its parameters.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors. The optional plot reads columns 0 and 1 and labels them
        as cumulative PDDs and cumulative snowfall, so it needs at least two
        columns and positional (NumPy-style) indexing.
    y : array-like of length n_samples
        Response; labelled 'AAR' in the optional plot.
    plot : bool, default False
        If True, show a 3D plot of the observations and the model predictions,
        titled with the score.

    Returns
    -------
    coef : ndarray
        ``model.coef_``, one coefficient per column of X.
    intercept : float
        ``model.intercept_``.
    score : float
        ``model.score(X, y)``, evaluated on the same data used for fitting.
    """
    # fit model to data
    model = LinearRegression().fit(X, y)
    # save stats
    coef = model.coef_
    intercept = model.intercept_
    score = model.score(X, y)
    # plot
    if plot:
        fig, ax = plt.subplots(subplot_kw={'projection': '3d'})
        ax.plot(X[:,0], X[:,1], y, '.k', markersize=1)
        ax.plot(X[:,0], X[:,1], model.predict(X), '-k')
        # raw strings: '\S' is not a valid escape sequence in a normal string literal
        ax.set_xlabel(r'$\Sigma$PDDs')
        ax.set_ylabel(r'$\Sigma$Snowfall [m.w.e.]')
        ax.set_zlabel('AAR')
        ax.grid()
        ax.set_title(score)
        plt.show()
    
    return coef, intercept, score

# Define Monte Carlo simulation function
def monte_carlo_linear_fit(X, y, n=100, ptest=0.8, per_feature=False):
    """Repeatedly fit a linear model on random subsets of (X, y) and summarize the fits.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Predictors.
    y : array-like of length n_samples
        Response.
    n : int, default 100
        Number of random train/test splits to fit.
    ptest : float, default 0.8
        Passed to ``train_test_split`` as ``test_size``; the model is fit on
        the remaining part of the data.
    per_feature : bool, default False
        If False (original behaviour), the coefficient median and IQR are
        taken over ALL collected coefficients pooled together, so with more
        than one feature the different coefficients are mixed into a single
        number. If True, the median and IQR are computed separately for each
        feature and returned as arrays of length n_features.

    Returns
    -------
    coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr
        Median and interquartile range of the coefficients, intercepts, and
        scores over the ``n`` fits.
    """
    # initialize metrics
    coefs = []  # linear fit coefficients
    intercepts = []  # linear fit intercepts
    scores = []  # scores (R^2)
    # iterate over MC simulations
    for _ in range(n):
        # split into training and testing sets (only the training part is used)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=ptest)
        # fit model to data
        model = LinearRegression().fit(X_train, y_train)
        # grab parameters from model
        coefs.append(model.coef_)
        intercepts.append(model.intercept_)
        # NOTE: the score is evaluated on the full dataset (training rows
        # included), not on the held-out part of the split
        scores.append(model.score(X, y))
    # save stats for each
    coefs = np.asarray(coefs)
    coef_axis = 0 if per_feature else None  # None pools every coefficient, as before
    coef_median, coef_iqr = np.nanmedian(coefs, axis=coef_axis), iqr(coefs, axis=coef_axis)
    intercept_median, intercept_iqr = np.nanmedian(intercepts), iqr(intercepts)
    score_median, score_iqr = np.nanmedian(scores), iqr(scores)
    
    return coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr

# Iterate over subregion file names, collecting one result row per subregion.
# Rows are gathered in a list and turned into a table once, instead of
# concatenating onto an empty DataFrame inside the loop.
subregion_records = []
feature_cols = ['Cumulative_Positive_Degree_Days', 'Cumulative_Snowfall_mwe']
for training_data_fn in tqdm(training_data_fns):
    # Load training data
    training_data = pd.read_csv(training_data_fn)
    # Grab subregion name from file name
    subregion_name = os.path.basename(training_data_fn).split('training_data_')[1].split('.csv')[0]

    # Add year and WOY columns
    training_data['Date'] = pd.to_datetime(training_data['Date'])
    training_data['Year'] = training_data['Date'].dt.isocalendar().year
    training_data['WOY'] = training_data['Date'].dt.isocalendar().week
    # Weekly medians over all rows in the file, then one point per year:
    # max. cumulative PDDs and max. cumulative snowfall vs. min. AAR
    weekly = training_data.groupby(['Year', 'WOY'])[feature_cols + ['AAR']].median().reset_index()
    yearly = weekly.groupby('Year')
    X = yearly[feature_cols].max().values
    y = yearly['AAR'].min().values
    # Skip the fit when every predictor value is zero
    all_zero = (X == 0).all()
    if run_MC:
        if all_zero:
            coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = (np.nan,) * 6
        else:
            # NOTE: coef_median / coef_iqr pool the PDD and snowfall coefficients together
            coef_median, coef_iqr, intercept_median, intercept_iqr, score_median, score_iqr = monte_carlo_linear_fit(X, y)
        # Save results for this subregion
        subregion_records.append({'Subregion': subregion_name,
                                  'coef_median': coef_median,
                                  'coef_iqr': coef_iqr,
                                  'intercept_median': intercept_median,
                                  'intercept_iqr': intercept_iqr,
                                  'score_median': score_median,
                                  'score_iqr': score_iqr,
                                  'N': len(y)})
    else:
        if all_zero:
            # one NaN per coefficient so coef[0] / coef[1] below still work
            coef, intercept, score = (np.nan, np.nan), np.nan, np.nan
        else:
            coef, intercept, score = linear_fit(X, y)
        # Save results for this subregion
        subregion_records.append({'Subregion': subregion_name,
                                  'coef_PDD': coef[0],
                                  'coef_snowfall': coef[1],
                                  'intercept': intercept,
                                  'score': score,
                                  'N': len(y)})

# Build the full results table
aar_pdd_linear_df = pd.DataFrame(subregion_records)
aar_pdd_linear_df
# # Save results
# if run_MC:
#     aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit_MC.csv')
# else:
#     aar_pdd_linear_fn = os.path.join(scm_path, 'results', 'aar_pdd_snowfall_linear_fit.csv')
# aar_pdd_linear_df.to_csv(aar_pdd_linear_fn, index=False)
# print('Data table saved to file:', aar_pdd_linear_fn)
# aar_pdd_linear_df

## For each site, identify annual minimum AARs and fit a linear trend

In [None]:
# # -----Check if already exists in directory
# min_aars_fn = os.path.join(scm_path, 'results', 'minimum_AARs_linear_fit.csv') 
# if not os.path.exists(min_aars_fn):
    
#     # -----For each site, identify minimum AARs, fit linear trendlines
#     # add year column
#     snowlines['Year'] = snowlines['datetime'].dt.isocalendar().year
#     # subset to years after 2016
#     snowlines_subset = snowlines.loc[snowlines['Year'] > 2016]
#     # initialize dataframe for storing minimum AARs and linear fits
#     min_aars_df = pd.DataFrame()
#     # iterate over site names
#     for site_name in tqdm(snowlines['site_name'].drop_duplicates().values):
#         # subset snowlines to site
#         snowlines_site = snowlines.loc[snowlines['site_name']==site_name]
#         # count number of AAR estimates for each year
#         count_df = snowlines_site.groupby(['Year'])['AAR'].count()
#         # extract minimum AARs and dates
#         min_aars = snowlines_site.groupby(['Year'])['AAR'].min()
#         min_aars = min_aars.to_frame()
#         min_aars['count'] = count_df.values
#         # remove years with less than 20 observations from fit estimates
#         min_aars = min_aars.loc[min_aars['count'] >= 20]
#         # check that at least two years remain after filtering
#         if len(min_aars) < 2:
#             print('Less than two years of AARs, skipping...')
#             continue
#         min_dts = []
#         # iterate over years to extract dates
#         for year, min_aar in zip(np.array(min_aars.index), min_aars['AAR'].values):
#             min_dt = snowlines_site.loc[(snowlines_site['Year']==year) & (snowlines_site['AAR']==min_aar)]['datetime'].values[0]
#             min_dts.append(min_dt)
#         # fit a linear model to dates and AARs
#         model = LinearRegression()
#         model_fit = model.fit(np.array(min_aars.index).reshape(-1, 1), min_aars['AAR'])
    
#         # save in dataframe
#         min_aar_df = pd.DataFrame({'site_name': [site_name],
#                                    'minimum_AARs': [list(min_aars['AAR'].values)],
#                                    'minimum_AARs_dts': [min_dts],
#                                    'linear_fit_coef': [model_fit.coef_[0]],
#                                    'linear_fit_intercept': [model_fit.intercept_]
#                                   })
#         # concatenate to full dataframe
#         min_aars_df = pd.concat([min_aars_df, min_aar_df])

#         # plot minimum AARs 
#         # fig, ax = plt.subplots()
#         # ax.plot(snowlines_site['datetime'], snowlines_site['AAR'], '.')
#         # ax.plot(min_dts, min_aars['AAR'].values, '.-b')
#         # ax.grid()
#         # ax.set_title(site_name + '\nm = ' + str(model_fit.coef_))
#         # plt.show()
    
#     # save to file
#     min_aars_df.to_csv(min_aars_fn, index=False)
#     print('Data table saved to file: ', min_aars_fn)
#     min_aars_df.reset_index(drop=True, inplace=True)
#     min_aars_df

# else:

#     min_aars_df = pd.read_csv(min_aars_fn)

# min_aars_df

## Fit linear trends to different subregions

In [None]:
# # -----Grab training data file names
# training_data_fns = sorted(glob.glob(os.path.join(scm_path, 'machine_learning', 'training_data*.csv')))
# training_data_fns = [x for x in training_data_fns if '_scaled' not in x]

# # -----Set up simple plot
# fig, ax = plt.subplots(10, 2, figsize=(10, 24))
# ax = ax.flatten()
# fig2, ax2 = plt.subplots(1, 2, figsize=(12, 6))

# # -----Iterate over subregions
# # Initialize dataframe for storing results
# aar_linear_fit_df = pd.DataFrame()
# # Grab subregion names from file names
# subregion_names = [os.path.basename(x).split('training_data_')[1].split('.csv')[0] 
#                    for x in training_data_fns]
# for i, subregion_name in enumerate(subregion_names):
#     # load training data
#     training_data_subregion_fn = [x for x in training_data_fns if subregion_name in x][0]
#     training_data_subregion = pd.read_csv(training_data_subregion_fn)

#     # grab color for plotting
#     o1, o2 = training_data_subregion[['O1Region', 'O2Region']].values[0]
#     _, color = f.determine_subregion_name_color(o1, o2)

#     # Fit linear regression models
#     model = LinearRegression()
#     model_fit_pdd = model.fit(training_data_subregion['Cumulative_Positive_Degree_Days'].values.reshape(-1, 1), 
#                               training_data_subregion['AAR'].values)
#     model = LinearRegression()
#     model_fit_snowfall = model.fit(training_data_subregion['Cumulative_Snowfall_mwe'].values.reshape(-1, 1), 
#                                    training_data_subregion['AAR'].values)
#     # Save results in dataframe
#     df = pd.DataFrame({'Subregion': [subregion_name],
#                        'Color': [color],
#                        'Linear_Fit_Coefficient_PDD': model_fit_pdd.coef_[0],
#                        'Linear_Fit_Intercept_PDD': model_fit_pdd.intercept_,
#                        'Linear_Fit_Coefficient_Snowfall': model_fit_snowfall.coef_[0],
#                        'Linear_Fit_Intercept': model_fit_snowfall.intercept_
#                       })
#     aar_linear_fit_df = pd.concat([aar_linear_fit_df, df])
#     # Plot
#     # PDDs
#     ax[i*2].plot(training_data_subregion['Cumulative_Positive_Degree_Days'], 
#                  training_data_subregion['AAR'], '.', markersize=0.5, color=color, alpha=0.8)
#     ax[i*2].plot(training_data_subregion['Cumulative_Positive_Degree_Days'], 
#                  model_fit_pdd.predict(training_data_subregion['Cumulative_Positive_Degree_Days'].values.reshape(-1, 1)),
#                  '-', linewidth=3, color=color)
#     ax[i*2].grid()
#     r2 = np.round(model_fit_pdd.score(training_data_subregion['Cumulative_Positive_Degree_Days'].values.reshape(-1, 1), 
#                              training_data_subregion['AAR'].values), 3)
#     ax[i*2].set_title(f'R$^2$ = {r2}')
#     ax2[0].plot(training_data_subregion['Cumulative_Positive_Degree_Days'], 
#                  model_fit_pdd.predict(training_data_subregion['Cumulative_Positive_Degree_Days'].values.reshape(-1, 1)),
#                  '-', linewidth=2, color=color, label=subregion_name)
#     # Snowfall
#     ax[(i*2)+1].plot(training_data_subregion['Cumulative_Snowfall_mwe'], 
#                      training_data_subregion['AAR'], '.', markersize=0.5, color=color, alpha=0.8)
#     ax[(i*2)+1].plot(training_data_subregion['Cumulative_Snowfall_mwe'], 
#                      model_fit_snowfall.predict(training_data_subregion['Cumulative_Snowfall_mwe'].values.reshape(-1, 1)),
#                      '-', linewidth=3, color=color)
#     ax[(i*2)+1].grid()    
#     r2 = np.round(model_fit_snowfall.score(training_data_subregion['Cumulative_Snowfall_mwe'].values.reshape(-1, 1), 
#                                   training_data_subregion['AAR'].values), 3)
#     ax[(i*2)+1].set_title(f'R$^2$ = {r2}')
#     ax[i*2].set_ylabel('AAR')
#     ax[(i*2)+1].set_ylabel('AAR')
#     ax2[1].plot(training_data_subregion['Cumulative_Snowfall_mwe'], 
#                 model_fit_snowfall.predict(training_data_subregion['Cumulative_Snowfall_mwe'].values.reshape(-1, 1)),
#                 '-', linewidth=2, color=color, label=subregion_name)

#     if i==len(subregion_names):
#         ax[i*2].set_xlabel('$\Sigma$PDDs')
#         ax[(i*2)+1].set_xlabel('$\Sigma$Snowfall [m.w.e.]')

# ax2[0].set_ylabel('AAR')
# ax2[0].set_xlabel('$\Sigma$PDDs')
# ax2[0].grid()
# ax2[1].set_xlabel('$\Sigma$Snowfall [m.w.e.]')
# ax2[1].grid()
# ax2[1].legend(loc='center right', bbox_to_anchor=[1.4, 0.4, 0.2, 0.2])
# fig.subplots_adjust(hspace=0.5)
# plt.show()

# # -----Save figures
# fig_fn = os.path.join(figures_out_path, 'aar_climate_linear_fit_subregions.png')
# fig.savefig(fig_fn, dpi=250, bbox_inches='tight')
# fig2_fn = os.path.join(figures_out_path, 'aar_climate_linear_fit_subregions_single_axes.png')
# fig2.savefig(fig2_fn, dpi=250, bbox_inches='tight')