- Run the standardisation only for valid models with ∆ AIC ≤ 6

In [1]:
%%javascript
// Replace the output area's scroll check with a function that always returns
// false, regardless of the number of output lines passed in.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead
// of placing them in a scrollable box — confirm against the notebook front end.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [25]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import logging
logging.basicConfig(level=logging.CRITICAL) # ignore no handles to put in legend warning from matplotlib
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # ignore Future deprecation warning
import os
import pickle

from classes import boundaries as bd
from classes import snag_MTF_func as sg

%matplotlib inline
%config Completer.use_jedi = False 




#--- Notebook run settings
# 'command line': arguments are read from the NB_ARGS environment variable.
# 'manual'      : arguments are the values hard-coded in the branch below.
run = 'command line' # 'manual' 'command line'
#run = 'manual'


def _parse_bool_arg(text):
    """Convert a textual flag (e.g. 'True' / 'False') into a real boolean.

    bool() cannot be used for this: every non-empty string is truthy, so
    bool('False') evaluates to True and the flag could never be switched off
    from the command line.

    Parameters
    ----------
    text : str
        Flag as received from the argument string.

    Returns
    -------
    bool
        True for 'true', '1', 'yes', 'y'; False for 'false', '0', 'no', 'n'
        and the empty string (case-insensitive, surrounding blanks ignored).

    Raises
    ------
    ValueError
        If the text is none of the recognised spellings.
    """
    token = str(text).strip().lower()
    if token in ('true', '1', 'yes', 'y'):
        return True
    if token in ('false', '0', 'no', 'n', ''):
        return False
    raise ValueError("Cannot interpret {!r} as a boolean flag".format(text))


# Settings for the multiple linear regression model (MLR)
#--- Arguments
if run == 'command line':
    # NB_ARGS holds four comma-separated fields, in this order:
    #   climate_data,alpha,date_exp_str,remove_data
    arguments = os.environ['NB_ARGS']
    climate_data, alpha, date_exp_str, remove_data = [
        field.strip() for field in arguments.split(',')]

    alpha              = float(alpha)
    remove_data        = _parse_bool_arg(remove_data)

elif run == 'manual':
    # Enter arguments manually
    alpha              = 0.01
    date_exp_str       = '230207_Full'
    climate_data_list  = ['CRUclim','CRUNCEPv7','CHELSA30s','WorldClim30s','WorldClim10m',]
    climate_data       = climate_data_list[1]
    remove_data        = True

else:
    # Fail here with a clear message rather than with a NameError further down.
    raise ValueError("run must be 'command line' or 'manual', got {!r}".format(run))

print(date_exp_str, alpha, remove_data)

230116_Full_Management 0.01 True


In [26]:
#---------------#
#  Import data  #
#---------------#
# Reads the experiment list and the site-level MTF table, filters and
# log-transforms the data, and splits it by MTF basis for the regressions.
mtf_type = ['count','cmass']

#--- Experiments
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# experiment lists that were written by this project.
with open('site_reg_results/'+date_exp_str+'_SITE_reg_experiments_list.pkl', 'rb') as f:
    experiments = pickle.load(f)

# Isolate unique variables in experiment list for standardisation of covars:
# strip the response, drop everything after the first ' * ', split on ' + '.
strings     = [i.replace('MTF ~ ','') for i in experiments]
strings     = [i.split(' * ')[0] for i in strings]
strings     = [i.split(' + ') for i in strings]
flat_list   = [item for sublist in strings for item in sublist]
var_list_in = np.unique(flat_list)

# Data
cols = ['Reference','Continent','Country','Site name','MTF','MTF basis', 'DBH mean','MAP','Insects','MATsoil',
        'CDI','Managed','Fire','Mortality','MAT','Humidity','PFT','Soil_water','Soil_water_max']

site_mtf = sg.MTF_database_import('data/MTF_database.xlsx','Site_MTF',engine=True)
# Replace missing site names / coordinates by empty strings
site_mtf.loc[site_mtf['Site name'].isna(),['Site name']] = ''
site_mtf.loc[site_mtf['Y coords'].isna(),['Y coords']] = ''
site_mtf.loc[site_mtf['X coords'].isna(),['X coords']] = ''
site_mtf_cp = site_mtf.copy()
# Select MAT and MAP from the chosen climate data set
site_mtf['MAT'] = site_mtf[(climate_data,'MAT')]
site_mtf['MAP'] = site_mtf[(climate_data,'MAP')]
mtf = site_mtf.loc[:, cols].copy()

# Keep only the first level of the column labels
mtf.columns = mtf.columns.get_level_values(0)
mtf_cp = mtf.copy()


#--- Remove data
if remove_data:
    mtf = mtf[~(mtf.Reference == 'Ritchie2014EstablishmentForest')].copy()
    mtf = mtf[(mtf.Reference != 'Campbell2016CarbonStates')].copy()

    # Experiment-specific subsets; .copy() so that the assignments below
    # write to an independent frame and not to a slice of the previous one.
    if 'FireOnly' in date_exp_str:
        mtf = mtf[(mtf.Fire == 1)].copy()

    if 'NoFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0)].copy()

    if 'NoInsectsFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0) & (mtf.Insects == 0)].copy()


#--------------------#
#   Transform data   #
#--------------------#

# Natural log of the response
mtf.loc[:, 'MTF'] = mtf.loc[:, 'MTF'].apply(np.log)
site_mtf_cp.loc[:, ['MTF']] = site_mtf_cp.loc[:, ['MTF']].apply(np.log)

# Drop rows with missing covariates; the required set depends on the data subset
required_covars = ['DBH mean','MAT','Humidity','PFT']
if 'Moisture' in date_exp_str:
    required_covars = required_covars + ['Soil_water', 'Soil_water_max']
elif 'Management' in date_exp_str:
    required_covars = required_covars + ['Managed']
mtf = mtf.dropna(subset=required_covars)


# Rename columns to work with OLS (no blank allowed in a formula term).
# rename() builds a new column index; writing into mtf.columns.values would
# modify the index buffer in place, which other frames may share.
index_dbh  = mtf.columns.get_loc('DBH mean')
mtf        = mtf.rename(columns={'DBH mean': 'DBH_mean'})
cols       = mtf.columns.values.copy()


#--- Separate data according to MTF type
mtf_count = mtf[(mtf['MTF basis'] == 'count') & (mtf.MTF.notnull())].copy().reset_index(drop=True)
mtf_cmass = mtf[(mtf['MTF basis'] == 'cmass') & (mtf.MTF.notnull())].copy()
mtf_c     = mtf[(mtf['MTF basis'] == 'c') & (mtf.MTF.notnull())].copy()

# Combine data into list for loop (same order as mtf_type)
data = [mtf_count, mtf_cmass]

hi


In [27]:
# Run every experiment for each MTF basis, once on the raw covariates and once
# on z-score standardised covariates, and pickle all results together.
standardise_covars_list = [False, True]

# Initialise storage: [standardised?, MTF type, (result table, result objects)]
reg_results = np.empty((len(standardise_covars_list),len(mtf_type),2), dtype='object')

for st, standardise_covars in enumerate(standardise_covars_list):

    if standardise_covars:
        bd.print_header('Standardising covariates')

    for t,tp in enumerate(mtf_type):
        bd.print_TITLE(tp)

        # Work on a copy so the standardisation never leaks back into data[t]
        mtf_reg = data[t].copy()

        if standardise_covars:

            for var in var_list_in:
                var_mean = mtf_reg[var].mean()
                var_std  = mtf_reg[var].std()
                print(var, var_mean)

                # Whole-column assignment (not .loc[:, var] = ...) so that a
                # column of another dtype is replaced by the float result
                # instead of being written into in place.
                if var_std == 0:
                    # NOTE(review): the 't!' after the MTF type looks like a typo;
                    # the message is kept unchanged.
                    print('\n'+tp+'t!\nSetting {} to 0 because Std is 0, i.e. standardisation fails!\n'.format(
                        var))
                    mtf_reg[var] = 0.0
                else:
                    mtf_reg[var] = (mtf_reg[var] - var_mean) / var_std

        # Run regressions
        res_table, result_objs = sg.regression_wrapper(mtf_reg, experiments[:],alpha, tp,
                                                       climate_data, standardise_covars)

        reg_results[st,t,0] = res_table
        reg_results[st,t,1] = result_objs

        display(res_table)

# Store all results in one file named after experiment, alpha and climate data
storage_string = 'site_reg_results/'+date_exp_str+'_SITE_'+str(alpha)+'_'+climate_data+'.pkl'
with open(storage_string, 'wb') as f:
    pickle.dump(reg_results, f)



             count             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + PFT + DBH_mean + Managed,0.37,147.5,0.5,23.34,1.6,False,PFT,64.0,0,count,CRUNCEPv7,False
MTF ~ MAT + DBH_mean + Managed,0.36,147.0,0.0,23.47,1.62,True,,64.0,1,count,CRUNCEPv7,False
MTF ~ MAT + Humidity + PFT + DBH_mean + Managed,0.36,149.1,2.1,23.34,1.6,False,"Humidity, PFT",64.0,2,count,CRUNCEPv7,False
MTF ~ MAT + Humidity + DBH_mean + Managed,0.35,148.6,1.6,23.47,1.62,False,Humidity,64.0,3,count,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean + Managed,0.35,148.9,1.9,24.33,1.53,False,"PFT, Managed",64.0,4,count,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean + Managed,0.34,148.7,1.7,24.48,1.54,True,,64.0,5,count,CRUNCEPv7,False
MTF ~ MATsoil + Humidity + DBH_mean + Managed,0.34,150.4,3.4,24.44,1.53,False,Humidity,64.0,6,count,CRUNCEPv7,False
MTF ~ MATsoil + Humidity + PFT + DBH_mean + Managed,0.34,150.7,3.7,24.29,1.52,False,"Humidity, PFT, Managed",64.0,7,count,CRUNCEPv7,False
MTF ~ CDI + DBH_mean + Managed,0.29,154.1,7.1,25.12,1.62,False,DBH_mean,64.0,8,count,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean,0.29,154.1,7.1,25.23,1.38,False,PFT,64.0,9,count,CRUNCEPv7,False




             cmass             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + DBH_mean + Managed,0.42,150.8,0.0,49.3,1.71,True,,64.0,0,cmass,CRUNCEPv7,False
MTF ~ MAT + PFT + DBH_mean + Managed,0.42,151.3,0.5,48.99,1.67,False,PFT,64.0,1,cmass,CRUNCEPv7,False
MTF ~ MAT + Humidity + DBH_mean + Managed,0.42,152.1,1.3,49.33,1.71,False,Humidity,64.0,2,cmass,CRUNCEPv7,False
MTF ~ MAT + Humidity + PFT + DBH_mean + Managed,0.42,152.6,1.8,49.03,1.67,False,"Humidity, PFT",64.0,3,cmass,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean + Managed,0.41,152.2,1.4,51.17,1.59,True,,64.0,4,cmass,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean + Managed,0.41,152.4,1.6,50.86,1.56,False,PFT,64.0,5,cmass,CRUNCEPv7,False
MTF ~ MATsoil + Humidity + PFT + DBH_mean + Managed,0.41,154.1,3.3,50.79,1.56,False,"Humidity, PFT",64.0,6,cmass,CRUNCEPv7,False
MTF ~ MATsoil + Humidity + DBH_mean + Managed,0.4,153.9,3.1,51.11,1.59,False,Humidity,64.0,7,cmass,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean,0.34,158.7,7.9,52.53,1.41,False,PFT,64.0,8,cmass,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean,0.33,158.6,7.8,52.86,1.4,True,,64.0,9,cmass,CRUNCEPv7,False



#------------------------------#
#   Standardising covariates   #
#------------------------------#



             count             


CDI 0.3247904229919729
DBH_mean 26.067421928598616
Humidity 1.7465289169311524
MAP 870.7375
MAT 4.7109375
MATsoil 5.639062529429793
Managed 0.28125
PFT 0.921875


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + PFT + DBH_mean + Managed,0.37,147.5,0.5,23.34,1.60,False,PFT,64.0,0,count,CRUNCEPv7,True
MTF ~ MAT + MAP + PFT + DBH_mean + Managed,0.37,147.9,0.9,22.17,1.54,False,"MAP, PFT",64.0,1,count,CRUNCEPv7,True
MTF ~ MAT + DBH_mean + Managed,0.36,147.0,0.0,23.47,1.62,True,,64.0,2,count,CRUNCEPv7,True
MTF ~ MAT + MAP + DBH_mean + Managed,0.36,147.6,0.6,22.39,1.57,False,MAP,64.0,3,count,CRUNCEPv7,True
MTF ~ MAT + Humidity + PFT + DBH_mean + Managed,0.36,149.1,2.1,23.34,1.60,False,"Humidity, PFT",64.0,4,count,CRUNCEPv7,True
...,...,...,...,...,...,...,...,...,...,...,...,...
MTF ~ MAP + PFT + DBH_mean,0.04,173.4,26.4,27.14,1.47,False,"MAP, PFT, DBH_mean",64.0,71,count,CRUNCEPv7,True
MTF ~ PFT,0.03,171.6,24.6,27.80,1.47,False,PFT,64.0,72,count,CRUNCEPv7,True
MTF ~ DBH_mean,0.02,172.5,25.5,27.38,1.38,False,DBH_mean,64.0,73,count,CRUNCEPv7,True
MTF ~ Humidity + DBH_mean,0.02,173.2,26.2,27.20,1.35,False,"Humidity, DBH_mean",64.0,74,count,CRUNCEPv7,True




             cmass             


CDI 0.3247904229919729
DBH_mean 26.067421928598616
Humidity 1.7465289169311524
MAP 870.7375
MAT 4.7109375
MATsoil 5.639062529429793
Managed 0.28125
PFT 0.921875


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + MAP + PFT + DBH_mean + Managed,0.43,151.9,1.1,46.92,1.62,False,"MAP, PFT",64.0,0,cmass,CRUNCEPv7,True
MTF ~ MAT + DBH_mean + Managed,0.42,150.8,0.0,49.30,1.71,True,,64.0,1,cmass,CRUNCEPv7,True
MTF ~ MAT + PFT + DBH_mean + Managed,0.42,151.3,0.5,48.99,1.67,False,PFT,64.0,2,cmass,CRUNCEPv7,True
MTF ~ MAT + MAP + DBH_mean + Managed,0.42,151.6,0.8,47.40,1.67,False,MAP,64.0,3,cmass,CRUNCEPv7,True
MTF ~ MAT + Humidity + DBH_mean + Managed,0.42,152.1,1.3,49.33,1.71,False,Humidity,64.0,4,cmass,CRUNCEPv7,True
...,...,...,...,...,...,...,...,...,...,...,...,...
MTF ~ MAP + PFT + DBH_mean,0.04,183.2,32.4,56.09,1.42,False,"MAP, PFT, DBH_mean",64.0,71,cmass,CRUNCEPv7,True
MTF ~ PFT,0.03,181.4,30.6,56.93,1.40,False,PFT,64.0,72,cmass,CRUNCEPv7,True
MTF ~ Humidity + DBH_mean,0.02,183.0,32.2,56.13,1.29,False,"Humidity, DBH_mean",64.0,73,cmass,CRUNCEPv7,True
MTF ~ DBH_mean,0.01,182.7,31.9,56.51,1.32,False,DBH_mean,64.0,74,cmass,CRUNCEPv7,True
