In [67]:
pwd

'/Users/antje/Documents/LUND/1912_modelling_SWD/2008_Dead_wood_treatment_of_models_review/data/Github_test_dt'

- Run the standardisation only for valid models with ∆ AIC ≤ 6

In [88]:
%%javascript
// Override the output-area scroll predicate so that it always answers "no",
// whatever line count it is handed (presumably this stops long outputs from
// being collapsed into a scroll box -- confirm against the notebook front end).
IPython.OutputArea.prototype._should_scroll = (_lineCount) => false;

<IPython.core.display.Javascript object>

In [98]:
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import logging
logging.basicConfig(level=logging.CRITICAL) # ignore no handles to put in legend warning from matplotlib
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning) # ignore Future deprecation warning
import os
import pickle

from classes import boundaries as bd
from classes import snag_MTF_func as sg

%matplotlib inline
%config Completer.use_jedi = False 




#--- Notebook run settings
# 'command line': the arguments are read from the NB_ARGS environment variable
# 'manual'      : the arguments are the values typed in below
run = 'command line' # 'manual' 'command line'
#run = 'manual'


def _parse_bool_flag(text):
    """Convert a textual flag into a real boolean.

    bool() must not be used for this: every non-empty string is truthy, so
    bool('False') evaluates to True.

    Parameters
    ----------
    text : str
        'true'/'1'/'yes'/'y' (any case) give True;
        'false'/'0'/'no'/'n' (any case) and the empty string give False.

    Returns
    -------
    bool

    Raises
    ------
    ValueError
        If the text is none of the recognised spellings.
    """
    flag = text.strip().lower()
    if flag in ('true', '1', 'yes', 'y'):
        return True
    if flag in ('false', '0', 'no', 'n', ''):
        return False
    raise ValueError('Cannot interpret {!r} as a boolean flag'.format(text))


# Settings for the multiple linear regression model (MLR)
#--- Arguments
if run == 'command line':
    # Read command line arguments; expected layout of NB_ARGS:
    #   "<climate_data>,<alpha>,<date_exp_str>,<remove_data>"
    arguments = os.environ['NB_ARGS']
    climate_data, alpha, date_exp_str, remove_data = [
        field.strip() for field in arguments.split(',')]

    alpha              = float(alpha)
    remove_data        = _parse_bool_flag(remove_data)

elif run == 'manual':
    # Enter arguments manually
    alpha              = 0.01
    date_exp_str       = '230413_TempSS_FMort_NoInsectsFire'
    climate_data_list  = ['CRUclim','CRUNCEPv7','CHELSA30s','WorldClim30s','WorldClim10m',]
    climate_data       = climate_data_list[1]
    remove_data        = True

else:
    # Fail here with a clear message instead of a NameError at the print below
    raise ValueError("Unknown run mode {!r}; use 'command line' or 'manual'".format(run))

print(date_exp_str, alpha, remove_data)

230413_TempSS_FMort_NoInsectsFire 0.05 True


In [99]:
#---------------#
#  Import data  #
#---------------#
mtf_type = ['count', 'cmass']

#--- Experiments
# List of regression formulas stored for this experiment date string
experiments_file = 'site_reg_results/'+date_exp_str+'_SITE_reg_experiments_list.pkl'
with open(experiments_file, 'rb') as f:
    experiments = pickle.load(f)

# Collect the covariate names used in the experiments (needed for the
# standardisation of covars): drop the 'MTF ~ ' prefix, keep only the part
# in front of the first ' * ' and split that part at every ' + '.
strings = []
for formula in experiments:
    right_hand_side = formula.replace('MTF ~ ', '')
    additive_part   = right_hand_side.split(' * ')[0]
    strings.append(additive_part.split(' + '))

flat_list = []
for terms in strings:
    flat_list.extend(terms)

var_list_in = np.unique(flat_list)

# Data
# Columns carried over from the site table into the regression table
cols = [
    'Reference', 'Continent', 'Country', 'Site name', 'MTF', 'MTF basis',
    'DBH mean', 'MAP', 'Insects', 'MATsoil', 'Managed', 'Fire', 'Mortality',
    'MAT', 'PFT', 'Soil_water', 'Soil_water_max',
]

site_mtf = sg.MTF_database_import('data/MTF_database.xlsx', 'Site_MTF', engine=True)

# Missing site names and coordinates become empty strings
for label in ('Site name', 'Y coords', 'X coords'):
    site_mtf.loc[site_mtf[label].isna(), [label]] = ''

# Snapshot of the site table taken before MAT / MAP are added
site_mtf_cp = site_mtf.copy()

# Expose MAT and MAP of the selected climate data set under plain column names
for climate_var in ('MAT', 'MAP'):
    site_mtf[climate_var] = site_mtf[(climate_data, climate_var)]

mtf = site_mtf.loc[:, cols].copy()

# Keep only the first level of the column labels
mtf.columns = mtf.columns.get_level_values(0)
mtf_cp = mtf.copy()

    
#--- Remove data
# Every filter below ends in .copy(), so `mtf` is always an independent frame
# and never a slice of the previous one. Later in-place assignments to `mtf`
# therefore cannot run into pandas' SettingWithCopy checks.
if remove_data:
    # References excluded from the analysis
    excluded_references = ['Ritchie2014EstablishmentForest', 'Campbell2016CarbonStates']
    mtf = mtf[~mtf.Reference.isin(excluded_references)].copy()

    # Further subsetting is selected by keywords in the experiment string
    if 'FireOnly' in date_exp_str:
        mtf = mtf[(mtf.Fire == 1)].copy()

    if 'NoFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0)].copy()

    if 'NoInsectsFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0) & (mtf.Insects == 0)].copy()

    if 'TempSS_FMort' in date_exp_str:
        # Keep sites with 0 < MAT < 8.6 only
        mtf = mtf[(mtf.MAT > 0) & (mtf.MAT < 8.6)].copy()
    
#--------------------#
#   Transform data   #
#--------------------#

# Natural log of MTF, in the regression table and in the site-table snapshot
mtf.loc[:, 'MTF'] = mtf.loc[:, 'MTF'].apply(np.log)
site_mtf_cp.loc[:, ['MTF']] = site_mtf_cp.loc[:, ['MTF']].apply(np.log)

# Drop NaNs based on data subset: the experiment string decides which
# columns have to be complete
if 'Moisture' in date_exp_str:
    required_cols = ['DBH mean','MAT','Managed','PFT','Soil_water','Soil_water_max']
elif 'Management' in date_exp_str:
    required_cols = ['DBH mean','MAT','Managed','PFT']
else:
    required_cols = ['DBH mean','MAT']

mtf = mtf.dropna(subset=required_cols)


# Rename columns to work with OLS (the regression formulas use 'DBH_mean')
# Edit a private copy of the labels: `mtf.columns.values` is the backing array
# of the column Index, which is meant to be immutable and may be shared with
# other frames derived from the same data, so it must not be changed in place.
index_dbh  = mtf.columns.get_loc('DBH mean')   # KeyError if the column is missing
cols       = mtf.columns.values.copy()
cols[index_dbh]  = 'DBH_mean'
mtf.columns = cols


#--- Separate data according to MTF type
# Only rows that actually carry an MTF value are used
has_mtf   = mtf.MTF.notnull()
mtf_basis = mtf['MTF basis']

# Note: only the count subset gets a fresh 0..n-1 index
mtf_count = mtf[(mtf_basis == 'count') & has_mtf].copy().reset_index(drop=True)
mtf_cmass = mtf[(mtf_basis == 'cmass') & has_mtf].copy()
mtf_c     = mtf[(mtf_basis == 'c') & has_mtf].copy()

# Combine data into list for loop (same order as mtf_type)
data = [mtf_count, mtf_cmass]

len(mtf_count)

25

In [97]:
# Run every experiment twice: with raw and with standardised covariates
standardise_covars_list = [False, True]

# Initialise storage
# axis 0: entry of standardise_covars_list, axis 1: entry of mtf_type,
# axis 2: [results table, result objects] as returned by the regression wrapper
reg_results = np.empty((len(standardise_covars_list),len(mtf_type),2), dtype='object')

for st, standardise_covars in enumerate(standardise_covars_list):

    if standardise_covars:
        bd.print_header('Standardising covariates')

    for t, tp in enumerate(mtf_type):
        bd.print_TITLE(tp)

        # Work on a copy so the frames stored in `data` stay untouched
        mtf_reg = data[t].copy()

        if standardise_covars:
            # z-score every covariate: (x - mean) / std
            for var in var_list_in:
                var_mean = mtf_reg[var].mean()
                var_std  = mtf_reg[var].std()
                print(var, var_mean)

                # Assign whole columns (mtf_reg[var] = ...) rather than writing
                # through .loc[:, var]: the standardised values are floats and
                # writing them in place into an integer column relies on
                # deprecated upcasting.
                if var_std == 0:
                    print('\n'+tp+'t!\nSetting {} to 0 because Std is 0, i.e. standardisation fails!\n'.format(
                        var))
                    mtf_reg[var] = 0.0
                else:
                    mtf_reg[var] = (mtf_reg[var] - var_mean) / var_std

        # Run regressions
        res_table, result_objs = sg.regression_wrapper(mtf_reg, experiments[:],alpha, tp,
                                                       climate_data, standardise_covars)

        reg_results[st,t,0] = res_table
        reg_results[st,t,1] = result_objs

        # Show only the rows flagged in the prms_sig column
        display(res_table[res_table.prms_sig == True])


# Store all tables and result objects for this experiment / alpha / climate data set
storage_string = 'site_reg_results/'+date_exp_str+'_SITE_'+str(alpha)+'_'+climate_data+'.pkl'
with open(storage_string, 'wb') as f:
    pickle.dump(reg_results, f)



             count             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil,0.22,67.1,1.5,18.74,1.51,True,,29.0,2,count,CRUclim,False
MTF ~ PFT,0.14,70.0,4.4,19.49,1.08,True,,29.0,6,count,CRUclim,False




             cmass             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil,0.28,69.6,0.5,37.55,1.52,True,,29.0,3,cmass,CRUclim,False
MTF ~ MAT,0.1,75.9,6.8,38.99,1.65,True,,29.0,7,cmass,CRUclim,False
MTF ~ PFT,0.1,75.9,6.8,39.99,1.18,True,,29.0,8,cmass,CRUclim,False



#------------------------------#
#   Standardising covariates   #
#------------------------------#



             count             


DBH_mean 23.84716049682224
MAP 890.6931034482758
MAT 3.8310344827586205
MATsoil 4.92068965681668
PFT 0.8620689655172413


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil,0.22,67.1,1.5,18.74,1.51,True,,29.0,4,count,CRUclim,True
MTF ~ PFT,0.14,70.0,4.4,19.49,1.08,True,,29.0,12,count,CRUclim,True




             cmass             


DBH_mean 23.84716049682224
MAP 890.6931034482758
MAT 3.8310344827586205
MATsoil 4.92068965681668
PFT 0.8620689655172413


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil,0.28,69.6,0.5,37.55,1.52,True,,29.0,6,cmass,CRUclim,True
MTF ~ MAT,0.1,75.9,6.8,38.99,1.65,True,,29.0,14,cmass,CRUclim,True
MTF ~ PFT,0.1,75.9,6.8,39.99,1.18,True,,29.0,15,cmass,CRUclim,True
