In [6]:
%%javascript
// Replace the output area's _should_scroll hook with a function that returns
// false for every value of `lines`.
// NOTE(review): presumably this stops long cell outputs (the regression tables
// below) from being collapsed into a scroll box - confirm against the
// notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [7]:
# Load libraries
import numpy as np
import pandas as pd
import sys
import os
import pickle

from classes import boundaries as bd
from classes import snag_MTF_func as sg

%config Completer.use_jedi = False 




#--- Notebook run settings
# 'command line' : read the settings from the NB_ARGS environment variable
# 'manual'       : use the values hard-coded in the 'manual' branch below
run = 'command line' # 'manual' 'command line'
#run = 'manual'

# Settings for the multiple linear regression model (MLR)
#--- Arguments
if run == 'command line':
    # NB_ARGS holds four comma-separated fields, in this order:
    #   climate_data,alpha,date_exp_str,remove_data
    arguments = os.environ['NB_ARGS']
    # Strip stray whitespace so 'a, b' and 'a,b' are parsed identically
    climate_data, alpha, date_exp_str, remove_data = [
        field.strip() for field in arguments.split(',')]

    alpha              = float(alpha)
    # bool() of any non-empty string is True, so bool('False') used to switch
    # data removal ON. Parse the text instead: the usual "off" spellings (and
    # an empty field) mean False; any other value is still treated as True,
    # as before.
    remove_data        = remove_data.lower() not in ('', 'false', '0', 'no', 'n', 'off')

elif run == 'manual':
    # Enter arguments manually
    alpha              = 0.05
    date_exp_str       = '230118_Moisture'
    climate_data_list  = ['CRUclim','CRUNCEPv7','CHELSA30s','WorldClim30s','WorldClim10m',]
    climate_data       = climate_data_list[1]
    remove_data        = True

# Echo the active settings so they are recorded in the notebook output
print(date_exp_str, alpha, remove_data,climate_data)

230118_Moisture 0.05 True CRUNCEPv7


In [8]:
#---------------#
#  Import data  #
#---------------#
mtf_type = ['count','cmass']

#--- Experiments
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# experiment lists that were written by this project.
with open('spe_reg_results/'+date_exp_str+'_Spe_reg_experiments_list.pkl', 'rb') as f:
    experiments = pickle.load(f)

# Isolate unique variables in experiment list for standardisation of covars:
# drop the 'MTF ~ ' prefix, keep the text before the first ' * ', and split
# what is left on ' + '. `strings` ends up with one list of names per formula.
strings     = [formula.replace('MTF ~ ','').split(' * ')[0].split(' + ')
               for formula in experiments]
flat_list   = [name for terms in strings for name in terms]
var_list_in = np.unique(flat_list)

# Data
# Columns carried over from the species database into the regression table
cols = ['Reference','Continent','Country','Site name','MTF','MTF basis', 'DBH mean','MAP','Insects','MATsoil',
        'Wood durability','CN_ratio','CDI','Managed','Fire','Mortality','MAT','Humidity','PFT',
        'Soil_water','Soil_water_max']

species_mtf = sg.MTF_database_import('data/MTF_database.xlsx','Species_MTF',engine=True)

# Replace missing site names / coordinates with empty strings
for text_col in ('Site name', 'Y coords', 'X coords'):
    species_mtf.loc[species_mtf[text_col].isna(),[text_col]] = ''

# Copy of the database taken before the climate columns are added
species_mtf_cp = species_mtf.copy()

# Expose the MAT / MAP columns of the selected climate data set under plain names
for clim_var in ('MAT', 'MAP'):
    species_mtf[clim_var] = species_mtf[(climate_data, clim_var)]

mtf = species_mtf.loc[:, cols].copy()

# Keep only the first level of the column labels
mtf.columns = mtf.columns.get_level_values(0)

# Copies of the table taken before any filtering or transformation
mtf_cp    = mtf.copy()
mtf_check = mtf.copy()

#--- Remove data - based on cooks distance and inapplicable
if remove_data:
    # Drop the rows of both excluded references in a single pass
    excluded_refs = ['Ritchie2014EstablishmentForest', 'Campbell2016CarbonStates']
    mtf = mtf[~mtf.Reference.isin(excluded_refs)].copy()

    # Optional subsetting, selected by tags contained in the experiment string
    if 'FireOnly' in date_exp_str:
        burnt = mtf.Fire.eq(1)
        mtf = mtf[burnt]

    if 'NoFire' in date_exp_str:
        unburnt = mtf.Fire.eq(0)
        mtf = mtf[unburnt]

    if 'NoInsectsFire' in date_exp_str:
        undisturbed = mtf.Fire.eq(0) & mtf.Insects.eq(0)
        mtf = mtf[undisturbed]

        
#--------------------#
#   Transform data   #
#--------------------#
# Numeric DBH and natural log of MTF
mtf.loc[:, 'DBH mean'] = pd.to_numeric(mtf['DBH mean'])
mtf.loc[:, 'MTF']      = mtf['MTF'].apply(np.log)

# Drop subset based on data subset
# Every run needs these covariates to be present ...
required_cols = ['DBH mean','MAT','Humidity','PFT']
# ... and the first tag found in the experiment string adds its own columns.
extra_cols_by_tag = (
    ('WoodQ',      ['Wood durability','CN_ratio']),
    ('Moisture',   ['Soil_water', 'Soil_water_max']),
    ('Management', ['Managed']),
)
for tag, extra_cols in extra_cols_by_tag:
    if tag in date_exp_str:
        required_cols = required_cols + extra_cols
        break

mtf = mtf.dropna(subset=required_cols)

# Rename columns to work with OLS (replace the space in the two labels below
# with an underscore).
# The labels are rebuilt as a new list instead of writing into
# `mtf.columns.values`: that array is the Index's own storage, so element
# assignment mutates an Index in place, which can leak the new names into other
# frames sharing the same labels (e.g. the copies taken earlier) and fails
# outright where the array is read-only.
# get_loc still raises KeyError if either column is missing, as before.
index_dbh        = mtf.columns.get_loc('DBH mean')
index_wq         = mtf.columns.get_loc('Wood durability')
ols_names        = {'DBH mean': 'DBH_mean', 'Wood durability': 'Wood_durability'}
mtf.columns      = [ols_names.get(label, label) for label in mtf.columns]
# `cols` keeps holding the renamed column labels, as it did previously
cols             = mtf.columns.values


#--- Separate data according to MTF type
# Only rows with a non-null (log-transformed) MTF are kept in each subset
has_mtf   = mtf.MTF.notnull()
mtf_basis = mtf['MTF basis']

mtf_count = mtf[mtf_basis.eq('count') & has_mtf].copy()
mtf_cmass = mtf[mtf_basis.eq('cmass') & has_mtf].copy()
mtf_c     = mtf[mtf_basis.eq('c') & has_mtf].copy()

# Combine data into list for loop (same order as mtf_type; mtf_c is not included)
data = [mtf_count, mtf_cmass]

# Number of count-based observations
print(len(mtf_count))

109


In [10]:
standardise_covars_list = [False, True]

# Initialise storage: axis 0 = raw / standardised covariates,
# axis 1 = MTF type, axis 2 = (results table, regression objects)
reg_results = np.empty((len(standardise_covars_list),len(mtf_type),2), dtype='object')

for st, standardise_covars in enumerate(standardise_covars_list):
    if standardise_covars:
        bd.print_header('Standardising covariates')

    for t, tp in enumerate(mtf_type):
        bd.print_TITLE(tp)

        # Work on a copy so the per-type data stays untouched between passes
        mtf_reg = data[t].copy()

        if standardise_covars:
            # z-score every covariate that appears in the experiment formulas
            for var in var_list_in:
                var_mean = mtf_reg.loc[:,var].mean()
                print(var, var_mean)

                var_std = mtf_reg.loc[:,var].std()
                if var_std == 0:
                    # A constant column cannot be scaled; set it to zero instead
                    print('\n'+tp+'t!\nSetting {} to 0 because Std is 0, i.e. standardisation fails!\n'.format(
                        var))
                    mtf_reg.loc[:,var] = 0
                else:
                    mtf_reg.loc[:,var] = (mtf_reg.loc[:,var] - var_mean) / var_std

        # Run regressions
        res_table, result_objs = sg.regression_wrapper(mtf_reg, experiments[:],alpha, tp,
                                                       climate_data, standardise_covars)

        reg_results[st,t,0] = res_table   # Ranking of models
        reg_results[st,t,1] = result_objs # regression objects for each corresponding model (regression metrics & parameters)

        display(res_table)

# Persist the complete results array for this experiment / alpha / climate data set
storage_string = 'spe_reg_results/'+date_exp_str+'_SPE_'+str(alpha)+'_'+climate_data+'.pkl'
with open(storage_string, 'wb') as f:
    pickle.dump(reg_results, f)



             count             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + Humidity + PFT + DBH_mean,0.6,205.2,0.0,26.62,1.26,True,,109.0,0,count,CRUNCEPv7,False
MTF ~ MATsoil + Humidity + PFT + DBH_mean,0.52,224.0,18.8,28.54,1.07,False,PFT,109.0,1,count,CRUNCEPv7,False
MTF ~ MAT + PFT + DBH_mean,0.51,225.1,19.9,29.42,0.98,False,PFT,109.0,2,count,CRUNCEPv7,False
MTF ~ MAT + DBH_mean,0.5,227.0,21.8,29.1,0.9,True,,109.0,3,count,CRUNCEPv7,False
MTF ~ Humidity + PFT + DBH_mean,0.48,231.5,26.3,28.58,1.12,True,,109.0,4,count,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean,0.47,234.4,29.2,31.0,0.81,True,,109.0,5,count,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean,0.47,235.1,29.9,31.21,0.85,False,PFT,109.0,6,count,CRUNCEPv7,False
MTF ~ Humidity + DBH_mean,0.46,236.0,30.8,28.25,0.95,True,,109.0,7,count,CRUNCEPv7,False
MTF ~ PFT + DBH_mean,0.38,251.3,46.1,32.3,0.83,False,PFT,109.0,8,count,CRUNCEPv7,False
MTF ~ DBH_mean,0.36,253.0,47.8,31.88,0.76,True,,109.0,9,count,CRUNCEPv7,False




             cmass             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + Humidity + PFT + DBH_mean,0.58,227.5,0.0,44.39,1.16,True,,109.0,0,cmass,CRUNCEPv7,False
MTF ~ MAT + Humidity + DBH_mean,0.55,232.4,4.9,44.78,1.01,True,,109.0,1,cmass,CRUNCEPv7,False
MTF ~ Humidity + PFT + DBH_mean,0.47,250.8,23.3,45.97,1.03,True,,109.0,2,cmass,CRUNCEPv7,False
MTF ~ MAT + DBH_mean,0.47,250.9,23.4,48.14,0.83,True,,109.0,3,cmass,CRUNCEPv7,False
MTF ~ Humidity + DBH_mean,0.44,255.6,28.1,45.74,0.88,True,,109.0,4,cmass,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean,0.42,259.9,32.4,50.72,0.73,True,,109.0,5,cmass,CRUNCEPv7,False
MTF ~ MATsoil + PFT + DBH_mean,0.42,260.4,32.9,50.91,0.75,False,PFT,109.0,6,cmass,CRUNCEPv7,False
MTF ~ DBH_mean,0.34,274.2,46.7,51.23,0.69,True,,109.0,7,cmass,CRUNCEPv7,False
MTF ~ CDI,0.04,313.9,86.4,66.22,0.75,True,,109.0,8,cmass,CRUNCEPv7,False
MTF ~ MATsoil,0.01,317.5,90.0,66.65,0.68,False,MATsoil,109.0,9,cmass,CRUNCEPv7,False



#------------------------------#
#   Standardising covariates   #
#------------------------------#



             count             


CDI 0.3095014336017726
DBH_mean 30.43507263788527
Humidity 1.7656554432160267
MAP 907.3834862385319
MAT 5.720183486238534
MATsoil 6.546789010730358
PFT 0.7614678899082569
Soil_water 0.5596014192891777
Soil_water_max 0.6797675443898648


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + Humidity + PFT + DBH_mean,0.6,205.2,0.0,26.62,1.26,True,,109.0,0,count,CRUNCEPv7,True
MTF ~ MATsoil + Humidity + PFT + DBH_mean,0.52,224.0,18.8,28.54,1.07,False,PFT,109.0,1,count,CRUNCEPv7,True
MTF ~ MAT + Soil_water_max + PFT + DBH_mean,0.52,224.1,18.9,28.57,1.0,False,"Soil_water_max, PFT",109.0,2,count,CRUNCEPv7,True
MTF ~ MAT + PFT + DBH_mean,0.51,225.1,19.9,29.42,0.98,False,PFT,109.0,3,count,CRUNCEPv7,True
MTF ~ MAT + Soil_water_max + DBH_mean,0.51,225.6,20.4,28.29,0.93,False,Soil_water_max,109.0,4,count,CRUNCEPv7,True
MTF ~ MAT + MAP + PFT + DBH_mean,0.51,226.0,20.8,29.3,0.98,False,MAP,109.0,5,count,CRUNCEPv7,True
MTF ~ MAT + Soil_water + PFT + DBH_mean,0.51,226.6,21.4,29.21,0.99,False,"Soil_water, PFT",109.0,6,count,CRUNCEPv7,True
MTF ~ MAT + DBH_mean,0.5,227.0,21.8,29.1,0.9,True,,109.0,7,count,CRUNCEPv7,True
MTF ~ MAT + Soil_water + DBH_mean,0.5,227.4,22.2,28.83,0.93,False,Soil_water,109.0,8,count,CRUNCEPv7,True
MTF ~ MAT + MAP + DBH_mean,0.5,228.6,23.4,29.02,0.89,False,MAP,109.0,9,count,CRUNCEPv7,True




             cmass             


CDI 0.3095014336017726
DBH_mean 30.43507263788527
Humidity 1.7656554432160267
MAP 907.3834862385319
MAT 5.720183486238534
MATsoil 6.546789010730358
PFT 0.7614678899082569
Soil_water 0.5596014192891777
Soil_water_max 0.6797675443898648


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + Humidity + PFT + DBH_mean,0.58,227.5,0.0,44.39,1.16,True,,109.0,0,cmass,CRUNCEPv7,True
MTF ~ MAT + Humidity + DBH_mean,0.55,232.4,4.9,44.78,1.01,True,,109.0,1,cmass,CRUNCEPv7,True
MTF ~ MAT + MAP + PFT + DBH_mean,0.48,249.6,22.1,48.19,0.89,False,MAP,109.0,2,cmass,CRUNCEPv7,True
MTF ~ Humidity + PFT + DBH_mean,0.47,250.8,23.3,45.97,1.03,True,,109.0,3,cmass,CRUNCEPv7,True
MTF ~ MAT + DBH_mean,0.47,250.9,23.4,48.14,0.83,True,,109.0,4,cmass,CRUNCEPv7,True
MTF ~ MAT + Soil_water + DBH_mean,0.47,251.2,23.7,47.74,0.86,False,Soil_water,109.0,5,cmass,CRUNCEPv7,True
MTF ~ MAT + MAP + DBH_mean,0.47,252.4,24.9,48.01,0.82,False,MAP,109.0,6,cmass,CRUNCEPv7,True
MTF ~ Humidity + DBH_mean,0.44,255.6,28.1,45.74,0.88,True,,109.0,7,cmass,CRUNCEPv7,True
MTF ~ MATsoil + DBH_mean,0.42,259.9,32.4,50.72,0.73,True,,109.0,8,cmass,CRUNCEPv7,True
MTF ~ MATsoil + PFT + DBH_mean,0.42,260.4,32.9,50.91,0.75,False,PFT,109.0,9,cmass,CRUNCEPv7,True
