In [34]:
# Show the kernel's current working directory (bare `pwd` relies on IPython automagic).
# The relative paths used further down ('spe_reg_results/...', 'data/MTF_database.xlsx')
# are resolved against this directory.
pwd

'/Users/antje/Documents/LUND/1912_modelling_SWD/2008_Dead_wood_treatment_of_models_review/data/Github_test_dt'

In [35]:
%%javascript
// Replace the OutputArea prototype's _should_scroll hook with a function that
// always returns false, whatever `lines` is.
// NOTE(review): presumably this keeps long cell outputs fully expanded instead of
// placing them in a scroll box; it relies on the `IPython` JS global of the classic
// Notebook front end -- confirm before running in JupyterLab.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [36]:
# Load libraries
import numpy as np
import pandas as pd
import sys
import os
import pickle

from classes import boundaries as bd
from classes import snag_MTF_func as sg

%config Completer.use_jedi = False 




#--- Notebook run settings
# 'command line' : read the settings from the NB_ARGS environment variable.
# 'manual'       : use the values hard-coded in the 'manual' branch below.
run = 'command line' # 'manual' 'command line'
#run = 'manual'

# Spellings of the remove_data argument that mean "do not remove data".
# Every other non-empty value keeps meaning True, as it did before.
_FALSE_STRINGS = {'', '0', 'false', 'f', 'no', 'n', 'off', 'none'}

# Settings for the multiple linear regression model (MLR)
#--- Arguments
if run == 'command line':
    # Read command line arguments.
    # NB_ARGS holds four comma-separated fields, in this order:
    #   climate_data,alpha,date_exp_str,remove_data
    arguments = os.environ['NB_ARGS']
    climate_data, alpha, date_exp_str, remove_data = arguments.split(',')

    alpha              = float(alpha)
    # bool('False') is True because any non-empty string is truthy, so the flag
    # must be parsed from its text rather than passed through bool().
    remove_data        = remove_data.strip().lower() not in _FALSE_STRINGS

elif run == 'manual':
    # Enter arguments manually
    alpha              = 0.01
    date_exp_str       = '230413_TempSS_FMort_NoInsectsFire'
    climate_data_list  = ['CRUclim','CRUNCEPv7','CHELSA30s','WorldClim30s','WorldClim10m',]
    climate_data       = climate_data_list[1]
    remove_data        = True

else:
    # Fail here with a clear message instead of a NameError at the print below.
    raise ValueError("run must be 'command line' or 'manual', got {!r}".format(run))

print(date_exp_str, alpha, remove_data,climate_data)

230413_TempSS_FMort_NoInsectsFire 0.01 True CRUNCEPv7


In [37]:
#---------------#
#  Import data  #
#---------------#
# MTF bases to analyse; the order has to match the `data` list assembled
# after the transformation step.
mtf_type = ['count', 'cmass']

#--- Experiments
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only open experiment lists that were written by this project.
experiments_path = 'spe_reg_results/' + date_exp_str + '_Spe_reg_experiments_list.pkl'
with open(experiments_path, 'rb') as f:
    experiments = pickle.load(f)

# Isolate the unique variables in the experiment list (used later when
# standardising covariates): drop the 'MTF ~ ' prefix, keep only the text before
# the first ' * ', then split the remainder on ' + '.
strings = [
    formula.replace('MTF ~ ', '').split(' * ')[0].split(' + ')
    for formula in experiments
]
flat_list = []
for terms in strings:
    flat_list.extend(terms)
var_list_in = np.unique(flat_list)

# Data: columns carried over from the species table into the regression table
cols = [
    'Reference', 'Continent', 'Country', 'Site name', 'MTF', 'MTF basis',
    'DBH mean', 'MAP', 'Insects', 'MATsoil',
    'Wood durability', 'Managed', 'Fire', 'Mortality', 'MAT', 'PFT',
    'Soil_water', 'Soil_water_max',
]

species_mtf = sg.MTF_database_import('data/MTF_database.xlsx', 'Species_MTF', engine=True)

# Replace missing site names / coordinates with empty strings.
for text_col in ('Site name', 'Y coords', 'X coords'):
    species_mtf.loc[species_mtf[text_col].isna(), [text_col]] = ''
species_mtf_cp = species_mtf.copy()

# Expose MAT and MAP of the selected climate data set under plain column names.
for clim_var in ('MAT', 'MAP'):
    species_mtf[clim_var] = species_mtf[(climate_data, clim_var)]

mtf = species_mtf.loc[:, cols].copy()
# Keep only the first level of the column labels.
mtf.columns = mtf.columns.get_level_values(0)

# Untouched copies of the table, taken before any filtering or transformation.
mtf_cp    = mtf.copy()
mtf_check = mtf.copy()

#--- Remove data - based on Cook's distance and inapplicable studies
# Every filter ends in .copy() so that the column assignments in the transform
# step act on an independent frame (no SettingWithCopyWarning).
if remove_data:
    mtf = mtf[~(mtf.Reference == 'Ritchie2014EstablishmentForest')].copy()
    mtf = mtf[(mtf.Reference != 'Campbell2016CarbonStates')].copy()

    # Experiment-specific subsets, selected by tags in the experiment name
    if 'FireOnly' in date_exp_str:
        mtf = mtf[(mtf.Fire == 1)].copy()

    if 'NoFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0)].copy()

    if 'NoInsectsFire' in date_exp_str:
        mtf = mtf[(mtf.Fire == 0) & (mtf.Insects == 0)].copy()

    if 'TempSS_FMort' in date_exp_str:
        mtf = mtf[(mtf.MAT > 0) & (mtf.MAT < 8.6)].copy()


#--------------------#
#   Transform data   #
#--------------------#
# Assign whole columns with mtf[col] = ... rather than mtf.loc[:, col] = ...:
# on pandas >= 2.0 the .loc form writes into the existing column in place, so an
# object-dtype column would stay object even after pd.to_numeric.
mtf['DBH mean'] = pd.to_numeric(mtf['DBH mean'])
mtf['MTF']      = mtf['MTF'].apply(np.log)  # regressions are fitted on log(MTF)

# Drop rows with missing values in the covariates needed by the data subset
if 'WoodQ' in date_exp_str:
    required = ['DBH mean','MAT','PFT','Wood durability']
elif 'Moisture' in date_exp_str:
    required = ['DBH mean','MAT','PFT','Soil_water', 'Soil_water_max','Managed']
else:
    # 'Management' experiments need the same columns as the default case.
    required = ['DBH mean','MAT','PFT','Managed']
mtf = mtf.dropna(subset=required)

# Rename columns to work with OLS (no spaces in the names).
# Work on a copy of the labels: writing into mtf.columns.values would modify the
# Index's own backing array, and an Index is meant to be immutable.
cols             = mtf.columns.to_numpy(copy=True)
index_dbh        = mtf.columns.get_loc('DBH mean')
index_wq         = mtf.columns.get_loc('Wood durability')
cols[index_wq]   = 'Wood_durability'
cols[index_dbh]  = 'DBH_mean'
mtf.columns      = cols


#--- Separate data according to MTF type
mtf_count = mtf[(mtf['MTF basis'] == 'count') & (mtf.MTF.notnull())].copy()
mtf_cmass = mtf[(mtf['MTF basis'] == 'cmass') & (mtf.MTF.notnull())].copy()
mtf_c     = mtf[(mtf['MTF basis'] == 'c') & (mtf.MTF.notnull())].copy()

# Combine data into list for loop (same order as mtf_type)
data = [mtf_count, mtf_cmass]

print(len(mtf_count))

49


In [38]:
# Every regression set is run twice: on the raw covariates and on standardised ones.
standardise_covars_list = [False, True]

# Initialise storage
# axis 0: entry of standardise_covars_list, axis 1: entry of mtf_type,
# axis 2: [0] ranking table, [1] regression result objects
reg_results = np.empty((len(standardise_covars_list),len(mtf_type),2), dtype='object')

for st, standardise_covars in enumerate(standardise_covars_list):
    if standardise_covars:
        bd.print_header('Standardising covariates')
    for t,tp in enumerate(mtf_type):
        bd.print_TITLE(tp)

        # Work on a copy so standardisation never alters the source tables.
        mtf_reg = data[t].copy()

        if standardise_covars:
            for var in var_list_in:
                var_mean = mtf_reg[var].mean()
                var_std  = mtf_reg[var].std()
                print(var, var_mean)
                # Replace the whole column (mtf_reg[var] = ...) instead of writing
                # through .loc[:, var]: the in-place .loc form warns or fails on
                # recent pandas when floats are set into an integer-typed covariate.
                if var_std == 0:
                    print('\n'+tp+'t!\nSetting {} to 0 because Std is 0, i.e. standardisation fails!\n'.format(
                        var))
                    mtf_reg[var] = 0.0
                else:
                    # z-score: zero mean, unit (sample) standard deviation
                    mtf_reg[var] = (mtf_reg[var] - var_mean) / var_std

        # Run regressions
        res_table, result_objs = sg.regression_wrapper(mtf_reg, experiments[:],alpha, tp,
                                                       climate_data, standardise_covars)

        reg_results[st,t,0] = res_table   # Ranking of models
        reg_results[st,t,1] = result_objs # regression objects for each corresponding model (regression metrics & parameters)

        # Show only the models flagged in the prms_sig column.
        display(res_table[res_table.prms_sig == True])

# Persist all results (both standardisation settings, both MTF bases) in one pickle.
storage_string = 'spe_reg_results/'+date_exp_str+'_SPE_'+str(alpha)+'_'+climate_data+'.pkl'
with open(storage_string, 'wb') as f:
    pickle.dump(reg_results, f)



             count             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + PFT + DBH_mean,0.43,78.8,0.0,11.43,1.58,True,,49.0,0,count,CRUNCEPv7,False
MTF ~ MATsoil + DBH_mean,0.36,83.3,4.5,12.66,1.37,True,,49.0,2,count,CRUNCEPv7,False
MTF ~ MAT + DBH_mean,0.35,84.4,5.6,12.44,1.26,True,,49.0,3,count,CRUNCEPv7,False
MTF ~ MATsoil,0.14,96.7,17.9,13.19,1.17,True,,49.0,6,count,CRUNCEPv7,False




             cmass             




Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil + DBH_mean,0.39,88.9,2.5,19.17,1.33,True,,49.0,1,cmass,CRUNCEPv7,False
MTF ~ MAT + DBH_mean,0.38,89.7,3.3,18.76,1.24,True,,49.0,3,cmass,CRUNCEPv7,False
MTF ~ MATsoil,0.17,103.5,17.1,19.84,1.13,True,,49.0,4,cmass,CRUNCEPv7,False



#------------------------------#
#   Standardising covariates   #
#------------------------------#



             count             


DBH_mean 24.978080436353846
MAP 1074.2836734693878
MAT 5.3
MATsoil 6.732653092364876
PFT 0.5306122448979592


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MAT + PFT + DBH_mean,0.43,78.8,0.0,11.43,1.58,True,,49.0,0,count,CRUNCEPv7,True
MTF ~ MATsoil + DBH_mean,0.36,83.3,4.5,12.66,1.37,True,,49.0,4,count,CRUNCEPv7,True
MTF ~ MAT + DBH_mean,0.35,84.4,5.6,12.44,1.26,True,,49.0,5,count,CRUNCEPv7,True
MTF ~ MATsoil,0.14,96.7,17.9,13.19,1.17,True,,49.0,11,count,CRUNCEPv7,True




             cmass             


DBH_mean 24.978080436353846
MAP 1074.2836734693878
MAT 5.3
MATsoil 6.732653092364876
PFT 0.5306122448979592


Unnamed: 0,r2,AIC,d_AIC,RMSE,DWT,prms_sig,prms_fail,N_obs,ID,MTF_basis,climate,standardised_covars
MTF ~ MATsoil + DBH_mean,0.39,88.9,2.5,19.17,1.33,True,,49.0,2,cmass,CRUNCEPv7,True
MTF ~ MAT + DBH_mean,0.38,89.7,3.3,18.76,1.24,True,,49.0,4,cmass,CRUNCEPv7,True
MTF ~ MATsoil,0.17,103.5,17.1,19.84,1.13,True,,49.0,5,cmass,CRUNCEPv7,True
