## Importing Header and SoilPrep Files

In [1]:
import import_ipynb 
from Header import *

import import_ipynb
from SoilPrep import *

importing Jupyter notebook from Header.ipynb
importing Jupyter notebook from SoilPrep.ipynb


## Loading Data, MetaData, and Mtree

In [2]:
# Each pickle is read once and its entries are unpacked into notebook-level
# names that the plotting / scoring cells below rely on.
# NOTE(review): pickle.load can run arbitrary code while deserialising; only
# load these files if they were produced by a trusted run of this project.
with open('meta_data.pickle', 'rb') as file:
    MetaData = pickle.load(file)

(sg_filters, window_lengths, prepare_spec, nbands_sampling,
 target_names, prepare_target, ml_methods, clr) = [
    MetaData[key] for key in (
        'sg_filters', 'window_lengths', 'prepare_spec', 'nbands_sampling',
        'target_names', 'prepare_target', 'ml_methods', 'clr',
    )
]


with open('data.pickle', 'rb') as file:
    Data = pickle.load(file)

(spectra, T, NT,
 smth_spec, fod_spec, cr_spec, log_spec,
 sampled_spec, sampled_cr, sampled_fod, sampled_log,
 rand_t, rand_nt) = [
    Data[key] for key in (
        'spectra', 'T', 'NT',
        'smth_spec', 'fod_spec', 'cr_spec', 'log_spec',
        'sampled_spec', 'sampled_cr', 'sampled_fod', 'sampled_log',
        'rand_t', 'rand_nt',
    )
]


with open('Mtree.pickle', 'rb') as file:
    Mtree = pickle.load(file)

## Visualizing Processed Spectrum (variable samples)

In [3]:
def plot_spec (sample, process):
    """Plot one raw spectrum together with one processed version of it.

    Parameters
    ----------
    sample : int
        Positional row index into the notebook-level spectral tables.
    process : str
        'continuum' overlays the matching row of ``cr_spec``; 'fod' overlays
        the matching row of ``fod_spec`` multiplied by 100; any other value
        overlays the matching row of ``log_spec`` divided by 3.

    Both curves are drawn on the current pyplot axes and the y-range is fixed
    to [-0.6, 0.9]. Nothing is returned.
    """
    # Raw spectrum always goes first.
    spectra.iloc[sample, :].plot()

    # The constant factors rescale the processed curve so that it fits in the
    # same fixed y-range as the raw spectrum.
    if process == 'continuum':
        overlay = cr_spec.iloc[sample, :]
    elif process == 'fod':
        overlay = fod_spec.iloc[sample, :] * 100
    else:
        overlay = log_spec.iloc[sample, :] / 3
    overlay.plot()

    plt.ylim([-0.6, 0.9])


## Correlation Between Wavelengths and Targets

In [4]:
# Apply the 'science', 'notebook' and 'grid' style sheets to every figure drawn after this cell.
# NOTE(review): these style names are presumably provided by the SciencePlots package
# (made available through Header) rather than by matplotlib itself; confirm it is installed.
plt.style.use(['science','notebook','grid'])

def plot_corr (target, prepare_spec, n_bands):
    """Plot the first row of ``find_rpval``'s first result for one target.

    Parameters
    ----------
    target : str
        Name looked up in the notebook-level ``target_names``; its position
        selects the entry of ``T`` and the line colour from ``clr``.
    prepare_spec : str
        Which spectral table to use: 'spec' -> ``spectra``, 'cr' -> ``cr_spec``,
        'fod' -> ``fod_spec``; any other value -> ``log_spec``.
        (This parameter shadows the notebook-level list of the same name.)
    n_bands : int
        Passed to ``resample_spectra`` together with the chosen table.

    The curve is drawn on the current pyplot axes and the y-range is fixed to
    [-0.9, 0.9]. Nothing is returned.
    """
    idx = target_names.index(target)

    # Choose the spectral table; unrecognised names fall through to log spectra.
    if prepare_spec == 'spec':
        source = spectra
    elif prepare_spec == 'cr':
        source = cr_spec
    elif prepare_spec == 'fod':
        source = fod_spec
    else:
        source = log_spec

    # find_rpval yields a pair; only the first element is plotted
    # (presumably correlation coefficients, with p-values second - TODO confirm).
    r_frame, _ = find_rpval(resample_spectra(source, n_bands), T[idx])
    r_frame.iloc[0, :].plot(color=clr[idx])

    plt.ylim([-0.9, 0.9])


## Best of All Worlds: Best Configuration per Target and Method

In [5]:
def best_model_parameters (Mtree, target, method, scorer):
    """Find the best-scoring stored configuration for one target and ML method.

    Every combination of target preprocessing, spectral preprocessing and band
    count (the notebook-level ``prepare_target``, ``prepare_spec`` and
    ``nbands_sampling`` lists) is looked up as
    ``Mtree[method][target][tp][p][n]`` and compared on the chosen test score.

    A configuration is eligible only when its ``r2_train`` is at least its
    ``r2_test``; among eligible configurations the one with the highest score
    wins (the first one encountered wins ties).

    Parameters
    ----------
    Mtree : nested mapping
        Results tree indexed as ``[method][target][target_prep][spec_prep][n_bands]``;
        each leaf must provide 'r2_train', 'r2_test' and the selected score key.
    target : str
        Target key inside ``Mtree[method]``.
    method : str
        ML method key inside ``Mtree``.
    scorer : str
        'iqrp' compares 'iqrp_test', 'rpd' compares 'rpd_test'; any other
        value compares 'r2_test'.

    Returns
    -------
    list
        ``[scorer, score, 'Spec:', spec_prep, 'bands:', n_bands, 'Tar:', target_prep]``
        with ``score`` rounded to two decimals. When no configuration is
        eligible, ``score`` and the three parameter slots are the string 'NA'
        (previously this case raised UnboundLocalError).
    """
    if scorer == 'iqrp':
        score_key = 'iqrp_test'
    elif scorer == 'rpd':
        score_key = 'rpd_test'
    else:
        score_key = 'r2_test'

    # Start below every real score so that eligible configurations with very
    # poor (e.g. strongly negative R2) scores are still reported.
    best_score = float('-inf')
    best_tp = best_p = best_n = 'NA'
    found = False

    for tp in prepare_target:
        for p in prepare_spec:
            for n in nbands_sampling:
                Y = Mtree[method][target][tp][p][n]
                r2_train = Y['r2_train']
                r2_test = Y['r2_test']
                cur_score = Y[score_key]

                # Skip configurations that score better on test than on train.
                if cur_score > best_score and r2_train >= r2_test:
                    best_score = cur_score
                    best_tp = tp
                    best_n = n
                    best_p = p
                    found = True

    shown_score = np.round(best_score, 2) if found else 'NA'
    param_list = [scorer, shown_score, 'Spec:', best_p, 'bands:', best_n, 'Tar:', best_tp]
    return (param_list)

def best_score_for (Mtree, target, scorer):
    """Print the best configuration of every ML method for one target.

    For each entry of the notebook-level ``ml_methods`` the result of
    ``best_model_parameters(Mtree, target, <method>, scorer)`` is printed on
    its own line, prefixed with the target name and suffixed with the method
    name. Nothing is returned.
    """
    for ml_name in ml_methods:
        best = best_model_parameters(Mtree, target, ml_name, scorer)
        label = 'For:' + target + '->'
        print(label, best, ':' + ml_name)

## Plotting Model Accuracy (ipywidgets)

In [6]:
def plot_model_acc (method, target, target_preprocessing, spec_preprocessing, n_bands):
    """Draw a 2x2 accuracy figure for one stored model configuration.

    The configuration is read from the notebook-level ``Mtree`` as
    ``Mtree[method][target][target_preprocessing][spec_preprocessing][n_bands]``.

    Panels
    ------
    top-left     actual vs. predicted on the test set ('test' / 'testP'),
                 annotated with the stored 'iqrp_test', 'rpd_test', 'r2_test'.
    top-right    actual vs. predicted on the training set ('train' / 'trainP'),
                 annotated with scores recomputed via ``find_iqrp``,
                 ``find_rpd`` and ``find_r2``.
    bottom-left  stem plot of test R2 against every entry of
                 ``nbands_sampling`` for target preprocessing 'none'.
    bottom-right same for target preprocessing 'minmax'.

    Both scatter panels show a first-degree least-squares fit (blue) and the
    identity line (green). R2 values that are <= 0 are drawn as 0 in the stem
    plots. For ``method == 'plsr'`` only column 0 of the stored predictions is
    used and the stored 'n_comp' is printed on the test panel.

    Nothing is returned; the figure is left to the notebook to display.
    """
    target_idx = target_names.index(target)

    #------------ data for the two scatter panels ------------------------
    leaf = Mtree[method][target][target_preprocessing][spec_preprocessing][n_bands]

    test_actual = leaf['test']
    test_pred = leaf['testP']
    train_actual = leaf['train']
    train_pred = leaf['trainP']

    if method == 'plsr':
        n_com = leaf['n_comp']
        # Stored PLSR predictions are 2-D; keep the first column only.
        test_pred = test_pred[:, 0]
        train_pred = train_pred[:, 0]

    test_scores = (leaf['iqrp_test'], leaf['r2_test'], leaf['rpd_test'])
    train_scores = (find_iqrp(train_pred, train_actual),
                    find_r2(train_pred, train_actual),
                    find_rpd(train_pred, train_actual))

    test_frame = pd.DataFrame({'actual': test_actual.values, 'predic': test_pred})
    test_fit = np.polyfit(test_actual, test_pred, 1)

    train_frame = pd.DataFrame({'actual': train_actual.values, 'predic': train_pred})
    train_fit = np.polyfit(train_actual, train_pred, 1)

    #------------ figure ---------------------------------------------------
    fig, axes = plt.subplots(2, 2, figsize=(16, 14))

    def draw_fit_panel(ax, frame, fit, actual, caption, scores):
        """Scatter of actual vs. predicted with fit line, identity line and score labels."""
        iqrp, r2, rpd = scores
        frame.plot.scatter(ax=ax, x="actual", y="predic", alpha=0.8,
                           color=clr[target_idx], edgecolors='k')
        ax.plot(actual, np.polyval(fit, actual), c='blue', linewidth=1)
        ax.plot(actual, actual, color='green', linewidth=1)
        ax.tick_params(axis='both', labelsize=10)
        ax.text(0.05, 0.95, target_names[target_idx] + caption, transform=ax.transAxes,
                fontsize=20, color=clr[target_idx])
        ax.text(0.05, 0.90, 'IQRP ={:.2f}'.format(iqrp), transform=ax.transAxes, fontsize=16)
        ax.text(0.05, 0.85, 'RPD ={:.2f}'.format(rpd), transform=ax.transAxes, fontsize=16)
        ax.text(0.05, 0.80, 'R2 ={:.2f}'.format(np.round(r2, 3)), transform=ax.transAxes,
                fontsize=16)
        ax.text(0.95, 0.07, 'Method: ' + method, transform=ax.transAxes,
                horizontalalignment='right', fontsize=20)

    def draw_stem_panel(ax, scores, tag):
        """Stem plot of R2 per band count, labelled with the target-preprocessing tag."""
        ax.stem(nbands_sampling, scores)
        ax.tick_params(axis='both', labelsize=10)
        ax.text(0.00, 1.01, target_names[target_idx], transform=ax.transAxes,
                fontsize=20, color=clr[target_idx])
        ax.text(0.99, 1.01, 'Spec_prep: ' + spec_preprocessing, transform=ax.transAxes,
                horizontalalignment='right', fontsize=16)
        ax.text(0.17, 1.01, tag, transform=ax.transAxes,
                horizontalalignment='left', fontsize=16)
        ax.text(0.60, -0.1, 'n_bands', transform=ax.transAxes,
                horizontalalignment='right', fontsize=16)
        ax.text(-0.1, 0.5, 'R2 Scores', horizontalalignment='left',
                verticalalignment='center', rotation='vertical',
                transform=ax.transAxes, fontsize=16)

    #------------ top row: test / train prediction scatter ------------------
    draw_fit_panel(axes[0][0], test_frame, test_fit, test_actual, ' (Test Data)', test_scores)

    if method == 'plsr':
        axes[0][0].text(0.95, 0.03, 'n_component={:.2f}'.format(n_com),
                        transform=axes[0][0].transAxes,
                        horizontalalignment='right', fontsize=12)

    draw_fit_panel(axes[0][1], train_frame, train_fit, train_actual, ' (Training Data)',
                   train_scores)

    #------------ bottom row: test R2 vs. n_bands ---------------------------
    none_branch = Mtree[method][target]['none'][spec_preprocessing]
    minmax_branch = Mtree[method][target]['minmax'][spec_preprocessing]

    # Non-positive R2 values are shown as 0 so the stems never point downwards.
    r2_none = [0 if score <= 0 else score
               for score in [none_branch[bands]['r2_test'] for bands in nbands_sampling]]
    r2_minmax = [0 if score <= 0 else score
                 for score in [minmax_branch[bands]['r2_test'] for bands in nbands_sampling]]

    draw_stem_panel(axes[1][0], r2_none, '(none)')
    draw_stem_panel(axes[1][1], r2_minmax, '(minmax)')

In [7]:
# ipywidgets.interact(plot_model_acc, target = target_names, \
#                     spec_preprocessing = prepare_spec, n_bands = nbands_sampling, \
#                     target_preprocessing = prepare_target,  method = ml_methods)