In [1]:
#importing modules necessary
import numpy as np
import pandas as pd
import math

#importing modules required for processing spectra
from scipy.signal import savgol_filter
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler

#import package to split train and test data
from sklearn.model_selection import train_test_split


#import modelling packages
from sklearn.model_selection import cross_val_predict
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV

#importing essential package to make a pipeline
from sklearn.pipeline import make_pipeline

#importing visualisation packages
import matplotlib.pyplot as plt
import seaborn as sns

#importing metrics
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

#ignoring warning
import warnings
warnings.filterwarnings('ignore')

  import pandas.util.testing as tm


In [2]:
#finding wavelengths
def wavelength_calc(dataframe):
    wave_length = []
    columns = dataframe.columns
    for i in columns:
        #if len(i.split('X')) ==2 :
        if len(i.split('X')) ==2 :
            wave_length.append(float(i.split('X')[1]))
        else:
            wave_length.append(float(i))
    return wave_length

#Creating a smoothing function
def smooth_fast(spectra,window_size,columns):
    #Smoothing spectra using matrix operations:
    #Create smoothing matrix:
    #start=datetime.now()
    m_rows = len(range(window_size,len(columns)-window_size))
    m_cols = 2*window_size
    matrix = np.zeros((m_rows,m_cols),dtype=int)
    for j in range(0, len(matrix)):
        k = j+1
        matrix[j] = [x for x in range(k,k+2*window_size)]
    #Smoothing spectra using matrix operations:
    n_cols = m_rows
    newspectra = np.zeros((len(spectra),n_cols))
    for i in range(len(matrix)):
        newspectra[:,i] = np.mean(spectra[: ,matrix[i]],axis=1)
    #Add front and end tails (not smoothed):
    #new_spectra = np.asarray(newspectra)
    fronttail = newspectra[:,:1]
    endtail = newspectra[:,-1:]
    for k in range(1,window_size):
        fronttail=np.append(fronttail,newspectra[:,:1], axis=1)
        endtail = np.append(endtail,newspectra[:,-1:], axis =1)
    data = np.concatenate((fronttail, newspectra, endtail), axis=1)
    #result = pd.DataFrame(data=data, columns = columns)
    #print(datetime.now()-start)
    return data

#Function definition for smoothing using Savtizky Golay method
def savitzky(input_array, window_length):
    savgol = savgol_filter(input_array, window_length, polyorder = 2)
    return savgol

def SNV(input_array):
    #spectra_arr = np.asarray(spectra)
    #spectra_t = np.transpose(input_array)#np.transpose(spectra_arr)
    #scale(X, *, axis=0, with_mean=True, with_std=True, copy=True)
    spectrat_snv = scale(input_array, axis=0)#scale(spectra_t,axis=0)
    #spectra_snv = pd.DataFrame(np.transpose(spectrat_snv),columns=columns)
    return spectrat_snv

#Multiplicative scatter correction
def msc(input_array, reference = None):
    ''' Perform Multiplicative scatter correction'''

    #Mean correction
    for i in range(input_array.shape[0]):
        input_array[i,:] -= input_array[i,:].mean()

    # Get the reference spectrum. If not given, estimate from the mean    
    if reference is None:    
        # Calculate mean
        ref = np.mean(input_array, axis=0)
    else:
        ref = reference

    # Define a new data matrix and populate it with the corrected data    
    output_data = np.zeros_like(input_array)
    for i in range(input_array.shape[0]):
        # Run regression
        fit = np.polyfit(ref, input_array[i,:], 1, full=True)
        # Apply correction
        output_data[i,:] = (input_array[i,:] - fit[0][1]) / fit[0][0] 
    #print(fit)
    return output_data
#Apply derivate in second order, applying third and fourth order derivatives will remove all variance in the data. 
#Derivative 2
def derivate_second(input_array, window_length):
    der1 = savgol_filter(input_array, window_length, polyorder = 2,deriv=1)
    der2 = savgol_filter(der1, window_length, polyorder = 2, deriv=2)
    return der2
#Derivative 1
def derivate_first(input_array, window_length):
    der1 = savgol_filter(input_array, window_length, polyorder = 2,deriv=1)
    return der1
#remove baseline
def baseline(input_array,X):
    base = np.zeros((len(input_array),len(X)))
    for i in range(len(input_array)):
        c = np.polyfit(X, input_array[i], 2)
        base[i] = np.polyval(c, X)
    #Baseline removal
    base_remove = input_array - base
    return base_remove

In [3]:

# SAVGOL - 1ST DERUVATIVE - 2ND DERIVATIVE PREPROCESSING
# def savitzky(input_array, window_length):
#     savgol = savgol_filter(input_array, window_length, polyorder = 2)
#     der1 = savgol_filter(savgol, window_length, polyorder = 2,deriv=1)
#     der2 = savgol_filter(der1, window_length, polyorder = 2, deriv=2)
#     return der2

#Apply Standard Normal Variate
# def SNV(input_array):
#     '''input as an array'''
#     #arr = np.asarray(input_array)
#     #transpose = np.transpose(input_array)
#     snv_result = scale(input_array,axis=0)
#     #spectra_snv = pd.DataFrame(np.transpose(spectrat_snv),columns=columns)
#     return snv_result


In [4]:
model_dataset = pd.read_csv('Datasets/merged_cleaned.csv')

In [5]:
X = model_dataset[model_dataset.columns[4:]]
print(X.shape)
y = model_dataset.iloc[:,3]
print(y.shape)

(532, 801)
(532,)


In [6]:
input_array = np.asarray(X, dtype=float)

In [7]:
def plotting_function(dataframe, y_axisval):
    f = plt.figure()
    f.set_figwidth(12)
    f.set_figheight(4)
    for i , row in dataframe.iterrows():
        plt.plot(y_axisval,row)
    plt.show()

In [8]:
#plotting_function(X, wavelength_calc(X))

### SVM regression for raw spectrum

SVM parameters
1. kernel{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’}, default=’rbf’
Specifies the kernel type to be used in the algorithm. It must be one of ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’ or a callable. If none is given, ‘rbf’ will be used. If a callable is given it is used to precompute the kernel matrix.

2. degree <int>, default=3
Degree of the polynomial kernel function (‘poly’). Ignored by all other kernels.

3. gamma{‘scale’, ‘auto’} or float, default=’scale’
Kernel coefficient for ‘rbf’, ‘poly’ and ‘sigmoid’.

if gamma='scale' (default) is passed then it uses 1 / (n_features * X.var()) as value of gamma,

if ‘auto’, uses 1 / n_features.

Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.

4. coef0 <float>, default=0.0
Independent term in kernel function. It is only significant in ‘poly’ and ‘sigmoid’.

5. tol <float>, default=1e-3
Tolerance for stopping criterion.

6. C <float>, default=1.0
Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.

7. epsilon <float>, default=0.1
Epsilon in the epsilon-SVR model. It specifies the epsilon-tube within which no penalty is associated in the training loss function with points predicted within a distance epsilon from the actual value.

8. shrinking <bool>, default=True
Whether to use the shrinking heuristic. See the User Guide.

9. cache_size <float>, default=200
Specify the size of the kernel cache (in MB).

10. verbose <bool>, default=False
Enable verbose output. Note that this setting takes advantage of a per-process runtime setting in libsvm that, if enabled, may not work properly in a multithreaded context.

11. max_iter <int>, default=-1
Hard limit on iterations within solver, or -1 for no limit.

In [9]:
regr = make_pipeline(StandardScaler(), SVR()) #C=1.0, epsilon=0.2

In [10]:
regr.fit(X, y)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svr',
                 SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                     gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                     shrinking=True, tol=0.001, verbose=False))],
         verbose=False)

In [11]:
regr.score(X,y)

0.11346518247196768

In [12]:
#splitting into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=123)

In [13]:
regr.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svr',
                 SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                     gamma='auto_deprecated', kernel='rbf', max_iter=-1,
                     shrinking=True, tol=0.001, verbose=False))],
         verbose=False)

In [14]:
regr.score(X_test, y_test)

0.11772637735464009

In [15]:
cols = X.columns
print(cols)
print(len(cols))
print(type(cols[0]))

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '791', '792', '793', '794', '795', '796', '797', '798', '799', '800'],
      dtype='object', length=801)
801
<class 'str'>


Using grid search to finf optimal hyper paramters in SVR

In [16]:
# defining parameter range
param_grid = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']} 
  
grid = GridSearchCV(SVR(), param_grid, refit = True, verbose = 0)
# print best parameter after tuning
# print(grid.best_params_)
  
# print how our model looks after hyper-parameter tuning
#print(grid.best_estimator_)

# grid_predictions = grid.predict(X_test)
# grid.score(X_test, y_test)
# grid.score(X_train, y_train)
spectra = np.asarray(X, dtype = float)
#plotting_function(smooth_fast(spectra, 19, cols),cols)
wave_length = wavelength_calc(X)
result_df = pd.DataFrame(columns=['Trial No','Model','Pre Processing','Training R\u00b2', 'Validation R\u00b2'])

In [17]:
# fitting the model for grid search
#Trial 1 Raw spectra
print("Trial 1")
print("PRE PROCESSING STEP: None")
#input to base line removal is input array
preprocessed_spectra = pd.DataFrame(spectra, columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input for moving window smoothing is an array
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123)
grid.fit(X_train, y_train)
# print best parameter after tuning
print("Trial 1 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 1','Model' : 'SVM','Pre Processing': 'None', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 1
PRE PROCESSING STEP: None
Trial 1 : Best Parameters {'C': 1000, 'gamma': 1, 'kernel': 'rbf'}
Training R²  0.7361680972799547
Validation R²  0.6008417822018887


In [18]:
#Trial 2 Moving Window
print("Trial 2")
print("PRE PROCESSING STEP: Moving Window Smoothing")
#input to base line removal is input array
preprocessed_spectra = pd.DataFrame(smooth_fast(spectra, 19, wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input for moving window smoothing is an array
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123)
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 2 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 2','Model' : 'SVM','Pre Processing': 'MW', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 2
PRE PROCESSING STEP: Moving Window Smoothing
Training R²  0.6921935662478713
Validation R²  0.5629308413747942


In [19]:
#Trial 3 Savitzky Golay smoothing
print("Trial 3")
print("PRE PROCESSING STEP: Savitzky Golay Smoothing")
#Using the same window size as in moving window smoothing
preprocessed_spectra = pd.DataFrame(savitzky(input_array, 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 3 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 3','Model' : 'SVM','Pre Processing': 'SG', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 3
PRE PROCESSING STEP: Savitzky Golay Smoothing
Training R²  0.7111680234479897
Validation R²  0.588550560193477


In [20]:
#Trial 4 SNV
print("Trial 4")
print("PRE PROCESSING STEP: Standard Normal Variate")
#input for SNV is an input array
preprocessed_spectra = pd.DataFrame(SNV(spectra), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 4 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 4','Model' : 'SVM','Pre Processing': 'SNV', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 4
PRE PROCESSING STEP: Standard Normal Variate
Training R²  0.9956241626669088
Validation R²  0.46682103720241175


In [21]:
#Trial 5 Multiplicative Scatter Correction
#Input for MSC is an array
print("Trial 5")
print("PRE PROCESSING STEP: Multiplicative Scatter Correction")
preprocessed_spectra = pd.DataFrame(msc(spectra), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 5 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 5','Model' : 'SVM','Pre Processing': 'MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 5
PRE PROCESSING STEP: Multiplicative Scatter Correction
Training R²  0.16682259861468474
Validation R²  0.02729729181555207


In [22]:
#Trial 6 De-Trending
print("Trial 6")
print("PRE PROCESSING STEP: De-Trending")
#input to base line removal is input array
preprocessed_spectra = pd.DataFrame(baseline(spectra, wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
print("Trial 6 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 6','Model' : 'SVM','Pre Processing': 'De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 6
PRE PROCESSING STEP: De-Trending
Trial 6 : Best Parameters {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf'}
Training R²  0.17741110812726002
Validation R²  0.10052917558522012


In [23]:
#Trial 7 Frist Order Derivative
#Input for derivative is an array
print("Trial 7")
print("PRE PROCESSING STEP: First Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(spectra, 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 7 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 7','Model' : 'SVM','Pre Processing': 'First Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 7
PRE PROCESSING STEP: First Order Derivative
Training R²  -0.0005737100791920469
Validation R²  -0.0001552605172241428


In [24]:
#Trial 8 Second Order Derivative
#Input for derivative is an array
print("Trial 8")
print("PRE PROCESSING STEP: Second Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(spectra, 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 7 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 8','Model' : 'SVM','Pre Processing': 'Second Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 8
PRE PROCESSING STEP: Second Order Derivative
Training R²  -0.010074559283906215
Validation R²  -0.0004564160511721926


In [25]:
#Trial 9 Moving window , SNV
#Input for derivative is an array
print("Trial 9")
print("PRE PROCESSING STEP: Moving window and SNV")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(SNV(smooth_fast(spectra, 19, cols)), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 9 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 9','Model' : 'SVM','Pre Processing': 'Moving window and SNV', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 9
PRE PROCESSING STEP: Moving window and SNV
Training R²  0.9844259425810032
Validation R²  0.750279813901561


In [26]:
#Trial 10 Moving window , MSC
#Input for derivative is an array
print("Trial 10")
print("PRE PROCESSING STEP: Moving window and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(smooth_fast(spectra, 19, cols), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 10 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 10','Model' : 'SVM','Pre Processing': 'Moving window and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 10
PRE PROCESSING STEP: Moving window and MSC
Training R²  0.9782011633187689
Validation R²  0.7286692762516265


In [27]:
#Trial 11 Moving window , De-Trending
#Input for derivative is an array
print("Trial 11")
print("PRE PROCESSING STEP: Moving window and De-Trending")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(smooth_fast(spectra, 19, cols), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 11 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 11','Model' : 'SVM','Pre Processing': 'Moving window and De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 11
PRE PROCESSING STEP: Moving window and De-Trending
Training R²  0.14463125946953115
Validation R²  0.10378013995562407


In [28]:
#Trial 12 Moving window , First ORder Derivative
#Input for derivative is an array
print("Trial 12")
print("PRE PROCESSING STEP: Moving window and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(smooth_fast(spectra, 19, cols), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 12 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 12','Model' : 'SVM','Pre Processing': 'Moving window and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 12
PRE PROCESSING STEP: Moving window and 1st order Derivative
Training R²  -0.010077233660684692
Validation R²  -0.00046467578101183626


In [30]:
#Trial 13 Moving window , Derivative
#Input for derivative is an array
print("Trial 13")
print("PRE PROCESSING STEP: Moving window and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(smooth_fast(spectra, 19, cols), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
print("Trial 13 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 13','Model' : 'SVM','Pre Processing': 'Moving window and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 13
PRE PROCESSING STEP: Moving window and 2nd order Derivative
Trial 13 : Best Parameters {'C': 0.1, 'gamma': 0.0001, 'kernel': 'rbf'}
Training R²  -0.010074559283894446
Validation R²  -0.0004564160511755233


In [31]:
#Trial 14 Savtizky Golay , SNV
#
print("Trial 14")
print("PRE PROCESSING STEP: Savitzky Golay smoothing and SNV")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(SNV(savitzky(input_array, 19)), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
print("Trial 14 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 14','Model' : 'SVM','Pre Processing': 'Savitzky Golay smoothing and SNV', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 14
PRE PROCESSING STEP: Savitzky Golay smoothing and SNV
Trial 14 : Best Parameters {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Training R²  0.9937514700606833
Validation R²  0.7369968143572675


In [32]:
#Trial 15 Savtizky Golay , MSC
#
print("Trial 15")
print("PRE PROCESSING STEP: Savitzky Golay smoothing and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(savitzky(input_array, 19)), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 15 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 15','Model' : 'SVM','Pre Processing': 'Savitzky Golay smoothing and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 15
PRE PROCESSING STEP: Savitzky Golay smoothing and MSC
Training R²    0.12385615828894081
Validation R²  0.06604018711597026


In [33]:
#Trial 16 Savtizky Golay , De-Trending
#
print("Trial 16")
print("PRE PROCESSING STEP: Savitzky Golay smoothing and baseline removal")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(savitzky(input_array, 19), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 16 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 16','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing and baseline removal', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 16
PRE PROCESSING STEP: Savitzky Golay smoothing and baseline removal
Training R²    0.16240838149991332
Validation R²  0.10088751495127735


In [34]:
#Trial 17 Savtizky Golay , 1st order Derivative
#
print("Trial 17")
print("PRE PROCESSING STEP: Savitzky Golay smoothing and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(savitzky(input_array, 19), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 16 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 17','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 17
PRE PROCESSING STEP: Savitzky Golay smoothing and 1st order Derivative
Training R²    -0.0002543839693442962
Validation R²  -0.0009061165923158754


In [35]:
#Trial 18 Savtizky Golay , 2nd order Derivative
#
print("Trial 18")
print("PRE PROCESSING STEP: Savitzky Golay smoothing and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(savitzky(input_array, 19), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 16 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 18','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 18
PRE PROCESSING STEP: Savitzky Golay smoothing and 2nd order Derivative
Training R²    -0.010074559283898665
Validation R²  -0.00045641605116997225


In [36]:
#Trial 19 Moving window , SNV and MSC
#Input for derivative is an array
print("Trial 19")
print("PRE PROCESSING STEP: Moving window , SNV and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(SNV(smooth_fast(spectra, 19, cols))), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 19 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 19','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 19
PRE PROCESSING STEP: Moving window , SNV and MSC
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [37]:
#Trial 20 Moving window , SNV and De-Trending
#Input for derivative is an array
print("Trial 20")
print("PRE PROCESSING STEP: Moving window , SNV and De-Trending")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 20 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 20','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV and De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 20
PRE PROCESSING STEP: Moving window , SNV and De-Trending
Training R²  0.9756757322634553
Validation R²  0.7745809759423785


In [38]:
#Trial 21 Moving window , SNV and 1st order Derivative
#Input for derivative is an array
print("Trial 21")
print("PRE PROCESSING STEP: Moving window , SNV and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(SNV(smooth_fast(spectra, 19, cols)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 21 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 21','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 21
PRE PROCESSING STEP: Moving window , SNV and 1st order Derivative
Training R²  0.6996436062646788
Validation R²  0.4312165602930851


In [39]:
#Trial 22 Moving window , SNV and 2nd order Derivative
#Input for derivative is an array
print("Trial 22")
print("PRE PROCESSING STEP: Moving window , SNV and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(SNV(smooth_fast(spectra, 19, cols)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 22 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 22','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 22
PRE PROCESSING STEP: Moving window , SNV and 2nd order Derivative
Training R²  0.049340403829331254
Validation R²  0.01830088623778625


In [40]:
#Trial 23 Moving window , MSC and De-Trending
#Input for derivative is an array
print("Trial 23")
print("PRE PROCESSING STEP: Moving window , MSC and De-Trending")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(msc(smooth_fast(spectra, 19, cols)), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 22 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 23','Model' : 'SVM',
                              'Pre Processing': 'Moving window , MSC and De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 23
PRE PROCESSING STEP: Moving window , MSC and De-Trending
Training R²  0.08766658853688591
Validation R²  0.07036373838345977


In [41]:
#Trial 24 Moving window , SNV and 1st order Derivative
#Input for derivative is an array
print("Trial 24")
print("PRE PROCESSING STEP: Moving window , SNV and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(smooth_fast(spectra, 19, cols)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 21 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 24','Model' : 'SVM',
                              'Pre Processing': 'Moving window , MSC and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 24
PRE PROCESSING STEP: Moving window , SNV and 1st order Derivative
Training R²  -0.004327636593923767
Validation R²  -0.001169249709395137


In [42]:
#Trial 25 Moving window , MSC and 2nd order Derivative
#Input for derivative is an array
print("Trial 25")
print("PRE PROCESSING STEP: Moving window , MSC and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(smooth_fast(spectra, 19, cols)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 25 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 25','Model' : 'SVM',
                              'Pre Processing': 'Moving window , MSC and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 25
PRE PROCESSING STEP: Moving window , MSC and 2nd order Derivative
Training R²  -0.01007441883981297
Validation R²  -0.0004563332622220529


In [43]:
#Trial 26 Moving window , De-Trending and 1st order Derivative
#Input for derivative is an array
print("Trial 26")
print("PRE PROCESSING STEP: Moving window , De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(smooth_fast(spectra, 19, cols), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 26 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 26','Model' : 'SVM',
                              'Pre Processing': 'Moving window , De-Trending and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 26
PRE PROCESSING STEP: Moving window , De-Trending and 1st order Derivative
Training R²  -0.00823021898192322
Validation R²  -0.007781219353901525


In [44]:
#Trial 27 Moving window , De-Trending and 2nd order Derivative
#Input for derivative is an array
print("Trial 27")
print("PRE PROCESSING STEP: Moving window , De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(smooth_fast(spectra, 19, cols), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 26 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 27','Model' : 'SVM',
                              'Pre Processing': 'Moving window , De-Trending and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 27
PRE PROCESSING STEP: Moving window , De-Trending and 2nd order Derivative
Training R²  -0.010074559283894446
Validation R²  -0.0004564160511755233


In [45]:
#Trial 28 Moving window , 1st order Derivative and 2nd order Derivative
#Input for derivative is an array
print("Trial 28")
print("PRE PROCESSING STEP: Moving window , 1st order Derivative and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(derivate_first(smooth_fast(spectra, 19, cols), 19), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 26 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 28','Model' : 'SVM',
                              'Pre Processing': 'Moving window , 1st order Derivative and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 28
PRE PROCESSING STEP: Moving window , 1st order Derivative and 2nd order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [46]:
#Trial 29 Savtizky Golay , SNV and MSC
print("Trial 29")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(SNV(savitzky(input_array, 19))), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 29 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 29','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, SNV and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 29
PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and MSC
Training R²    -0.01007455928376344
Validation R²  -0.00045641605114488115


In [47]:
#Trial 30 Savtizky Golay , SNV and Baseline
print("Trial 30")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and De-Trending")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(SNV(savitzky(input_array, 19)), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 23 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 30','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, SNV and De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 30
PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and De-Trending
Training R²    0.9955513982602855
Validation R²  0.8028321425892481


In [48]:
#Trial 31 Savtizky Golay , SNV and 1st order Derivative
print("Trial 31")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(SNV(savitzky(input_array, 19)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 31 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 31','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, SNV and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 31
PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and 1st order Derivative
Training R²    0.8996069447464506
Validation R²  0.4910175249819891


In [49]:
#Trial 32 Savtizky Golay , SNV and 2nd order Derivative
print("Trial 32")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(SNV(savitzky(input_array, 19)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 32 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 32','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, SNV and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 32
PRE PROCESSING STEP: Savitzky Golay smoothing, SNV and 2nd order Derivative
Training R²    0.06098335421007572
Validation R²  0.031347672589891906


In [50]:
#Trial 33 Savtizky Golay , MSC and De-Trending
print("Trial 33")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and De-Trending")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(baseline(msc(savitzky(input_array, 19)), wave_length), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 32 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 33','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, MSC and De-Trending', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 33
PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and De-Trending
Training R²    0.05053033991036837
Validation R²  0.03934683122283522


In [51]:
#Trial 34 Savtizky Golay , SNV and 1st order Derivative
print("Trial 34")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(savitzky(input_array, 19)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 34 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 34','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, MSC and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 34
PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and 1st order Derivative
Training R²    0.0024678311445774925
Validation R²  0.0003162193888998077


In [52]:
#Trial 35 Savtizky Golay , SNV and 2nd order Derivative
print("Trial 35")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(savitzky(input_array, 19)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 34 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 35','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, MSC and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 35
PRE PROCESSING STEP: Savitzky Golay smoothing, MSC and 2nd order Derivative
Training R²    -0.010074559283901108
Validation R²  -0.0004564160511724147


In [53]:
#Trial 36 Savtizky Golay , De-Trending and 1st order Derivative
print("Trial 36")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(savitzky(input_array, 19), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 34 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 36','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, MSC and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 36
PRE PROCESSING STEP: Savitzky Golay smoothing, De-Trending and 1st order Derivative
Training R²    0.002128415382883775
Validation R²  0.0012836028263114585


In [54]:
#Trial 37 Savtizky Golay , De-Trending and 1st order Derivative
print("Trial 37")
print("PRE PROCESSING STEP: Savitzky Golay smoothing, De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(savitzky(input_array, 19), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 34 : Best Parameters", grid.best_params_)
print("Training R\u00b2   ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 37','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay smoothing, MSC and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 37
PRE PROCESSING STEP: Savitzky Golay smoothing, De-Trending and 2nd order Derivative
Training R²    -0.010074559283898665
Validation R²  -0.00045641605116997225


In [55]:
#Trial 38 Moving window , SNV and De-Trending, MSC
#Input for derivative is an array
print("Trial 38")
print("PRE PROCESSING STEP: Moving window , SNV, De-Trending and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length)), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 38','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV, De-Trending and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 38
PRE PROCESSING STEP: Moving window , SNV, De-Trending and MSC
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [56]:
#Trial 39 Moving window , SNV, MSC and 1st order Derivative
#Input for derivative is an array
print("Trial 39")
print("PRE PROCESSING STEP: Moving window , SNV, MSC and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(SNV(smooth_fast(spectra, 19, cols)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 39','Model' : 'SVM',
                              'Pre Processing': 'Moving window, SNV, MSC and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 39
PRE PROCESSING STEP: Moving window , SNV, MSC and 1st order Derivative
Training R²  0.9370597183016669
Validation R²  0.38724444509282685


In [57]:
#Trial 40 Moving window , SNV, MSC and 2nd order Derivative
#Input for derivative is an array
print("Trial 40")
print("PRE PROCESSING STEP: Moving window , SNV, MSC and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(SNV(smooth_fast(spectra, 19, cols))), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 40 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 40','Model' : 'SVM',
                              'Pre Processing': 'Moving window, SNV, MSC and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 40
PRE PROCESSING STEP: Moving window , SNV, MSC and 2nd order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [58]:
#Trial 41 Moving window, SNV, De-Trending and 1st order Derivative
#Input for derivative is an array
print("Trial 41")
print("PRE PROCESSING STEP: Moving window, SNV, De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 41 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 41','Model' : 'SVM',
                              'Pre Processing': 'Moving window, SNV, De-Trending and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 41
PRE PROCESSING STEP: Moving window, SNV, De-Trending and 1st order Derivative
Training R²  0.6950149888639038
Validation R²  0.42244322894304964


In [59]:
#Trial 42 Moving window, SNV, De-Trending and 2nd order Derivative
#Input for derivative is an array
print("Trial 42")
print("PRE PROCESSING STEP: Moving window, SNV, De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 42 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 42','Model' : 'SVM',
                              'Pre Processing': 'Moving window, SNV, De-Trending and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 42
PRE PROCESSING STEP: Moving window, SNV, De-Trending and 2nd order Derivative
Training R²  0.049340403829331574
Validation R²  0.01830088623778625


In [60]:
#Trial 43 Moving window, MSC, De-Trending and 1st order Derivative
#Input for derivative is an array
print("Trial 43")
print("PRE PROCESSING STEP: Moving window, MSC, De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(msc(smooth_fast(spectra, 19, cols)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 43 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 43','Model' : 'SVM',
                              'Pre Processing': 'Moving window, MSC, De-Trending and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 43
PRE PROCESSING STEP: Moving window, MSC, De-Trending and 1st order Derivative
Training R²  -0.004038523571543662
Validation R²  -0.0013248935783458649


In [61]:
#Trial 44 Moving window, MSC, De-Trending and 2nd order Derivative
#Input for derivative is an array
print("Trial 44")
print("PRE PROCESSING STEP: Moving window, MSC, De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(msc(smooth_fast(spectra, 19, cols)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 43 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 44','Model' : 'SVM',
                              'Pre Processing': 'Moving window, MSC, De-Trending and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 44
PRE PROCESSING STEP: Moving window, MSC, De-Trending and 2nd order Derivative
Training R²  -0.01007441883981297
Validation R²  -0.0004563332622220529


In [62]:
#Trial 45 Savitzky Golay, SNV and De-Trending, MSC
#Input for derivative is an array
print("Trial 45")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and MSC")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(msc(baseline(SNV(savitzky(spectra, 19)), wave_length)), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 45 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 45','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, De-Trending and MSC', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 45
PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and MSC
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [63]:
#Trial 46 Savitzky Golay, SNV, MSC and 1st order Derivative
#Input for derivative is an array
print("Trial 46")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, MSC and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(SNV(savitzky(spectra, 19)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 46','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, MSC and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 46
PRE PROCESSING STEP: Savitzky Golay, SNV, MSC and 1st order Derivative
Training R²  0.9829308299438972
Validation R²  0.3043178329900287


In [64]:
#Trial 47 Savitzky Golay, SNV, MSC and 2nd order Derivative
#Input for derivative is an array
print("Trial 47")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, MSC and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(SNV(savitzky(spectra, 19))), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 40 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 47','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, MSC and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 47
PRE PROCESSING STEP: Savitzky Golay, SNV, MSC and 2nd order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [65]:
#Trial 48 Savitzky Golay, SNV, De-Trending and 1st order Derivative
#Input for derivative is an array
print("Trial 48")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(SNV(savitzky(spectra, 19)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 41 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 48','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, De-Trending and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 48
PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and 1st order Derivative
Training R²  0.8875216640480004
Validation R²  0.465087330806887


In [66]:
#Trial 49 Savitzky Golay, SNV, De-Trending and 2nd order Derivative
#Input for derivative is an array
print("Trial 49")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(SNV(savitzky(spectra, 19)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 42 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 49','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, De-Trending and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 49
PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending and 2nd order Derivative
Training R²  0.0609833542100754
Validation R²  0.03134767258979465


In [67]:
#Trial 50 Savitzky Golay, MSC, De-Trending and 1st order Derivative
#Input for derivative is an array
print("Trial 50")
print("PRE PROCESSING STEP: Savitzky Golay, MSC, De-Trending and 1st order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(baseline(msc(savitzky(spectra, 19)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 43 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 50','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, MSC, De-Trending and 1st order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 50
PRE PROCESSING STEP: Savitzky Golay, MSC, De-Trending and 1st order Derivative
Training R²  0.002262351123856421
Validation R²  -0.0001324444702448524


In [68]:
#Trial 51 Savitzky Golay, MSC, De-Trending and 2nd order Derivative
#Input for derivative is an array
print("Trial 51")
print("PRE PROCESSING STEP: Savitzky Golay, MSC, De-Trending and 2nd order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(baseline(msc(savitzky(spectra, 19)), wave_length), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 43 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 51','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, MSC, De-Trending and 2nd order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 51
PRE PROCESSING STEP: Savitzky Golay, MSC, De-Trending and 2nd order Derivative
Training R²  -0.010074559283901108
Validation R²  -0.0004564160511724147


In [69]:
#Trial 52 Moving window , SNV, De-Trending, MSC and 1st Order Derivative
#Input for derivative is an array
print("Trial 52")
print("PRE PROCESSING STEP: Moving window , SNV, De-Trending, MSC and 1st Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 52','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV, De-Trending, MSC and 1st Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 52
PRE PROCESSING STEP: Moving window , SNV, De-Trending, MSC and 1st Order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [70]:
#Trial 53 Moving window , SNV, De-Trending, MSC and 2nd Order Derivative
#Input for derivative is an array
print("Trial 53")
print("PRE PROCESSING STEP: Moving window , SNV, De-Trending, MSC and 2nd Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(baseline(SNV(smooth_fast(spectra, 19, cols)), wave_length)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 53','Model' : 'SVM',
                              'Pre Processing': 'Moving window , SNV, De-Trending, MSC and 2nd Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 53
PRE PROCESSING STEP: Moving window , SNV, De-Trending, MSC and 2nd Order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [71]:
#Trial 54 Savitzky Golay, SNV, De-Trending, MSC and 1st Order Derivative
#Input for derivative is an array
print("Trial 54")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending, MSC and 1st Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_first(msc(baseline(SNV(savitzky(spectra, 19)), wave_length)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 54','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, De-Trending, MSC and 1st Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 54
PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending, MSC and 1st Order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [72]:
#Trial 55 Savitzky Golay, SNV, De-Trending, MSC and 2nd Order Derivative
#Input for derivative is an array
print("Trial 55")
print("PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending, MSC and 2nd Order Derivative")
#window length is 19 same as that for moving window smoothing
preprocessed_spectra = pd.DataFrame(derivate_second(msc(baseline(SNV(savitzky(spectra, 19)), wave_length)), 19), columns=cols)
#plotting_function(preprocessed_spectra, wave_length)
#input to base line removal is a dataframe
X_train, X_test, y_train, y_test = train_test_split(preprocessed_spectra, y, test_size=0.30, random_state=123) 
grid.fit(X_train, y_train)
# print best parameter after tuning
#print("Trial 38 : Best Parameters", grid.best_params_)
print("Training R\u00b2 ", grid.score(X_train, y_train))
print("Validation R\u00b2 ",grid.score(X_test, y_test))
result_df = result_df.append({'Trial No' : 'Trial 55','Model' : 'SVM',
                              'Pre Processing': 'Savitzky Golay, SNV, De-Trending, MSC and 2nd Order Derivative', 
                              'Training R\u00b2' : grid.score(X_train, y_train), 
                              'Validation R\u00b2' : grid.score(X_test, y_test)}, 
                             ignore_index=True)

Trial 55
PRE PROCESSING STEP: Savitzky Golay, SNV, De-Trending, MSC and 2nd Order Derivative
Training R²  -0.01007455928389489
Validation R²  -0.00045641605117463513


In [73]:
result_df

Unnamed: 0,Trial No,Model,Pre Processing,Training R²,Validation R²
0,Trial 1,SVM,,0.736168,0.600842
1,Trial 2,SVM,MW,0.692194,0.562931
2,Trial 3,SVM,SG,0.711168,0.588551
3,Trial 4,SVM,SNV,0.995624,0.466821
4,Trial 5,SVM,MSC,0.166823,0.027297
5,Trial 6,SVM,De-Trending,0.177411,0.100529
6,Trial 7,SVM,First Order Derivative,-0.000574,-0.000155
7,Trial 8,SVM,Second Order Derivative,-0.010075,-0.000456
8,Trial 9,SVM,Moving window and SNV,0.984426,0.75028
9,Trial 10,SVM,Moving window and MSC,0.978201,0.728669


In [37]:
print("R\u00b2 + y\u00b2 = 2")  # x² + y² = 2

R² + y² = 2


In [38]:
print("R\u00b2")

R²
