# Drill - Data Analysis

Analysis of the spectral data obtained for samples of the drill at Mina do Barroso using data obtained with the SciAps commercial device.

In [1]:
%matplotlib widget
from core.experiment import *

# Data table with samples and concentrations

In [2]:
#path to data
root_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\Converted1\\"

#get the concentration data
concentration_data = pd.read_csv(root_path+"Furo.csv",sep=";")
concentration_data.set_index("Sample ID", inplace=True)

concentration_data2 = concentration_data['Chemistry'].groupby('Sample ID').first()
concentration_data1 = concentration_data.groupby('Sample ID').mean()

concentration_data3 = pd.concat([concentration_data1, concentration_data2], axis=1, join='outer', sort=True)
concentration_data3.head(len(concentration_data))

Unnamed: 0_level_0,H,He,Li,Be,B,C,N,O,F,Ne,...,Rg,Cn,Nh,Fl,Mc,Lv,Ts,Og,Li2O,Chemistry
Sample ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DRI0000001,0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
DRI0000002,0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
DRI0000003,0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
DRI0000004,0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
DRI0000005,0,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DRI0000125,0,0,11000,108.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,FGP
DRI0000126,0,0,1860,209.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,FGP
DRI0000127,0,0,4280,160.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,FGP
DRI0000128,0,0,4970,139.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,FGP


## Load the data from folder

In [3]:
#path to data
root_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\Converted1\\"

#find all folders in path
sample_folders = [f for f in os.listdir(root_path) if os.path.isdir(root_path+f)]

#experiment for each sample
list_of_experiments = []

#sample identification
sample_ids = []
for i in range(0,len(sample_folders)):
    sample_f = sample_folders[i]
    print(" Reading Folder "+ sample_f + " - "+ str(i+1) + " of " + str(len(sample_folders)), end="\r")

    sample_folder = root_path + sample_f +"\\"
    
    current_experiment = experiment(sample_folder,Specialtype="SciAps")
    sg = current_experiment.mean_signal
    
    list_of_experiments.append(current_experiment)
    
    sample_ids.append(sample_f)

 Reading Folder DRI0000128 - 124 of 124

## 0. Pre-Processing

In [4]:
#list of processed experiments
list_of_signals_pro = []

for i in range(0,len(list_of_experiments)):
    
    print(" Removing baseline " + str(i+1) + " of " + str(len(list_of_experiments)), end = "\r")
    
    sg = list_of_experiments[i].mean_signal
    sg.remove_baseline()
    
    list_of_signals_pro.append(sg)

 Removing baseline 124 of 124

In [5]:
import pickle

targets = []
for i in range(0, len(sample_ids)):
    target = concentration_data3.loc[" " + str(sample_ids[i]),"Li"]
    targets.append(target)
targets = np.array(targets)[5:]
list_of_signals_pro = list_of_signals_pro[5:]

#turn experiment into an array
list_of_spectra = []
for i in range(0,len(list_of_signals_pro)):
    list_of_spectra.append(np.ndarray.flatten(list_of_signals_pro[i].spectrum))
#list_of_spectra = np.array(list_of_spectra)
wavelengths = list_of_signals_pro[0].wavelengths

spectra_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\simplibs-master\\"
spectra_file = "saved_spectra.p"

pickle.dump((wavelengths, list_of_spectra, targets), open( spectra_file, "wb" ) )


### Avoid loading data and pre-processing again

In [9]:
import pickle
import sklearn
from sklearn.preprocessing import *

spectra_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\simplibs-master\\"
spectra_file = "saved_spectra.p"

wavelengths, list_of_spectra, targets = pickle.load( open( spectra_file, "rb" ) )

list_of_spectra = np.array(list_of_spectra)

In [10]:
print(wavelengths[0])

[180.         180.03333333 180.06666667 ... 960.93333333 960.96666667
 961.        ]


In [11]:
print(list_of_spectra)

[[-0.00530593 -0.00520839 -0.0051108  ...  0.00105029  0.00104663
   0.00104297]
 [-0.00541027 -0.00531728 -0.00522424 ...  0.0012198   0.00121642
   0.00121304]
 [-0.00477276 -0.00467811 -0.00458341 ...  0.00134716  0.00134279
   0.00133842]
 ...
 [-0.0053932  -0.00529107 -0.00518888 ...  0.00301422  0.00300771
   0.00300119]
 [-0.004329   -0.0042383  -0.00414757 ...  0.00122428  0.00122094
   0.00121761]
 [-0.00485091 -0.00475673 -0.00466249 ...  0.00198194  0.00197571
   0.00196948]]


In [2]:
# use this one or the above only
import pickle
import io

data_file = "signals_ndyag_340.dat" 
saved_object = pickle.load( open( data_file, "rb" ) )
current_list_of_spectra = saved_object[0]
targets = saved_object[1]
wavelengths = [saved_object[2]]
sample_id = saved_object[3]

targets = np.array(targets)[5:]
list_of_spectra = current_list_of_spectra[5:]
list_of_spectra = np.array(list_of_spectra)

In [11]:
print(wavelengths[0])

[177.84457397 177.88824463 177.93188477 ... 926.47070312 926.5458374
 926.62097168]


In [12]:
print(list_of_spectra)

[[   9.70291884    5.49466906   10.57808182 ...  137.66451571
   135.06123714 -154.44890751]
 [   6.75960568    7.86447294    6.91100459 ...  130.93321764
   127.53955479 -147.94853589]
 [   5.67300532    6.62355749    8.16578076 ...  132.43297848
   132.58081144 -154.76571865]
 ...
 [   6.82944719    5.69272451    8.47683428 ...  144.23249093
   139.58919331 -156.14428761]
 [   7.79738102    6.22622467    9.42173334 ...  142.24588861
   137.45029036 -155.0313351 ]
 [   7.32307708    6.85061996    7.14066091 ...  119.30602272
   121.13857452 -126.39433959]]


In [3]:

import sklearn
from sklearn.preprocessing import *



#peak area calculation

def area_intensity(line, wavelengths, spectra):
    
    ratio_of_maximum = 0.5

    radius = 0.5
    
    lines = []
    
    for i in range(0, len(spectra)):
        intensity = get_peak_area(line, ratio_of_maximum, wavelengths, spectra[i], radius, False)
        lines.append(intensity)
    
    return lines

#standard scaler

def scaler_norm(data):
    scaler = StandardScaler()
    scaler.fit(data)
    new_data = scaler.transform(data)
    return new_data

#Normalize to a selected peak area

def internal_ref_norm(wavelengths, spectra, line):
    peak_areas = area_intensity(line, wavelengths, spectra)
    
    norm_spectra = []
    
    for k in range(len(spectra)):
        #norm_spectrum = spectra[0]
        
        #for i in range(0,len(spectra[k])):
            #norm_spectrum[i] = spectra[k][i]/peak_areas[k]
            
        norm_spectra.append(spectra[k]/peak_areas[k])
        #print(norm_spectra)
    return np.array(norm_spectra)
    
#Normalize to total area

def total_area_norm(wavelengths, spectra):
    norm_spectra = []
    #print(len(spectra))
    for k in range(len(spectra)):        
        
        area = np.trapz(spectra[k],wavelengths)
        norm_spectra.append(spectra[k]/area)
        
    return np.array(norm_spectra)

def areas(wavelengths, spectra):
    areas = []
    for k in range(len(spectra)):        
        
        area = np.trapz(spectra[k],wavelengths)
        areas.append(area)

    return norm_spectra


#Normalize to peak maximum

def maximum_norm(spectra):
    norm_spectra = []
    for k in range(len(spectra)):
        maximum = max(spectra[k])
        
        #norm_spectrum = spectra[0]
        #for i in range(0,len(spectra[k])):
            #norm_spectrum[i] = spectra[k][i]/maximum
            
        norm_spectra.append(spectra[k]/maximum)
    return np.array(norm_spectra)



In [4]:
list_of_scaler_norm = scaler_norm(list_of_spectra)

line = 656

list_of_internal_ref_norm = internal_ref_norm(wavelengths[0], list_of_spectra, line)

list_of_total_area_norm = total_area_norm(wavelengths[0], list_of_spectra)

list_of_total_maximum_norm = maximum_norm(list_of_spectra)

list_of_lists = [list_of_spectra, list_of_scaler_norm, list_of_internal_ref_norm,
                 list_of_total_area_norm, list_of_total_maximum_norm]

In [13]:
print(list_of_spectra[:,0])

[9.70291884 6.75960568 5.67300532 8.14146641 7.25238868 8.4883861
 7.98010395 7.79498437 7.03030176 6.3892147  7.0948482  8.30175779
 7.73977084 5.85386459 8.20806502 7.4219731  6.79509938 8.00057179
 7.2383375  6.15254042 6.38605391 7.80186442 6.707756   7.55693673
 4.50069449 6.70550303 5.90235436 5.65015045 7.86292076 4.55309836
 8.35233373 6.4906579  7.5769983  8.37857556 7.89220267 5.70878769
 5.71672933 6.48914866 8.16000932 5.61427427 5.14145858 6.57655043
 8.4267747  7.20443133 6.18527345 7.76596926 6.2259409  7.2261716
 5.82872242 7.28641476 5.69265699 5.76356721 8.98088913 7.65870381
 7.06089455 6.06574406 6.82610547 7.84013911 5.05433836 6.29971917
 7.16663769 6.17862712 7.70141498 8.55062084 7.83931831 5.46314072
 7.40792605 8.03396726 7.25093678 5.31579905 6.00866726 9.03566564
 8.183537   8.71626522 6.90976222 6.19709225 5.86473121 7.56386943
 4.34724608 6.02447696 7.08876533 5.46693943 6.7982463  7.08228858
 7.75991808 6.35873476 7.48143942 8.06595803 8.9439065  5.206397

### 0.1 Set Target Values

In [5]:


subplots(figsize=[2,5])
plot(targets,-arange(5,len(targets) + 5),'o', ms = 2,ls=':', lw = 0.5)
ylabel('depth (m)')
xlabel('Li (ppm)')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Li (ppm)')

# Leave One Out Cross Validation

In [6]:
from core.errors import *
from tqdm.notebook import trange, tqdm

def LeaveOneOut_predictions(model, data, targets, progress_bar = True):
    
    all_test_predictions = []
    all_train_predictions = []
    
    if progress_bar:
        bar = tqdm(total = len(data))
    
    for i in range(0,len(data)):
        if progress_bar:
            bar.update(1)
        
        #data to test
        data_test = data[i].reshape(1,-1)
        targets_test = targets[i]
        
        #data to train
        data_train = np.delete(data,i,axis=0)
        targets_train = np.delete(targets,i)
        
        #fit the model
        model.fit(data_train,targets_train)
        
        all_test_predictions.append(model.predict(data_test))
        all_train_predictions.append(model.predict(data_train))
    
    all_test_predictions = np.array(all_test_predictions)
    all_train_predictions = np.array(all_train_predictions)
    
    all_test_predictions.flatten()
    
    
    MAE = mean_absolute_error(targets,all_test_predictions)
    
    while type(MAE) == list or type(MAE) == np.ndarray:
        MAE = MAE[0]
    
    MAPE = mean_absolute_percentage_error(targets,all_test_predictions)
    
    while type(MAPE) == list or type(MAPE) == np.ndarray:
        MAPE = MAPE[0]
    if progress_bar:
        bar.close()
        
    return MAE, MAPE,  all_test_predictions, all_train_predictions

In [10]:
univariate_model = sklearn.linear_model.LinearRegression()

data = np.array(line_1).reshape(-1,1)
data_targets = np.array(targets)

MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(univariate_model, data, data_targets)
subplots()
plot( targets, all_test_predictions, 'o', label='LOO predictions')
plot([np.min([np.min(targets),np.min(all_test_predictions)]),
      np.max([np.max(targets),np.max(all_test_predictions)])],
     [np.min([np.min(targets),np.min(all_test_predictions)]),
      np.max([np.max(targets),np.max(all_test_predictions)])],'-')
print(MAE,MAPE)

NameError: name 'line_1' is not defined

In [24]:
from sklearn import cross_decomposition

pls_model = sklearn.cross_decomposition.PLSRegression(n_components = 5)
#pls_model = sklearn.linear_model.Ridge(alpha = 60000.)


end = -1
data = np.array(list_of_spectra)[:end]
list_of_norm_signals = total_area_norm(wavelengths[0], list_of_spectra)

line = 656
#list_of_norm_signals = internal_ref_norm(wavelengths[0], list_of_spectra, line)

data = np.array(list_of_norm_signals)[:end]

data_targets = np.array(targets)[:end]

MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(pls_model, data, data_targets)
print(MAE, MAPE)

114


HBox(children=(IntProgress(value=0, max=113), HTML(value='')))

ValueError: Found input variables with inconsistent numbers of samples: [112, 117]

In [15]:
subplots()
style.use('bmh')
plot( data_targets, all_test_predictions.flatten(), 'o',ms=2, label='LOO predictions')
plot([np.min([np.min(targets),np.min(all_test_predictions)]),
      np.max([np.max(targets),np.max(all_test_predictions)])],
     [np.min([np.min(targets),np.min(all_test_predictions)]),
      np.max([np.max(targets),np.max(all_test_predictions)])],'-')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x2950010b6d8>]

### Peak finding and errors

In [35]:
from core.line_area_intensity import *


list_of_peak_areas = []

for i in range(len(list_of_lists)):
    print(i)
    if i == 1:
        peaks = None
    else:
        #print(list_of_lists[i])
        peaks = line_area_intensity(wavelengths[0], list_of_lists[i], targets)
        
    
    list_of_peak_areas.append(peaks)

0
[[4.015599114639082, 6.721671791023485, 4.831342219188686, 4.831342219188686, 8.150531402138324, 6.721671791023485, 6.721671791023485, 6.721671791023485, 4.831342219188686, 4.831342219188686, 7.227759532412023, 14.933345409909236, 7.91143931159846, 6.249982221392605, 16.158517968685395, 9.827741627699663, 16.663444318811397, 34.80296360011539, 5.236206698462745, 7.413413978419944, 8.947314827180584, 2.6321394500326236, 15.476970822841032, 19.131961295229132, 2.5468695000356374, 24.149665800374585, 4.59068566807011, 4.59068566807011, 19.878667914231606, 14.464625597225336, 13.445167520769887, 7.205830510088972, 16.176270507692344, 16.176270507692344, 3.7665135321761216, 17.390619062886664, 16.154523623536857, 28.397989452516274, 13.247095448090878, 66.8616438820153, 13.28738888929296, 24.172653513541135, 4.6552727707182795, 297.2347937451784, 205.16653814775344, 26.273228633834243, 17.269401967418517, 96.895045300425, 124.79292346093881, 335.4423178996312, 71.57906802502931, 34.376723

1
2
[[0.03217809955303579, 0.053862603780776175, 0.03871487328926479, 0.03871487328926479, 0.06531244862362454, 0.053862603780776175, 0.053862603780776175, 0.053862603780776175, 0.03871487328926479, 0.03871487328926479, 0.05791802397092147, 0.11966500179447052, 0.0633965379781092, 0.05008282559668044, 0.12948264629550732, 0.07875239520913879, 0.13352875993819302, 0.2788857143090481, 0.037288838131741574, 0.059405724081339956, 0.07169729323619191, 0.05475184499141137, 0.059082125735779416, 0.021092056961534267, 0.12402122166554938, 0.1533096650405781, 0.02040876541233824, 0.193517910556398, 0.03678642619110713, 0.03678642619110713, 0.1592932304406971, 0.11590902108925594, 0.10773982328392466, 0.057742300686943776, 0.12962490227055712, 0.12962490227055712, 0.03018210830965163, 0.13935581105535377, 0.1294506385100107, 0.22756089580212252, 0.10615261731749623, 0.5357807320135606, 0.10647549973818844, 0.19370211742100396, 0.03730398039896321, 2.3818241091067582, 1.64405586837122, 0.21053460

3
[[0.00029778572176522653, 0.0004984605854857128, 0.00035827897376304455, 0.00035827897376304455, 0.0006044208615122088, 0.0004984605854857128, 0.0004984605854857128, 0.0004984605854857128, 0.00035827897376304455, 0.00035827897376304455, 0.0005359906523680619, 0.0011074155846492655, 0.0005866904534907266, 0.000463481391863667, 0.0011982709923463774, 0.0007287981320791452, 0.0012357149336648617, 0.0025808915032011367, 0.00034508201948255655, 0.0005497582724981049, 0.0006635081161265268, 0.0005066898886269934, 0.0005467635969816855, 0.00019519218017984252, 0.0011477293414122996, 0.0014187733238402742, 0.00018886879657517607, 0.0017908724091855446, 0.00034043254967373117, 0.00034043254967373117, 0.001474147021048878, 0.0010726566199882677, 0.000997056429221761, 0.0005343644567362269, 0.00119958747152988, 0.00119958747152988, 0.000279314224029417, 0.0012896402010613853, 0.0011979747827606442, 0.0021059163388559266, 0.0009823679522489735, 0.004958274547187215, 0.0009853560023829777, 0.0017

4
[[0.0012083882099639589, 0.002022709118025901, 0.002022709118025901, 0.002022709118025901, 0.001453864494263141, 0.001453864494263141, 0.002452686578043849, 0.002022709118025901, 0.002022709118025901, 0.002022709118025901, 0.001453864494263141, 0.001453864494263141, 0.0021750028212679435, 0.004493794826992775, 0.0023807381451820897, 0.001880766633623562, 0.004862478062776474, 0.0026044104334005305, 0.002957398578504478, 0.0050144222760803185, 0.010473030221784599, 0.0014003124170636541, 0.002230870610177668, 0.0026924574488970247, 0.0026924574488970247, 0.0020561029047835647, 0.002218718480907128, 0.0007920726615384973, 0.004657384498396432, 0.005757257087329539, 0.0007664129282573149, 0.007267202376193722, 0.001381445278420881, 0.001381445278420881, 0.005981958669569848, 0.0043527460122967715, 0.004045967102109118, 0.002168403862738367, 0.004867820219263296, 0.004867820219263296, 0.0011334325003613025, 0.005233246257810722, 0.004861276070395423, 0.008545622872572649, 0.0039863625572

In [7]:
import pickle

peaks_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\simplibs-master\\"
peaks_file = "saved_nd_yag_340_peaks.p"


pickle.dump(list_of_peak_areas, open( peaks_file, "wb" ) )

 


NameError: name 'list_of_peak_areas' is not defined

## Avoid calculating peaks again

In [7]:
peaks_path = "C:\\Users\\Miguel\\Desktop\\Furo SciAps\\simplibs-master\\"
peaks_file = "saved_nd_yag_340_peaks.p"

saved_list_of_peak_areas = pickle.load( open( peaks_file, "rb" ) )

lines = saved_list_of_peak_areas[0].peaks

data = np.array(saved_list_of_peak_areas[0].data)

target = saved_list_of_peak_areas[0].target

In [25]:
print(len(saved_list_of_peak_areas[0].data[0]))
print(saved_list_of_peak_areas[0].peaks)
print(wavelengths[0][-3])

135
[238.116745   243.42352295 250.57090759 251.3298645  251.51896667
 251.80207825 252.30404663 252.741745   254.66304016 251.38937378
 251.58334351 251.89335632 252.39628601 252.82099915 257.55569458
 259.32455444 259.88604736 260.52069092 261.15341187 263.07705688
 273.92630005 274.62036133 274.89724731 275.552948   279.50161743
 279.80401611 280.23974609 285.18017578 288.12942505 302.03424072
 308.17245483 309.22097778 344.01156616 356.95877075 358.05697632
 360.81002808 361.79742432 363.09036255 364.70507812 371.92208862
 373.41320801 373.64212036 374.50079346 374.8765564  375.74612427
 376.3119812  381.52233887 381.97338867 382.51010132 383.34979248
 385.91931152 390.49090576 393.28781128 394.33624268 396.07687378
 396.76895142 402.412323   403.06588745 403.29019165 404.12988281
 404.57681274 406.35824585 407.13442993 407.74295044 414.35717773
 420.1892395  421.50973511 422.66650391 425.43261719 426.00308228
 427.14019775 427.44943237 428.93878174 430.77532959 432.54776001
 438.3

In [8]:
import sklearn
univariate_model = sklearn.linear_model.LinearRegression()
#print(len(list_of_spectra))
#print(len(data[0]))
#print(len(targets))
data = np.array(data)
data_targets = np.array(targets)
MAE_list = []
MAPE_list = []
indexes = []

for i in range(len(data[0,:])):
    data_i = np.array(data[:,i]).reshape(-1,1)
    if data[0,i] != None:
        indexes.append(i)
        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(univariate_model, data_i, data_targets)
        MAE_list.append(MAE)
        MAPE_list.append(MAPE)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




In [9]:
subplots()
plot(lines[indexes], MAE_list,'o')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1b29a27d648>]

## Minimum error

In [10]:
max_alpha = 100

MAE_lists_list = []

MAPE_lists_list = []

for j in range(len(list_of_lists)):
    bar = tqdm(total = max_alpha//(max_alpha//10))
    MAE_list = []
    MAPE_list = []
    
    for i in range(0,max_alpha, max_alpha//10):
        ridge_model = sklearn.linear_model.Ridge(alpha = i)
    
        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(ridge_model, list_of_lists[j], targets, progress_bar =False)

        MAE_list.append(MAE)
        MAPE_list.append(MAPE)
        bar.update(1)
    
    MAE_lists_list.append(MAE_list)
    MAPE_lists_list.append(MAPE_list)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)




HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)
  overwrite_a=False)


In [13]:
subplots()

for i in range(len(list_of_lists)):
    plot(range(0,max_alpha,max_alpha//10),MAE_lists_list[i], label = list_of_norms[i])
    print(range(0,max_alpha,max_alpha//10)[MAE_lists_list[i].index(min(MAE_lists_list[i]))])
xlabel("alpha")
ylabel("MAE")
legend()
subplots()

for i in range(len(list_of_lists)):
    plot(range(0,max_alpha,max_alpha//10),MAPE_lists_list[i], label = list_of_norms[i])
    print(range(0,max_alpha,max_alpha//10)[MAPE_lists_list[i].index(min(MAPE_lists_list[i]))])
xlabel("alpha")
ylabel("MAPE")
legend()

list_of_ridge_minima = [60000, 0,10, 0, 0]

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

90
0
10
0
0


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

90
0
10
0
0


In [28]:
max_pca_n_components = 100

MAE_lists_list = []

MAPE_lists_list = []

for j in range(len(list_of_lists)):
    bar = tqdm(total = max_pca_n_components-1)

    
    MAE_list = []
    MAPE_list = []
    
    for i in range(80, max_pca_n_components, 2):
    
    
        steps = [
        ('pca', sklearn.decomposition.PCA()),
        ('estimator', sklearn.linear_model.LinearRegression())]

        pipe = sklearn.pipeline.Pipeline(steps)

        pca_model = pipe.set_params(pca__n_components = i)
    
        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(pca_model, list_of_lists[j], targets,
                                                                                         progress_bar = False)

        MAE_list.append(MAE)
        MAPE_list.append(MAPE)
        
        bar.update(12)
    
    MAE_lists_list.append(MAE_list)
    MAPE_lists_list.append(MAPE_list)

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))

HBox(children=(IntProgress(value=0, max=99), HTML(value='')))

In [31]:
subplots()
for i in range(len(list_of_lists)):
    plot(range(80,max_pca_n_components,2),MAE_lists_list[i], label = list_of_norms[i])
    print(range(80,max_pca_n_components,2)[MAE_lists_list[i].index(min(MAE_lists_list[i]))])
xlabel("pca_n_components")
ylabel("MAE")
legend()
subplots()

for i in range(len(list_of_lists)):
    plot(range(80,max_pca_n_components,2),MAPE_lists_list[i], label = list_of_norms[i])
    print(range(80,max_pca_n_components,2)[MAPE_lists_list[i].index(min(MAPE_lists_list[i]))])
xlabel("pca_n_components")
ylabel("MAPE")
legend()

list_of_pca_minima = [24, 86, 23, 24, 24]

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

94
86
94
90
90


  


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

94
90
92
90
94


In [17]:
max_n_components = 15

MAE_lists_list = []

MAPE_lists_list = []

for j in range(len(list_of_lists)):
    bar = tqdm(total = max_n_components-1)
    
    MAE_list = []
    MAPE_list = []
    
    for i in range(5,max_n_components):

        pls_model = sklearn.cross_decomposition.PLSRegression(n_components = i, scale = False)
    
        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(pls_model, list_of_lists[j], targets,
                                                                                        progress_bar = False)

        MAE_list.append(MAE)
        MAPE_list.append(MAPE)
        
        bar.update(1)
    
    MAE_lists_list.append(MAE_list)
    MAPE_lists_list.append(MAPE_list)
    

    

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

HBox(children=(IntProgress(value=0, max=14), HTML(value='')))

In [19]:
subplots()

for i in range(len(list_of_lists)):
    plot(range(5,max_n_components),MAE_lists_list[i], label = list_of_norms[i])
    print(range(5,max_n_components)[MAE_lists_list[i].index(min(MAE_lists_list[i]))])
xlabel("n_components")
ylabel("MAE")
legend()
subplots()

for i in range(len(list_of_lists)):
    plot(range(5,max_n_components),MAPE_lists_list[i], label = list_of_norms[i])
    print(range(5,max_n_components)[MAPE_lists_list[i].index(min(MAPE_lists_list[i]))])
xlabel("n_components")
ylabel("MAPE")
legend()

list_of_pls_minima = [13, 6, 10, 10, 10]

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

13
6
10
10
10


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

13
7
12
12
12


In [22]:
max_layer_sizes = 20

MAE_lists_list = []

MAPE_lists_list = []

for j in range(len(list_of_lists)):
    bar = tqdm(total = max_layer_sizes-10)
    
    MAE_list = []
    MAPE_list = []
    
    for i in range(10,max_layer_sizes):

        mlp_model = sklearn.neural_network.MLPRegressor(hidden_layer_sizes = i)
    
        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(mlp_model, list_of_lists[j], targets,
                                                                                        progress_bar = False)

        MAE_list.append(MAE)
        MAPE_list.append(MAPE)
        
        bar.update(1)
    
    MAE_lists_list.append(MAE_list)
    MAPE_lists_list.append(MAPE_list)

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

































































HBox(children=(IntProgress(value=0, max=10), HTML(value='')))













































































HBox(children=(IntProgress(value=0, max=10), HTML(value='')))













































































HBox(children=(IntProgress(value=0, max=10), HTML(value='')))













































































HBox(children=(IntProgress(value=0, max=10), HTML(value='')))













































































In [31]:
subplots()

for i in range(5,len(MAE_lists_list)):
    plot(range(10,max_layer_sizes),MAE_lists_list[i], label = list_of_norms[i-5])
    print(range(10,max_layer_sizes)[MAE_lists_list[i].index(min(MAE_lists_list[i]))])
xlabel("hidden layer sizes")
ylabel("MAE")
legend()
subplots()

for i in range(5,len(MAE_lists_list)):
    plot(range(10,max_layer_sizes),MAPE_lists_list[i], label = list_of_norms[i-5])
    print(range(10,max_layer_sizes)[MAPE_lists_list[i].index(min(MAPE_lists_list[i]))])
xlabel("hidden layer sizes")
ylabel("MAPE")
legend()

list_of_mlp_minima = [16, 10, 18, 10, 10]

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

16
19
18
19
19


  if __name__ == '__main__':


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

16
17
10
19
19


## Error tables

In [38]:
import sklearn
from sklearn import linear_model

univariate_model = sklearn.linear_model.LinearRegression()

multivariate_model = sklearn.linear_model.LinearRegression()

min_error_alpha = 60000

ridge_model = sklearn.linear_model.Ridge(alpha = min_error_alpha)

min_error_pca_n_components = 66

steps = [
    ('pca', sklearn.decomposition.PCA()),
    ('estimator', sklearn.linear_model.LinearRegression())]

pipe = sklearn.pipeline.Pipeline(steps)

pca_model = pipe.set_params(pca__n_components = min_error_pca_n_components)

min_error_n_components = 5

pls_model = sklearn.cross_decomposition.PLSRegression(n_components = min_error_n_components, scale = False)

min_error_layer_sizes = 4

mlp_model = sklearn.neural_network.MLPRegressor(hidden_layer_sizes = min_error_layer_sizes)

models = [univariate_model, multivariate_model, ridge_model, pca_model, pls_model, mlp_model]


### Univariate

In [12]:
list_of_norms = ["None", "Standard scaling", "Internal reference",
                 "Total area", "Maximum"]



text = ["Models", "Normalizations", "Mean Absolute Error", "Mean Absolute Percentage Error"]

# peak area of 610.3 line

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[0], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        #print(np.isclose(saved_list_of_peak_areas[i].peaks, 610.22436523))
        line_index = np.where(np.isclose(saved_list_of_peak_areas[i].peaks, 610.22436523))[0][0]
        
        data = np.array(saved_list_of_peak_areas[i].data)[:,line_index].reshape(-1, 1)
        print(len(data))
        print(len(targets))

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[0], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

NameError: name 'models' is not defined

In [112]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,1838.82,112.463
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,1913.48,121.958
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,1997.47,134.473
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,1935.43,129.169


In [113]:
# peak intensity of 610.3 line

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[0], list_of_norms[i]]
    
    
        
    line_index = np.where(np.isclose(wavelengths[0], 610.22436523))[0][0]
        
    data = np.array(list_of_lists[i])[:,line_index].reshape(-1, 1)
    

    MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[0], data, targets)

    current_line.append(MAE)
    current_line.append(MAPE)
    
    
    data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




In [114]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,1893.998605,120.557414
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,1893.998605,120.557414
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,2047.403371,138.941707
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,2038.213188,139.423916
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,2657.248274,217.126544


### Multivariate

In [115]:
# All peak areas

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        
        
        
        data = np.array(saved_list_of_peak_areas[i].data)
        #print(data)

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data[:,indexes], targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




In [116]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,6490.61,562.066
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,7646.05,675.215
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,10743.8,810.034
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,11085.1,836.873


In [117]:
# same peaks but peak intensity

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
    
        index_list = []

        for j in range(len(saved_list_of_peak_areas[i].peaks)):

            index_list.append(np.where(np.isclose(wavelengths[0], saved_list_of_peak_areas[i].peaks[j]))[0][0])


        data = np.array(list_of_lists[i][:,index_list])


        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    
    
    data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




In [118]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,5314.59,420.519
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,8136.08,637.316
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,5288.28,488.857
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,6090.11,670.47


In [119]:
# all peak intensity 

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    
        
    data = np.array(list_of_lists[i])
    

    MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

    current_line.append(MAE)
    current_line.append(MAPE)
    
    
    data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))

KeyboardInterrupt: 

In [None]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

In [None]:
# 6 peak areas

data_test = []

#Li_lines = [288.13333333, 422.63333333, 453.53333333, 610.3, 670.76666667, 812.6]
        
Li_lines = [309.22097778, 288.12942505, 422.66650391, 610.22436523, 670.71270752, 812.45605469]

for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        
        
        index_list = []
        
        for j in range(len(Li_lines)):
            index_list.append(np.where(np.isclose(saved_list_of_peak_areas[i].peaks, Li_lines[j]))[0][0])
        
        data = array(saved_list_of_peak_areas[i].data)[:,index_list]
        

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

In [None]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

In [None]:
# 6 peak intensities

data_test = []

#Li_lines = [288.13333333, 422.63333333, 453.53333333, 610.3, 670.76666667, 812.6]
        
Li_lines = [309.22097778, 288.12942505, 422.66650391, 610.22436523, 670.71270752, 812.45605469]

for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        
        
        index_list = []
        
        for j in range(len(Li_lines)):
            index_list.append(np.where(np.isclose(wavelengths[0], Li_lines[j]))[0][0])
        
        data = array(list_of_lists[i])[:,index_list]
        

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

In [None]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

In [None]:
# 3 peak areas

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        Li_lines = [610.3, 670.76666667, 812.6]
        
        index_list = []
        
        for j in range(len(Li_lines)):
            index_list.append(np.where(np.isclose(saved_list_of_peak_areas[i].peaks, Li_lines[j]))[0][0])
        
        data = array(saved_list_of_peak_areas[i].data)[:,index_list]
        

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

In [None]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

In [None]:
# 3 peak intensities

data_test = []


for i in range(len(list_of_norms)):
    current_line = [models[1], list_of_norms[i]]
    
    if saved_list_of_peak_areas[i] != None:
        Li_lines = [610.3, 670.76666667, 812.6]
        
        index_list = []
        
        for j in range(len(Li_lines)):
            index_list.append(np.where(np.isclose(wavelengths[0], Li_lines[j]))[0][0])
        
        data = array(list_of_lists[i])[:,index_list]
        

        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[1], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
    else:
        current_line.append("None")
        current_line.append("None")
    data_test.append(current_line)

In [None]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

## All except univariate

In [13]:
# all peak intensity 

data_test = []

models_prediction = []

for j in range(1,len(models)):
    for i in range(len(list_of_norms)):
        current_line = [models[j], list_of_norms[i]]



        data = np.array(list_of_lists[i])


        MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[j], data, targets)

        current_line.append(MAE)
        current_line.append(MAPE)
        
        models_prediction.append(all_test_predictions)

        data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))








HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












In [21]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,1148.001483,65.521849
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,1447.164947,81.280779
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,1196.45997,72.805316
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,1211.551523,75.546046
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,1389.700116,116.172717
5,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",,1118.94048,61.329635
6,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Standard scaling,1954.571137,125.588244
7,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Internal reference,2094.974532,126.749889
8,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Total area,2862.879945,245.362998
9,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Maximum,2862.221461,245.271458


In [38]:
subplots()

model_no = 4
#print(models_prediction[model_no*5])
for i in range(5):
    plot(targets,models_prediction[model_no * 5 + i].flatten(), 'o', label = data_test[model_no * 5 + i][1])
xlabel("Real concentration (ppm)")
ylabel("Predicted concentration (ppm)")
legend()

plot([0,14000],[0,14000])

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

[<matplotlib.lines.Line2D at 0x1ff2de3cb08>]

In [16]:
# All peak areas

data_test = []

for j in range(1,len(models)):
    for i in range(len(list_of_norms)):
        current_line = [models[j], list_of_norms[i]]

        if saved_list_of_peak_areas[i] != None:



            data = np.array(saved_list_of_peak_areas[i].data)
            

            MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[j], data[:,indexes], targets)

            current_line.append(MAE)
            current_line.append(MAPE)
        else:
            current_line.append("None")
            current_line.append("None")
        data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












In [17]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,[2116.176062110856],[130.60880555242704]
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,[152000.30970528454],[1887.3330397391971]
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,[2091.1421519054834],[143.79567586208995]
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,[2474.731129975679],[243.386907286658]
5,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",,[1448.2019915632427],[83.90925929060779]
6,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Standard scaling,,
7,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Internal reference,[2829.0368594551687],[240.59808451995258]
8,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Total area,[2863.1601277677782],[245.40086989203812]
9,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Maximum,[2863.1501323193015],[245.39940391405474]


In [18]:
# same peaks but peak intensity

data_test = []

for k in range(1,len(models)):
    for i in range(len(list_of_norms)):
        current_line = [models[k], list_of_norms[i]]

        if saved_list_of_peak_areas[i] != None:

            index_list = []

            for j in range(len(saved_list_of_peak_areas[i].peaks)):

                index_list.append(np.where(np.isclose(wavelengths[0], saved_list_of_peak_areas[i].peaks[j]))[0][0])


            data = np.array(list_of_lists[i][:,index_list])

            MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[k], data, targets)

            current_line.append(MAE)
            current_line.append(MAPE)
        else:
            current_line.append("None")
            current_line.append("None")


        data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












In [19]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,[2187.43383638499],[155.8344398144897]
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,[2169.887732591727],[162.63501933920833]
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,[2197.2935785032237],[169.17140626930208]
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,[2851.1355475000364],[326.91614462389396]
5,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",,[1225.4188705637844],[67.16818297009526]
6,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Standard scaling,,
7,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Internal reference,[2251.2813275728286],[155.56897541006828]
8,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Total area,[2863.051508002929],[245.38671491896457]
9,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Maximum,[2862.8081935144387],[245.3538456121828]


In [91]:
max_n_components = 25

error_list = []

error_norm_list = []

for i in range(1,max_n_components):

    pls_model = sklearn.cross_decomposition.PLSRegression(n_components = i, scale = False)
    
    MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(pls_model, list_of_lists[0], targets)
    
    error_list.append(MAE)
    
    MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(pls_model, list_of_lists[3], targets)
    
    error_norm_list.append(MAE)
    

    

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




In [94]:
subplots()

plot(range(1,max_n_components),error_list)
plot(range(1,max_n_components),error_norm_list)
xlabel("n_components")
ylabel("MAE")

print(range(1,max_n_components)[error_list.index(min(error_list))])
print(range(1,max_n_components)[error_norm_list.index(min(error_norm_list))])


  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

21
21


In [41]:
# 6 peak areas

data_test = []

min_error_pca_n_components = 3

pca_model = pipe.set_params(pca__n_components = min_error_pca_n_components)

min_error_n_components = 3

pls_model = sklearn.cross_decomposition.PLSRegression(n_components = min_error_n_components, scale = False)

#Li_lines = [288.13333333, 422.63333333, 453.53333333, 610.3, 670.76666667, 812.6]
        
Li_lines = [309.22097778, 288.12942505, 422.66650391, 610.22436523, 670.71270752, 812.45605469]

for k in range(1,len(models)):
    for i in range(len(list_of_norms)):
        current_line = [models[k], list_of_norms[i]]

        if saved_list_of_peak_areas[i] != None:

            index_list = []

            for j in range(len(Li_lines)):
                index_list.append(np.where(np.isclose(saved_list_of_peak_areas[i].peaks, Li_lines[j]))[0][0])

            data = array(saved_list_of_peak_areas[i].data)[:,index_list]


            MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[k], data, targets)

            current_line.append(MAE)
            current_line.append(MAPE)
        else:
            current_line.append("None")
            current_line.append("None")
        data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))

NameError: name 'numpy' is not defined

In [42]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error


In [47]:
# 6 peak intensities

data_test = []

#Li_lines = [288.13333333, 422.63333333, 453.53333333, 610.3, 670.76666667, 812.6]
        
Li_lines = [309.22097778, 288.12942505, 422.66650391, 610.22436523, 670.71270752, 812.45605469]

for k in range(1,len(models)):
    for i in range(len(list_of_norms)):
        current_line = [models[k], list_of_norms[i]]

        if saved_list_of_peak_areas[i] != None:


            index_list = []

            for j in range(len(Li_lines)):
                index_list.append(np.where(np.isclose(wavelengths[0], Li_lines[j]))[0][0])

            data = array(list_of_lists[i])[:,index_list]


            MAE, MAPE, all_test_predictions, all_train_predictions = LeaveOneOut_predictions(models[k], data, targets)

            current_line.append(MAE)
            current_line.append(MAPE)
        else:
            current_line.append("None")
            current_line.append("None")
        data_test.append(current_line)

HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))




HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












HBox(children=(IntProgress(value=0, max=123), HTML(value='')))












In [48]:
error_table_train = pd.DataFrame(data_test, columns = text)
error_table_train

Unnamed: 0,Models,Normalizations,Mean Absolute Error,Mean Absolute Percentage Error
0,"LinearRegression(copy_X=True, fit_intercept=Tr...",,1422.93,88.1658
1,"LinearRegression(copy_X=True, fit_intercept=Tr...",Standard scaling,,
2,"LinearRegression(copy_X=True, fit_intercept=Tr...",Internal reference,1512.29,95.7015
3,"LinearRegression(copy_X=True, fit_intercept=Tr...",Total area,1592.89,101.928
4,"LinearRegression(copy_X=True, fit_intercept=Tr...",Maximum,1791.32,116.879
5,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",,1426.63,87.1468
6,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Standard scaling,,
7,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Internal reference,2764.4,233.315
8,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Total area,2863.16,245.4
9,"Ridge(alpha=60000, copy_X=True, fit_intercept=...",Maximum,2863.14,245.397


In [56]:
MAE_bar_values = []
MAPE_bar_values = []

for i in range(len(data_test)):
    #print(data_test[i])
    if data_test[i][-2] == 'None':
        MAE_bar_values.append(0)
    else:
        MAE_bar_values.append(data_test[i][-2])
    if data_test[i][-1] == 'None':
        MAPE_bar_values.append(0)
    else:
        MAPE_bar_values.append(data_test[i][-1])
    
print(MAE_bar_values)

[1422.9334102665973, 0, 1512.2896911889986, 1592.891889498853, 1791.3206306560858, 1426.6273144009554, 0, 2764.4027242085795, 2863.155546891052, 2863.1383268240006, 1758.5448673945996, 0, 1850.4891731329983, 1758.8793620001254, 1910.0013527955732, 1417.0463193255312, 0, 1510.089000800856, 1599.188700028245, 1844.7186491246998, 3486.714687247304, 0, 4275.624082939182, 4293.549332517483, 4293.322244620654]


In [80]:
subplots()

bar(range(len(MAE_bar_values)),MAE_bar_values)

  """Entry point for launching an IPython kernel.


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<BarContainer object of 25 artists>