In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.metrics import r2_score, mean_squared_error
from bayes_opt import BayesianOptimization
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.decomposition import PCA
import color_functions
from color_functions import new_spectra_from_stock
from RL_functions import obtain_data

## Using Actual Data from Excel

In [2]:
Sample_concentrations = pd.read_excel('Sample_concentrations.xlsx')
red_conc = Sample_concentrations[['Red vol frac']].values
green_conc = Sample_concentrations[['Green vol frac']].values
blue_conc = Sample_concentrations[['Blue vol frac']].values
sample_conc = np.hstack((red_conc, green_conc, blue_conc))
y_train = sample_conc[0:-1,:]

In [3]:
#Normalize y_train
for j in range(y_train.shape[0]):
    row_sum = np.sum(y_train[j,:])
    for i in range(y_train.shape[1]):
        y_train[j,i] = y_train[j,i]/row_sum

In [4]:
sample_spectra = pd.read_excel('Sample_spectra.xlsx')
sample_spectra = np.asarray(sample_spectra)
x_train = sample_spectra[:,1:-1]
x_test = sample_spectra[:,-1].reshape(-1,1)

In [5]:
#Normalizae x_train
x_train = MinMaxScaler().fit(x_train).transform(x_train)
x_test = MinMaxScaler().fit(x_test).transform(x_test).T
x_train = x_train.T
x_test = x_test.reshape(1,-1)[0].reshape(-1,1).T

In [6]:
pca = PCA(n_components=0.99)
x_train_new = pca.fit(x_train)
x_train = pca.transform(x_train)
x_test = pca.transform(x_test)

## Linear Regression

In [7]:
reg = LinearRegression().fit(x_train, y_train)
prediction_array = reg.predict(x_test)

## Decision Tree Regressor

In [8]:
estimator = DecisionTreeRegressor(max_depth= 79)
clf = BaggingRegressor(base_estimator=estimator, n_estimators=56)
clf = clf.fit(x_train, y_train)
prediction_array = clf.predict(x_test)

## GP Regression

In [9]:
gpr = GaussianProcessRegressor().fit(x_train, y_train)
gpr.predict(x_test, return_std=True)

(array([[0.53449712, 0.00775591, 0.49257213]]), array([0.08186359]))

### Neural Network

In [10]:
regr = MLPRegressor(max_iter=500).fit(x_train, y_train)
prediction_nn = regr.predict(x_test)
red_prediction_nn = prediction_nn[0][0]
green_prediction_nn = prediction_nn[0][1]
blue_prediction_nn = prediction_nn[0][2]
sum_prediction_nn = np.sum(prediction_nn[0])
print(red_prediction_nn/sum_prediction_nn)
print(green_prediction_nn/sum_prediction_nn)
print(blue_prediction_nn/sum_prediction_nn)

0.5089352398921783
0.005260926024242955
0.48580383408357874


### Test the Performance of each Model

In [11]:
nn_y_train = []
dtr_y_train = []
lr_y_train = []
gpr_y_train = [] 

for i in range(x_train.shape[0]):
    prediction_nn = regr.predict(np.array([x_train[i,:]]))
    nn_y_train.append(prediction_nn[0])
    prediction_dtr = clf.predict(np.array([x_train[i,:]]))
    dtr_y_train.append(prediction_dtr[0])
    prediction_lr = reg.predict(np.array([x_train[i,:]]))
    lr_y_train.append(prediction_lr[0])
    prediction_gpr = gpr.predict(np.array([x_train[i,:]]))
    gpr_y_train.append(prediction_gpr[0])

In [12]:
nn_y_train_array = nn_y_train[0]
for i in range(1,len(nn_y_train)):
    nn_y_train_array = np.vstack((nn_y_train_array, nn_y_train[i]))

dtr_y_train_array = dtr_y_train[0]
for i in range(1,len(dtr_y_train)):
    dtr_y_train_array = np.vstack((dtr_y_train_array, dtr_y_train[i]))

lr_y_train_array = lr_y_train[0]
for i in range(1,len(lr_y_train)):
    lr_y_train_array = np.vstack((lr_y_train_array, lr_y_train[i]))

gpr_y_train_array = gpr_y_train[0]
for i in range(1,len(gpr_y_train)):
    gpr_y_train_array = np.vstack((gpr_y_train_array, gpr_y_train[i]))

In [13]:
print('Compare the predicted concentration dataset to the actual')
print('MSE of neural network:', np.sum((nn_y_train_array - y_train)**2))
print('MSE of decision tree regressor:', np.sum((dtr_y_train_array - y_train)**2))
print('MSE of linear regressor:', np.sum((lr_y_train_array - y_train)**2))
print('MSE of gaussian process regressor:', np.sum((gpr_y_train_array - y_train)**2))

Compare the predicted concentration dataset to the actual
MSE of neural network: 0.03996761768411995
MSE of decision tree regressor: 0.21452374555634754
MSE of linear regressor: 0.12320290151219522
MSE of gaussian process regressor: 2.8321745802036165e-16


## Spectra Generator for RBG Dyes 

In [14]:
x,y = obtain_data(50)
Sample_concentrations = pd.read_excel('Sample_concentrations.xlsx')
red_conc = Sample_concentrations[['Red conc']].values
green_conc = Sample_concentrations[['Green conc']].values
blue_conc = Sample_concentrations[['Blue conc']].values
sample_conc = np.hstack((red_conc, green_conc, blue_conc))
sample_spectra = pd.read_excel('Sample_spectra.xlsx')
sample_spectra = np.asarray(sample_spectra)
wavelength = sample_spectra[:,0]
red = sample_spectra[:,1]
green = sample_spectra[:,2]
blue = sample_spectra[:,3]
spectra = []
for i in range(x.shape[0]):
    spectra.append(new_spectra_from_stock(x[i,0],x[i,1],x[i,2] , red, green, blue, wavelength))

In [15]:
spectra0 = spectra[0]
for i in range(1,len(y)):
    spectra0 = np.vstack((spectra0,spectra[i]))

In [16]:
x_train = spectra0 #spectra 
y_train = x #concentrations 

In [17]:
pca = PCA(n_components=0.99)
x_train_new = pca.fit(x_train)
x_train = pca.transform(x_train)

### GP Regression

In [18]:
gpr = GaussianProcessRegressor().fit(x_train, y_train)
prediction_gpr = gpr.predict(x_test, return_std=True)

In [19]:
red_prediction_gpr = prediction_gpr[0][0][0]
green_prediction_gpr = prediction_gpr[0][0][1]
blue_prediction_gpr = prediction_gpr[0][0][2]
sum_prediction_gpr = np.sum(prediction_gpr[0][0])

In [20]:
print(red_prediction_gpr/sum_prediction_gpr)
print(green_prediction_gpr/sum_prediction_gpr)
print(blue_prediction_gpr/sum_prediction_gpr)

0.3309609705635455
0.5167828773850827
0.15225615205137186


### Linear Regression

In [21]:
reg = LinearRegression().fit(x_train, y_train)
prediction_lr = reg.predict(x_test)
red_prediction_lr = prediction_lr[0][0]
green_prediction_lr = prediction_lr[0][1]
blue_prediction_lr = prediction_lr[0][2]
sum_prediction_lr = np.sum(prediction_lr[0])

In [22]:
print(red_prediction_lr/sum_prediction_lr)
print(green_prediction_lr/sum_prediction_lr)
print(blue_prediction_lr/sum_prediction_lr)

0.322998762820858
0.4717762056722276
0.20522503150691446


### Decision Tree Regression

In [23]:
estimator = DecisionTreeRegressor(max_depth=27)
clf = BaggingRegressor(base_estimator=estimator, n_estimators=43)
clf = clf.fit(x_train, y_train)
prediction_dtr = clf.predict(x_test)

In [24]:
red_prediction_dtr = prediction_dtr[0][0]
green_prediction_dtr = prediction_dtr[0][1]
blue_prediction_dtr = prediction_dtr[0][2]
sum_prediction_dtr = np.sum(prediction_dtr[0])
print(red_prediction_dtr/sum_prediction_dtr)
print(green_prediction_dtr/sum_prediction_dtr)
print(blue_prediction_dtr/sum_prediction_dtr)

0.33204536759026654
0.4626555450267834
0.20529908738295008


### Neural Network 

In [25]:
regr = MLPRegressor(max_iter=500).fit(x_train, y_train)
prediction_nn = regr.predict(x_test)

In [26]:
red_prediction_nn = prediction_nn[0][0]
green_prediction_nn = prediction_nn[0][1]
blue_prediction_nn = prediction_nn[0][2]
sum_prediction_nn = np.sum(prediction_nn[0])
print(red_prediction_nn/sum_prediction_nn)
print(green_prediction_nn/sum_prediction_nn)
print(blue_prediction_nn/sum_prediction_nn)

0.30953770445327966
0.5825349158191202
0.10792737972760005


### MSE of each model 

In [27]:
nn_y_train = []
dtr_y_train = []
lr_y_train = []
gpr_y_train = [] 

for i in range(x_train.shape[0]):
    prediction_nn = regr.predict(np.array([x_train[i,:]]))
    nn_y_train.append(prediction_nn[0])
    prediction_dtr = clf.predict(np.array([x_train[i,:]]))
    dtr_y_train.append(prediction_dtr[0])
    prediction_lr = reg.predict(np.array([x_train[i,:]]))
    lr_y_train.append(prediction_lr[0])
    prediction_gpr = gpr.predict(np.array([x_train[i,:]]))
    gpr_y_train.append(prediction_gpr[0])
  

In [28]:
nn_y_train_array = nn_y_train[0]
for i in range(1,len(nn_y_train)):
    nn_y_train_array = np.vstack((nn_y_train_array, nn_y_train[i]))

dtr_y_train_array = dtr_y_train[0]
for i in range(1,len(dtr_y_train)):
    dtr_y_train_array = np.vstack((dtr_y_train_array, dtr_y_train[i]))

lr_y_train_array = lr_y_train[0]
for i in range(1,len(lr_y_train)):
    lr_y_train_array = np.vstack((lr_y_train_array, lr_y_train[i]))

gpr_y_train_array = gpr_y_train[0]
for i in range(1,len(gpr_y_train)):
    gpr_y_train_array = np.vstack((gpr_y_train_array, gpr_y_train[i]))

In [29]:
print('Compare the predicted concentration dataset to the actual')
print('MSE of neural network:', np.sum((nn_y_train_array - y_train)**2))
print('MSE of decision tree regressor:', np.sum((dtr_y_train_array - y_train)**2))
print('MSE of linear regressor:', np.sum((lr_y_train_array - y_train)**2))
print('MSE of gaussian process regressor:', np.sum((gpr_y_train_array - y_train)**2))

Compare the predicted concentration dataset to the actual
MSE of neural network: 1.1567838883570554
MSE of decision tree regressor: 0.25129664508615274
MSE of linear regressor: 4.314083075427408e-30
MSE of gaussian process regressor: 8.068287441504189e-19
