In [None]:
# import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.linear_model import LinearRegression, RANSACRegressor
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.model_selection import StratifiedKFold, KFold,ShuffleSplit
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor


In [None]:
# Read the mean RGB pixel values (sample and reference columns) from disk.
data = pd.read_csv(
    'C:/Users/Brinda Khanal/Documents/Bidur Git Repo/padColorimetry/Data/new data/mean_pixel_RGB_2400.csv'
)

# Regression target: the dilution factor column.
Dilution_Factor = data['Dilution Factor']

# Per-channel feature: sample value minus reference value.
for channel in ('Red', 'Green', 'Blue'):
    data['Diff_' + channel] = data[channel + 'Sample'] - data[channel + 'Reference']

# Feature table holding the three channel differences.
Diff_values = data[['Diff_Red', 'Diff_Green', 'Diff_Blue']]

# Preview the first 20 feature rows.
Diff_values.head(20)

The two code cells below compare ordinary linear regression with linear regression fitted using RANSAC. They were only used to test how the two differ in terms of R2 score.
### No need to run these two code cells below

In [None]:
# Linear regression model taking the three channel differences (R, G, B) as input features,
# evaluated with 5-fold cross-validation.
lr = LinearRegression()

# Seeded shuffle (same seed as the tree-based estimators in this notebook) so that
# the folds, and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

# Arbitrary feature row used to spot-check the model of each fold. It is wrapped in a
# DataFrame carrying the training column names because the model is fitted on a DataFrame.
example_row = pd.DataFrame([[-8.212419, 6.876871, -2.027549]], columns=Diff_values.columns)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    lr.fit(X_train, y_train)
    print("For Split:", i, ",Prediciton Value: ", lr.predict(example_row))
    print("For Split:", i, ",coefficients: ", lr.coef_)

    R2 = lr.score(X_test, y_test)
    print("For Split:", i, ",Score (R2): ", R2)

    n = test_index.shape[0]  # number of samples in the held-out fold
    # Adjusted R2 penalises the plain R2 for the number of predictors.
    Adjusted_R2 = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
    print("Adjusted R2:", Adjusted_R2)
  

In [None]:
# Using the RANSAC algorithm to fit linear regression robustly,
# evaluated with 5-fold cross-validation.
# RANSAC samples random subsets, so it is seeded to keep the fit reproducible.
ransac = RANSACRegressor(random_state=1)

# Seeded shuffle (same seed as the tree-based estimators in this notebook) so that
# the folds, and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

# Arbitrary feature row used to spot-check the model of each fold. It is wrapped in a
# DataFrame carrying the training column names because the model is fitted on a DataFrame.
example_row = pd.DataFrame([[-8.212419, 6.876871, -2.027549]], columns=Diff_values.columns)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    ransac.fit(X_train, y_train)
    print("For Split:", i, ",Prediciton Value: ", ransac.predict(example_row))
    # Coefficients of the final linear model that RANSAC fitted.
    print("For Split:", i, ",coefficients: ", ransac.estimator_.coef_)

    R2 = ransac.score(X_test, y_test)
    print("For Split:", i, ",Score (R2): ", R2)

    n = test_index.shape[0]  # number of samples in the held-out fold
    # Adjusted R2 penalises the plain R2 for the number of predictors.
    Adjusted_R2 = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
    print("Adjusted R2:", Adjusted_R2)
  


In [None]:

### Sample-only view: how each raw channel (R, G, B) of the sample relates to the dilution factor
plt.figure(2, figsize=(15, 5))

# ---- panel 1: red channel of the sample vs. dilution factor ----
plt.subplot(1, 3, 1)
lr_redsample = LinearRegression().fit(data[['RedSample']], Dilution_Factor)
plt.scatter(Dilution_Factor, data['RedSample'], label='Sample', marker='.', color='orange')
# The fitted line is drawn as (predicted dilution factor, channel value) to share the scatter's axes.
plt.plot(
    lr_redsample.predict(data[['RedSample']]),
    data['RedSample'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("Red Channel")
plt.legend(loc='lower right')

# ---- panel 2: green channel of the sample vs. dilution factor ----
plt.subplot(1, 3, 2)
lr_greensample = LinearRegression().fit(data[['GreenSample']], Dilution_Factor)
plt.scatter(Dilution_Factor, data['GreenSample'], label='Sample', marker='.', color='orange')
plt.plot(
    lr_greensample.predict(data[['GreenSample']]),
    data['GreenSample'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("Green Channel")
plt.legend(loc='lower right')

# ---- panel 3: blue channel of the sample vs. dilution factor ----
plt.subplot(1, 3, 3)
lr_bluesample = LinearRegression().fit(data[['BlueSample']], Dilution_Factor)
plt.scatter(Dilution_Factor, data['BlueSample'], label='Sample', marker='.', color='orange')
plt.plot(
    lr_bluesample.predict(data[['BlueSample']]),
    data['BlueSample'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
# Hand-picked logarithmic curve overlaid for visual comparison with the linear fit.
t = np.arange(0.01, 0.8, 0.001)
plt.plot(t, -80 * np.log10(t), label='Log Curve', linewidth=2, color='red')
plt.legend(loc='upper right')
plt.xlabel("Dilution Factor")
plt.ylabel("Blue Channel")

In [None]:


# Channel differences (Sample - Reference) vs. dilution factor, one panel per RGB channel.
plt.figure(1, figsize=(15, 5))

# ---- panel 1: red difference (Sample - Reference) ----
plt.subplot(1, 3, 1)
lr_red = LinearRegression().fit(Diff_values[['Diff_Red']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_Red'], label='True Value', marker='.', color='yellowgreen')
# The fitted line is drawn as (predicted dilution factor, channel value) to share the scatter's axes.
plt.plot(
    lr_red.predict(Diff_values[['Diff_Red']]),
    Diff_values['Diff_Red'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("Red Channel (Sample-Ref)")
plt.legend(loc='lower right')

# ---- panel 2: green difference (Sample - Reference) ----
plt.subplot(1, 3, 2)
lr_green = LinearRegression().fit(Diff_values[['Diff_Green']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_Green'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_green.predict(Diff_values[['Diff_Green']]),
    Diff_values['Diff_Green'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("Green Channel (Sample-Ref)")
plt.legend(loc='lower right')

# ---- panel 3: blue difference (Sample - Reference) ----
plt.subplot(1, 3, 3)
lr_blue = LinearRegression().fit(Diff_values[['Diff_Blue']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_Blue'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_blue.predict(Diff_values[['Diff_Blue']]),
    Diff_values['Diff_Blue'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
# Hand-picked logarithmic curve overlaid for visual comparison with the linear fit.
t = np.arange(0.01, 0.8, 0.001)
plt.plot(t, -80 * np.log10(t), label='Log Curve', linewidth=2, color='red')
plt.xlabel("Dilution Factor")
plt.ylabel("Blue Channel (Sample-Ref)")
plt.legend(loc='upper right')

plt.show()




In [None]:
# Linear regression model taking all the channel differences (R, G, B) as input features,
# evaluated with 5-fold cross-validation.
lr = LinearRegression()

# Seeded shuffle (same seed as the tree-based estimators in this notebook) so that
# the folds, and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

# Arbitrary feature row used to spot-check the model of each fold. It is wrapped in a
# DataFrame carrying the training column names because the model is fitted on a DataFrame.
example_row = pd.DataFrame([[-2.273275, 91.307025, 212.647976]], columns=Diff_values.columns)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Mean_Adj_R2 = []  # adjusted R2 of every fold, averaged after the loop

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    lr.fit(X_train, y_train)
    print("For Split:", i, ",Prediciton Value: ", lr.predict(example_row))
    print("For Split:", i, ",coefficients: ", lr.coef_)

    R2 = lr.score(X_test, y_test)
    print("For Split:", i, ",Score (R2): ", R2)

    n = test_index.shape[0]  # number of samples in the held-out fold
    # Adjusted R2 penalises the plain R2 for the number of predictors.
    Adjusted_R2 = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
    print("Adjusted R2:", Adjusted_R2)
    Mean_Adj_R2.append(Adjusted_R2)

print("Mean Adjusted R2:", np.mean(Mean_Adj_R2))

The cell below was used only as a test, to see whether the channels could be related to the dilution factor through a non-linear relation (here a custom equation with a logarithmic term) instead of a purely linear one.

### Fit the Points Using Custom Equation and Visualize 3D plot

Test for RGB Color Space

In [None]:
from scipy.optimize import curve_fit
from mpl_toolkits import mplot3d  # imported for its side effect: makes projection='3d' available on older matplotlib
plt.figure(3, figsize=(15,15))

## Evaluation grid (green axis x blue axis) used only to draw the fitted surface.
x = np.linspace(0, 200, 180)
y = np.linspace(0, 200, 180)
X, Y = np.meshgrid(x, y)


def func(x, a, b, c, d):
    """Custom model fitted by curve_fit.

    x is a 2-D array whose columns 0, 1, 2 are read; at the call below these are
    the Diff_Red, Diff_Green and Diff_Blue columns. Returns
    a + b*x[:,0] + c*x[:,1] + d*log10(10 + x[:,2]).
    """
    return (a + b*x[:,0] + c*x[:,1] + d*np.log10(10 + x[:,2]))


def pred(x, coff):
    """Evaluate the fitted model on a (green, blue) grid with the red term fixed at 0.

    x is a pair (green values, blue values); coff holds the four fitted
    coefficients in the order (a, b, c, d) used by func.
    """
    return (coff[0] + coff[1]*0 + coff[2]*x[0] + coff[3]*np.log10(10 + x[1]))


popt, pcov = curve_fit(func, Diff_values.values, Dilution_Factor)

print (popt)
z = pred((X, Y), popt)
predicted_values = func(Diff_values.values, *popt)
print (predicted_values)
R2 = r2_score(Dilution_Factor.values, predicted_values, multioutput='variance_weighted')
n = len(Dilution_Factor)  # number of samples actually used in the fit (was hard-coded to 144)
p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Adjusted_R2 = 1 - (1 - R2)*((n - 1)/(n - p - 1))
print (" Adjusted R2: ", Adjusted_R2)

# Measured points (green diff, blue diff, dilution factor) with the fitted surface as contours.
ax = plt.axes(projection='3d')
ax.scatter(Diff_values['Diff_Green'], Diff_values['Diff_Blue'], Dilution_Factor, color='red', marker='*')
ax.contour3D(x, y, z, 50, cmap='binary')
ax.view_init(20,50)

ax.set_xlabel('G channel')
ax.set_ylabel('B channel')
ax.set_zlabel('dilution factor')

In [None]:
#### regressors: gradient boosting, random forest and linear regression,
#### compared on the same 5-fold cross-validation splits
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
reg3 = LinearRegression()
regressors = (reg1, reg2, reg3)  # order fixes the order of the printed results

# Seeded shuffle (same seed as the estimators above) so that the folds,
# and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    for model in regressors:
        model.fit(X_train, y_train)

    # True targets followed by each model's predictions on the held-out fold.
    for model in regressors:
        print("True Value:", y_test.values)
        print("For Split:", i, ",Prediciton Value: ", model.predict(X_test))

    n = test_index.shape[0]  # number of samples in the held-out fold

    print("Fold : ", i)
    # One adjusted R2 per model, printed in the order of `regressors`.
    for model in regressors:
        R2 = model.score(X_test, y_test)
        print("Adjusted R2:", 1 - (1 - R2) * ((n - 1) / (n - p - 1)))


In [None]:
# Bayesian ridge regression fitted on the channel-difference features.
# The previous call used X and Y, which in this notebook are the 2-D meshgrid arrays
# built for the surface plot, not the measured data.
reg = linear_model.BayesianRidge()
reg.fit(Diff_values, Dilution_Factor)

### Now we move to the HSV color space; the same experiments are performed with the HSV values as the features

In [None]:
# Read the mean HSV pixel values (sample and reference columns) from disk.
data = pd.read_excel(
    'C:/Users/Brinda Khanal/Documents/Bidur Git Repo/padColorimetry/Data/mean_pixel_new_HSV.xlsx'
)

# Regression target: the dilution factor column.
Dilution_Factor = data['Dilution Factor']

# Per-channel feature: sample value minus reference value.
# Using reference minus sample would give the same relation with the sign reversed.
for channel in ('H', 'S', 'V'):
    data['Diff_' + channel] = data[channel + 'Sample'] - data[channel + 'Reference']

# Feature table holding the three channel differences.
Diff_values = data[['Diff_H', 'Diff_S', 'Diff_V']]

# Preview the first feature rows.
Diff_values.head()

In [None]:
# Channel differences (Sample - Reference) vs. dilution factor, one panel per HSV channel.
plt.figure(1, figsize=(15, 5))

# ---- panel 1: H difference (Sample - Reference) ----
plt.subplot(1, 3, 1)
lr_H = LinearRegression().fit(Diff_values[['Diff_H']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_H'], label='True Value', marker='.', color='yellowgreen')
# The fitted line is drawn as (predicted dilution factor, channel value) to share the scatter's axes.
plt.plot(
    lr_H.predict(Diff_values[['Diff_H']]),
    Diff_values['Diff_H'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("H Channel (Sample-Ref)")
plt.legend(loc='lower right')

# ---- panel 2: S difference (Sample - Reference) ----
plt.subplot(1, 3, 2)
lr_S = LinearRegression().fit(Diff_values[['Diff_S']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_S'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_S.predict(Diff_values[['Diff_S']]),
    Diff_values['Diff_S'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
# Hand-picked logarithmic curve overlaid for visual comparison with the linear fit.
t = np.arange(0.01, 0.8, 0.001)
plt.plot(t, 80 * np.log10(t), label='Log Curve', linewidth=2, color='red')
plt.xlabel("Dilution Factor")
plt.ylabel("S Channel (Sample-Ref)")
plt.legend(loc='lower right')

# ---- panel 3: V difference (Sample - Reference) ----
plt.subplot(1, 3, 3)
lr_V = LinearRegression().fit(Diff_values[['Diff_V']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_V'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_V.predict(Diff_values[['Diff_V']]),
    Diff_values['Diff_V'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.legend(loc='lower right')
plt.xlabel("Dilution Factor")
plt.ylabel("V Channel(Sample-Ref)")

In [None]:
# Linear regression model taking all the channel differences (H, S, V) as input features,
# evaluated with 5-fold cross-validation.
lr = LinearRegression()

# Seeded shuffle (same seed as the tree-based estimators in this notebook) so that
# the folds, and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

# Arbitrary feature row used to spot-check the model of each fold. It is wrapped in a
# DataFrame carrying the training column names because the model is fitted on a DataFrame.
example_row = pd.DataFrame([[8.008504, -226.417486, -2.284879]], columns=Diff_values.columns)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Mean_Adj_R2 = []  # adjusted R2 of every fold, averaged after the loop

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    lr.fit(X_train, y_train)
    print("For Split:", i, ",Prediciton Value: ", lr.predict(example_row))
    print("For Split:", i, ",coefficients: ", lr.coef_)

    R2 = lr.score(X_test, y_test)
    print("For Split:", i, ",Score (R2): ", R2)

    n = test_index.shape[0]  # number of samples in the held-out fold
    # Adjusted R2 penalises the plain R2 for the number of predictors.
    Adjusted_R2 = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
    print("Adjusted R2:", Adjusted_R2)
    Mean_Adj_R2.append(Adjusted_R2)

print("Mean Adjusted R2:", np.mean(Mean_Adj_R2))
  

The cell below was used only as a test, to see whether the channels could be related to the dilution factor through a non-linear relation (here a custom equation with a logarithmic term) instead of a purely linear one.

### Fit the Points Using Custom Equation and Visualize 3D plot

Test for HSV

In [None]:
from scipy.optimize import curve_fit
from mpl_toolkits import mplot3d  # imported for its side effect: makes projection='3d' available on older matplotlib


plt.figure(3, figsize=(15,15))

## Evaluation grid (H axis x S axis) used only to draw the fitted surface.
x = np.linspace(-200, 50, 250)
y = np.linspace(-200, 50, 250)
X, Y = np.meshgrid(x, y)


def func(x, a, b, c, d):
    """Custom model fitted by curve_fit.

    x is a 2-D array whose columns 0, 1, 2 are read; at the call below these are
    the Diff_H, Diff_S and Diff_V columns. Returns
    a + b*x[:,0] + c*log10(10 - x[:,1]) + d*x[:,2].
    """
    return (a + b*x[:,0] + c*np.log10(10 - x[:,1]) + d*x[:,2])


def pred(x, coff):
    """Evaluate the fitted model on an (H, S) grid with the V term fixed at 0.

    x is a pair (H values, S values); coff holds the four fitted coefficients
    in the order (a, b, c, d) used by func.
    """
    # The zeroed V term belongs to coff[3] (d in func); it was written with coff[2].
    return (coff[0] + coff[1]*x[0] + coff[2]*np.log10(10 - x[1]) + coff[3]*0)


popt, pcov = curve_fit(func, Diff_values.values, Dilution_Factor)

print (popt)
# NOTE: the grid runs up to 50 on the S axis, where 10 - S is negative, so log10
# is undefined there (NaN) and numpy emits a RuntimeWarning for those grid cells.
z = pred((X, Y), popt)
predicted_values = func(Diff_values.values, *popt)
print (predicted_values)
R2 = r2_score(Dilution_Factor.values, predicted_values, multioutput='variance_weighted')
n = len(Dilution_Factor)  # number of samples actually used in the fit (was hard-coded to 144)
p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Adjusted_R2 = 1 - (1 - R2)*((n - 1)/(n - p - 1))
print (" Adjusted R2: ", Adjusted_R2)

# Measured points (H diff, S diff, dilution factor) with the fitted surface as contours.
ax = plt.axes(projection='3d')
ax.scatter(Diff_values['Diff_H'], Diff_values['Diff_S'], Dilution_Factor, color='red', marker='*')
ax.contour3D(x, y, z, 50, cmap='binary')
ax.view_init(-10,30)

ax.set_xlabel('H channel')
ax.set_ylabel('S channel')
ax.set_zlabel('dilution factor')

In [None]:
#### voting regressor: gradient boosting, random forest, linear regression and
#### their VotingRegressor ensemble, compared on the same 5-fold cross-validation splits
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
reg3 = LinearRegression()
ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
regressors = (reg1, reg2, reg3, ereg)  # order fixes the order of the printed results

# Seeded shuffle (same seed as the estimators above) so that the folds,
# and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    for model in regressors:
        model.fit(X_train, y_train)

    # True targets followed by each model's predictions on the held-out fold.
    for model in regressors:
        print("True Value:", y_test.values)
        print("For Split:", i, ",Prediciton Value: ", model.predict(X_test))

    n = test_index.shape[0]  # number of samples in the held-out fold

    print("Fold : ", i)
    # One adjusted R2 per model, printed in the order of `regressors`.
    for model in regressors:
        R2 = model.score(X_test, y_test)
        print("Adjusted R2:", 1 - (1 - R2) * ((n - 1) / (n - p - 1)))


### The same experiments are repeated in the Lab color space, with the L, a, b values used as the input features.

In [None]:
##### Read the mean Lab pixel values (sample and reference columns) from the xlsx file.
data = pd.read_excel(
    'C:/Users/Brinda Khanal/Documents/Bidur Git Repo/padColorimetry/Data/mean_pixel_new_LAB.xlsx'
)

# Regression target: the dilution factor column.
Dilution_Factor = data['Dilution Factor']

# Per-channel feature: sample value minus reference value.
for channel in ('L', 'a', 'b'):
    data['Diff_' + channel] = data[channel + 'Sample'] - data[channel + 'Reference']

# Feature table holding the three channel differences.
Diff_values = data[['Diff_L', 'Diff_a', 'Diff_b']]

# Preview the first feature rows.
Diff_values.head()

In [None]:

############ Lab color space: channel differences (Sample - Reference) vs. dilution factor
plt.figure(1, figsize=(15, 5))

# ---- panel 1: L difference (Sample - Reference) ----
plt.subplot(1, 3, 1)
lr_L = LinearRegression().fit(Diff_values[['Diff_L']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_L'], label='True Value', marker='.', color='yellowgreen')
# The fitted line is drawn as (predicted dilution factor, channel value) to share the scatter's axes.
plt.plot(
    lr_L.predict(Diff_values[['Diff_L']]),
    Diff_values['Diff_L'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("L Channel (Sample-Ref)")
plt.legend(loc='lower right')

# ---- panel 2: a difference (Sample - Reference) ----
plt.subplot(1, 3, 2)
lr_a = LinearRegression().fit(Diff_values[['Diff_a']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_a'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_a.predict(Diff_values[['Diff_a']]),
    Diff_values['Diff_a'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
plt.xlabel("Dilution Factor")
plt.ylabel("a Channel (Sample- Ref)")
plt.legend(loc='lower right')

# ---- panel 3: b difference (Sample - Reference) ----
plt.subplot(1, 3, 3)
lr_b = LinearRegression().fit(Diff_values[['Diff_b']], Dilution_Factor)
plt.scatter(Dilution_Factor, Diff_values['Diff_b'], label='True Value', marker='.', color='yellowgreen')
plt.plot(
    lr_b.predict(Diff_values[['Diff_b']]),
    Diff_values['Diff_b'],
    label='Linear regressor',
    linewidth=2,
    color='navy',
)
# Hand-picked logarithmic curve overlaid for visual comparison with the linear fit.
t = np.arange(0.01, 0.8, 0.001)
plt.plot(t, 10 + 15 * np.log10(t), label='Log Curve', linewidth=2, color='red')
plt.legend(loc='lower right')
plt.xlabel("Dilution Factor")
plt.ylabel("b (Sample- Ref) Channel")

In [None]:
# Linear regression model taking all the channel differences (L, a, b) as input features,
# evaluated with 5-fold cross-validation.
lr = LinearRegression()

# Seeded shuffle (same seed as the tree-based estimators in this notebook) so that
# the folds, and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

# Just testing an example: arbitrary feature row used to spot-check the model of each
# fold. It is wrapped in a DataFrame carrying the training column names because the
# model is fitted on a DataFrame.
example_row = pd.DataFrame([[-8.212419, 6.876871, -2.027549]], columns=Diff_values.columns)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Mean_Adj_R2 = []  # adjusted R2 of every fold, averaged after the loop

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    lr.fit(X_train, y_train)
    print("For Split:", i, ",Prediciton Value: ", lr.predict(example_row))
    print("For Split:", i, ",coefficients: ", lr.coef_)

    R2 = lr.score(X_test, y_test)
    print("For Split:", i, ",Score (R2): ", R2)

    n = test_index.shape[0]  # number of samples in the held-out fold
    # Adjusted R2 penalises the plain R2 for the number of predictors.
    Adjusted_R2 = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
    print("Adjusted R2:", Adjusted_R2)
    Mean_Adj_R2.append(Adjusted_R2)

print("Mean Adjusted R2:", np.mean(Mean_Adj_R2))

The cell below was used only as a test, to see whether the channels could be related to the dilution factor through a non-linear relation (here a custom equation with a logarithmic term) instead of a purely linear one.

### Fit the Points Using Custom Equation and Visualize 3D plot

Test for Lab Color Space

In [None]:
from scipy.optimize import curve_fit
from mpl_toolkits import mplot3d  # imported for its side effect: makes projection='3d' available on older matplotlib


plt.figure(3, figsize=(15,15))

## Evaluation grid (a axis x b axis) used only to draw the fitted surface.
x = np.linspace(-80, 80, 180)
y = np.linspace(-80, 80, 180)
X, Y = np.meshgrid(x, y)


def func(x, a, b, c, d):
    """Custom model fitted by curve_fit.

    x is a 2-D array whose columns 0, 1, 2 are read; at the call below these are
    the Diff_L, Diff_a and Diff_b columns. Returns
    a + b*x[:,1] + c*log10(10 - x[:,2]) + d*x[:,0].
    """
    return (a + b*x[:,1] + c*np.log10(10 - x[:,2]) + d*x[:,0])


def pred(x, coff):
    """Evaluate the fitted model on an (a, b) grid with the L term fixed at 0.

    x is a pair (a-channel values, b-channel values); coff holds the four
    fitted coefficients in the order (a, b, c, d) used by func.
    """
    return (coff[0] + coff[1]*x[0] + coff[2]*np.log10(10 - x[1]) + coff[3]*0)


popt, pcov = curve_fit(func, Diff_values.values, Dilution_Factor)

print (popt)
# NOTE: the grid runs up to 80 on the b axis, where 10 - b is negative, so log10
# is undefined there (NaN) and numpy emits a RuntimeWarning for those grid cells.
z = pred((X, Y), popt)
predicted_values = func(Diff_values.values, *popt)
print (predicted_values)
R2 = r2_score(Dilution_Factor.values, predicted_values, multioutput='variance_weighted')
n = len(Dilution_Factor)  # number of samples actually used in the fit (was hard-coded to 144)
p = Diff_values.shape[1]  # number of predictors (one per channel difference)
Adjusted_R2 = 1 - (1 - R2)*((n - 1)/(n - p - 1))
print (" Adjusted R2: ", Adjusted_R2)

# Measured points (a diff, b diff, dilution factor) with the fitted surface as contours.
ax = plt.axes(projection='3d')
ax.scatter(Diff_values['Diff_a'], Diff_values['Diff_b'], Dilution_Factor, color='red', marker='*')
ax.contour3D(x, y, z, 50, cmap='binary')
ax.view_init(25,40)

ax.set_xlabel('a channel')
ax.set_ylabel('b channel')
ax.set_zlabel('dilution factor')






In [None]:
#### voting regressor: gradient boosting, random forest, linear regression and
#### their VotingRegressor ensemble, compared on the same 5-fold cross-validation splits
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
reg3 = LinearRegression()
ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
regressors = (reg1, reg2, reg3, ereg)  # order fixes the order of the printed results

# Seeded shuffle (same seed as the estimators above) so that the folds,
# and therefore the printed scores, are reproducible across runs.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

kf_info = kf.get_n_splits(Diff_values, Dilution_Factor)  # returns the number of splitting iterations in the cross-validator
print(kf_info)

p = Diff_values.shape[1]  # number of predictors (one per channel difference)

for i, (train_index, test_index) in enumerate(kf.split(Diff_values, Dilution_Factor), start=1):
    # KFold.split yields positional row numbers, hence .iloc rather than label-based .loc.
    X_train, y_train = Diff_values.iloc[train_index], Dilution_Factor.iloc[train_index]
    X_test, y_test = Diff_values.iloc[test_index], Dilution_Factor.iloc[test_index]

    for model in regressors:
        model.fit(X_train, y_train)

    # True targets followed by each model's predictions on the held-out fold.
    for model in regressors:
        print("True Value:", y_test.values)
        print("For Split:", i, ",Prediciton Value: ", model.predict(X_test))

    n = test_index.shape[0]  # number of samples in the held-out fold

    print("Fold : ", i)
    # One adjusted R2 per model, printed in the order of `regressors`.
    for model in regressors:
        R2 = model.score(X_test, y_test)
        print("Adjusted R2:", 1 - (1 - R2) * ((n - 1) / (n - p - 1)))
