In [1]:
import numpy as np
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel
from sklearn.model_selection import train_test_split, cross_val_score, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import *

In [2]:
# Load data
data = pd.read_csv('augmented_rgb_1.csv')

# Split input and output data
X = data.iloc[0:, [1,2,3,5]].values
y = data.iloc[0:, -2].values

# Standardize the input data
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define kernel
kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.1)

# Define K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Define hyperparameters for grid search
param_grid = {'kernel': [ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=1, length_scale_bounds=(1e-2, 1e2)),
                         ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))],
              'alpha': [0.01, 0.1, 1, 10]}

# Perform grid search to optimize kernel and alpha
grid_search = GridSearchCV(gp, param_grid=param_grid, cv=kfold, scoring='r2')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best R2 score
best_kernel = grid_search.best_estimator_.kernel
best_alpha = grid_search.best_estimator_.alpha
best_r2 = grid_search.best_score_

# Define Gaussian Process Regressor with best hyperparameters
gp = GaussianProcessRegressor(kernel=best_kernel, alpha=best_alpha)

# Fit the model on the training data
gp.fit(X_train, y_train)

# Predict the output for the testing data
y_pred = gp.predict(X_test)
print(y_pred)

# Evaluate the performance on the testing data
r2 = r2_score(y_test, y_pred)

# Print
print('Best kernel:', best_kernel)
print('Best alpha:', best_alpha)
print('Cross-validation R2 scores:', grid_search.cv_results_['mean_test_score'])
print('Mean cross-validation R2 score:', np.mean(grid_search.cv_results_['mean_test_score']))
print('Testing R2 score:', r2)

[49.65740726 49.71373943 17.11587634 19.62423127 47.52213424  6.84920099
 41.12630384  6.87392238 25.56403233  7.89777584 47.41487532 42.8271232
  6.82554949 40.65302988 27.31480011  8.03720189 46.90839669 21.63717358
  5.05329441 13.91814167 45.55879528 60.73911103 55.95577045 42.9631661
 19.52816636 41.79069641  7.07918228  7.28264737 47.22979489 14.80566482
 41.92238524 19.06984377 41.12630384  5.47825933 38.60302845 38.2752004
 15.7235968  13.7502333 ]
Best kernel: 1**2 * RBF(length_scale=1)
Best alpha: 10
Cross-validation R2 scores: [0.74945932 0.74945948 0.80996053 0.8099607  0.88584279 0.88584267
 0.95719034 0.95719031]
Mean cross-validation R2 score: 0.8506132654542944
Testing R2 score: 0.9803200441727585


In [3]:
# Load data
data = pd.read_csv('rgb_train1.csv')

# Split input and output data
X = data.iloc[1:, [1,2,3,5]].values
y_train = data.iloc[1:, -2].values

# Standardize the input data
scaler = StandardScaler()
X_train = scaler.fit_transform(X)

# Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load data
data = pd.read_csv('rgb_test1.csv')

# Split input and output data
X_test = data.iloc[0:, [1,2,3,5]].values
y_test = data.iloc[0:, -2].values

scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)


# Define kernel
kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.1)

# Define K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Define hyperparameters for grid search
param_grid = {'kernel': [ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=1, length_scale_bounds=(1e-2, 1e2)),
                         ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))],
              'alpha': [0.01, 0.1, 1, 10]}

# Perform grid search to optimize kernel and alpha
grid_search = GridSearchCV(gp, param_grid=param_grid, cv=kfold, scoring='r2')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best R2 score
best_kernel = grid_search.best_estimator_.kernel
best_alpha = grid_search.best_estimator_.alpha
best_r2 = grid_search.best_score_

# Define Gaussian Process Regressor with best hyperparameters
gp = GaussianProcessRegressor(kernel=best_kernel, alpha=best_alpha)

# Fit the model on the training data
gp.fit(X_train, y_train)

# Predict the output for the testing data
y_pred = gp.predict(X_test)

print(y_pred)
# Evaluate the performance on the testing data
r2 = r2_score(y_test, y_pred)

# Print
print('Best kernel:', best_kernel)
print('Best alpha:', best_alpha)
print('Cross-validation R2 scores:', grid_search.cv_results_['mean_test_score'])
print('Mean cross-validation R2 score:', np.mean(grid_search.cv_results_['mean_test_score']))
print('Testing R2 score:', r2)

[ 8.54521105  8.31265688  8.41420299  7.6206792   8.92847991 42.73744708
 36.26995746 40.17102005 42.75995252 42.73744708 45.56889132 45.51947475
 45.54455196 45.56889132 46.05506575  2.93099943  2.78459113  3.08862711
  3.08862711  3.35764798 51.14273584 52.04697348 50.23596034 51.26695059
 50.8252952  42.16630565 39.24968744 39.37077227 41.5010093  41.5010093 ]
Best kernel: 1**2 * RBF(length_scale=1)
Best alpha: 10
Cross-validation R2 scores: [0.79326762 0.79326741 0.86850361 0.86850366 0.89642772 0.89642769
 0.94829926 0.94829924]
Mean cross-validation R2 score: 0.8766245267882935
Testing R2 score: 0.9363407400838341


8,8,8,8,8,40

40?,40,40,40,44,44,


44,44,44,7,7,7

7,7,60,60,60,60

60,35?,35,35,35?,35?

In [5]:
# Load data
data = pd.read_csv('rgb_train2.csv')

# Split input and output data
X = data.iloc[1:, [1,2,3,5]].values
y_train = data.iloc[1:, -2].values

# Standardize the input data
scaler = StandardScaler()
X_train = scaler.fit_transform(X)

# Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load data
data = pd.read_csv('rgb_test2.csv')

# Split input and output data
X_test = data.iloc[0:, [1,2,3,5]].values
y_test = data.iloc[0:, -2].values

scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)


# Define kernel
kernel = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2))
gp = GaussianProcessRegressor(kernel=kernel, alpha=0.1)

# Define K-fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Define hyperparameters for grid search
param_grid = {'kernel': [ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=1, length_scale_bounds=(1e-2, 1e2)),
                         ConstantKernel(1.0, (1e-3, 1e4)) * RBF(length_scale=10, length_scale_bounds=(1e-2, 1e2))],
              'alpha': [0.01, 0.1, 1, 10]}

# Perform grid search to optimize kernel and alpha
grid_search = GridSearchCV(gp, param_grid=param_grid, cv=kfold, scoring='r2')
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the best R2 score
best_kernel = grid_search.best_estimator_.kernel
best_alpha = grid_search.best_estimator_.alpha
best_r2 = grid_search.best_score_

# Define Gaussian Process Regressor with best hyperparameters
gp = GaussianProcessRegressor(kernel=best_kernel, alpha=best_alpha)

# Fit the model on the training data
gp.fit(X_train, y_train)

# Predict the output for the testing data
y_pred = gp.predict(X_test)

print(y_pred)
# Evaluate the performance on the testing data
r2 = r2_score(y_test, y_pred)
# accuracy = accuracy_score(y_test, y_pred)

# Print
print('Best kernel:', best_kernel)
print('Best alpha:', best_alpha)
print('Cross-validation R2 scores:', grid_search.cv_results_['mean_test_score'])
print('Mean cross-validation R2 score:', np.mean(grid_search.cv_results_['mean_test_score']))
print('Testing R2 score:', r2)

[41.83293635 37.4453959  42.89443138 42.10037043 37.81815986 44.21633344
 46.28873743 44.16192388 43.46929009 42.85047181  6.0059757   5.95382137
  6.46252308  5.80441106  5.6722471  39.93561641 36.6772554  39.53157655
 37.64395513 36.8546298  51.46465741 50.99003268 51.61933877 50.86105426
 52.08120954  5.40360819  5.32201534  5.40360819  5.40360819  5.32201534]
Best kernel: 1**2 * RBF(length_scale=10)
Best alpha: 10
Cross-validation R2 scores: [0.79393306 0.79393307 0.85106529 0.8510653  0.94138434 0.94138427
 0.97122304 0.97122304]
Mean cross-validation R2 score: 0.8894014270884076
Testing R2 score: 0.727691604651632


45,45?,45,45,45?,46

46,46,46,46,15,15,

15,15,15,55,55,55,

55,55,40,40,40,40,

40,5,5,5,5,5