This notebook takes in sequence features and runs them through a different models. All model hyperparameters are 
selected using optuna. Models are run across 10 folds and benchmarking results are plotted
for mean squared error and pearson's correlation coefficient



In [6]:
#import libraries
import pandas as pd
import numpy as np 
import sklearn
import os
ROOT_PATH = os.path.abspath('')
ROOT_PATH

'C:\\Users\\dasak\\OneDrive\\Documents\\GitHub\\cas12-collateral-cleavage-prediction'

In [7]:
#Load up guide RNA And target DNA as dataframes
df_guide_RNA = pd.read_csv(ROOT_PATH + "/full_guide_RNA_sequences.csv")
df_target_DNA = pd.read_csv(ROOT_PATH + "/target_DNA_sequences.csv")

# # Load k_value_sigmoid from experiment on 24_05_2022 
from numpy import genfromtxt
k_values = genfromtxt(ROOT_PATH + '/k_values_sigmoid.csv', delimiter=',')
k_values

array([ 0.11984317,  0.13894219,  0.15437609,  0.06970868,  0.13297348,
        0.12425794,  0.18793106,  0.27095051,  0.16818124,  0.1347531 ,
        0.29226458,  0.12293203,  0.38708549,  0.19778096,  0.3622934 ,
        0.34890034,  0.20684573,  0.11417769, -0.07936754,  0.31408586,
        0.18672599,  0.13433442,  0.1847865 ,  0.24778192,  0.42079963,
        0.35078867,  0.09680571,  0.5202575 ,  0.22697745,  0.3248318 ,
        0.1702902 ,  0.24634137,  0.14220321, -2.51561011,  0.17162254,
        0.04222993,  0.24852536,  0.296885  ,  0.27256568,  0.32140478,
        0.21027936,  0.1127019 ,  0.20870236,  0.28398043,  0.32744511,
        0.2282549 ,  0.14870497,  0.32030304,  0.2445419 ,  0.08811278,
        0.44583873,  0.15249895,  0.23265821,  0.22829356,  0.2489641 ,
        0.16632398,  0.27098826,  0.18329734,  0.11389634,  0.29132781,
        0.35664269, -0.2822598 ,  0.14580803,  0.06875822,  0.09644608,
        0.10429324,  0.16959348,  0.32049426,  0.3353713 ,  0.18

In [8]:
def one_hot_encode_DNA(DNA_sequence):
    mapping = dict(zip("atcg", range(4)))    
    one_hot_encoded_sequence = [mapping[i] for i in DNA_sequence]
    return np.eye(4)[one_hot_encoded_sequence]

In [9]:
def one_hot_encode_RNA(gRNA_sequence):
    mapping = dict(zip("aucg", range(4)))    
    one_hot_encoded_sequence = [mapping[i] for i in gRNA_sequence]
    return np.eye(4)[one_hot_encoded_sequence]

In [10]:
guide_RNA_sequences = df_guide_RNA['Sequence']
guide_RNA_one_hot_encoded_sequences = []
for i in guide_RNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_RNA(i)
    guide_RNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence.ravel())

#generate feature names
feature_names_gRNA = []
for i in range(41):
    feature_names_gRNA.append('guide RNA A' + str(i+1))
    feature_names_gRNA.append('guide RNA U' + str(i+1))
    feature_names_gRNA.append('guide RNA C' + str(i+1))
    feature_names_gRNA.append('guide RNA G' + str(i+1))

In [52]:
#Add all objective functions to one cell 
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats.mstats import spearmanr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from functools import *
from sklearn.linear_model import LinearRegression,Lasso, Ridge, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor 
from lightgbm import LGBMRegressor
from sklearn.svm import SVR

#Objective function for ridge regression 
def objective_ridge(trial,x,y):
    
    alpha = trial.suggest_float('alpha', 0, 1, log = True)
    solver = trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    
    
    model = Ridge(
        alpha = alpha,
        solver = solver,
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for lasso regression 
def objective_lasso(trial,x,y):
    alpha = trial.suggest_float('alpha', 0, 1, log = True)
    
    model = Lasso(
        alpha = alpha
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for ElasticNet regression 
def objective_ElasticNet(trial,x,y):
    alpha = trial.suggest_float('alpha', 0, 1, log = True)
    l1_ratio = trial.suggest_float('l1_ratio',0,1)
    
    model = ElasticNet(
        alpha = alpha,
        l1_ratio = l1_ratio
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for decision tree regression 
def objective_dt(trial,x,y):
    
    ccp_alpha = trial.suggest_float('ccp_alpha', 0, 1, log = True)
    criterion = trial.suggest_categorical('criterion', ['squared_error', 'friedman_mse', 'absolute_error'])
    
    model =  DecisionTreeRegressor(
        ccp_alpha = ccp_alpha,
        criterion = criterion,
        random_state = 42)
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for random forest regression
def objective_rf(trial,x,y):
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 30)
    max_samples = trial.suggest_float("max_samples", 0.01, 1.0)
    max_features = trial.suggest_float("max_features", 0.01, 1.0)
    
    model = RandomForestRegressor(
        n_estimators = n_estimators,
        min_samples_leaf = min_samples_leaf,
        max_samples = max_samples,
        max_features = max_features, 
        random_state = 42)
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function of xgboost regression 
def objective_xgb(trial,x,y):
    eta = trial.suggest_float("eta", 0, 0.2)
    max_depth = trial.suggest_int("max_depth", 7, 11)
    min_child_weight = trial.suggest_int("min_child_weight", 2, 6)
    subsample = trial.suggest_float("subsample", 0.6, 1.0)
    reg_lambda = trial.suggest_float("lambda", 0.8, 1.0)
    
    model = XGBRegressor(
        eta = eta,
        max_depth = max_depth,
        min_child_weight = min_child_weight,
        subsample = subsample,
        reg_lambda = reg_lambda,
        nthread = 1)
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for lightgbm regression 
def objective_lgbm(trial,x,y):
    boosting_type = trial.suggest_categorical("boosting_type", ['gbdt', 'dart', 'goss'])
    num_leaves = trial.suggest_int("num_leaves", 2, 50)
    max_depth = trial.suggest_int("max_depth", -1, 50)
    learning_rate = trial.suggest_float("learning_rate", 0.001, 0.1)
    n_estimators = trial.suggest_int("n_estimators", 50, 200)
    min_child_weight = trial.suggest_float("min_child_weight", 0.001, 0.005)
        
    model = LGBMRegressor(
        boosting_type = boosting_type,
        num_leaves = num_leaves,
        max_depth = max_depth,
        learning_rate = learning_rate,
        n_estimators = n_estimators,
        min_child_weight = min_child_weight,
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

#Objective function for support vector machine regressor 
def objective_svr(trial,x,y):
    kernel = trial.suggest_categorical("kernel", ["linear", "poly", "rbf", "sigmoid"])
    degree = trial.suggest_int("degree", 1, 10)
    C = trial.suggest_float("C", 0, 5)
    epsilon = trial.suggest_float("epsilon", 0, 5)
    
        
    model = SVR(
        kernel = kernel,
        degree = degree,
        C = C,
        epsilon = epsilon
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 5, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)

In [17]:
from sklearn.metrics import make_scorer

#Function for spearman rank scorer
def spearman_rank_scorer(model,X,Y):
    Y_predict = model.predict(X)
    spearman_rank = spearmanr(Y,Y_predict)
    rho = spearman_rank[0]
    return(rho)



In [18]:
#Linear regression with guide RNA one hot encoding
model = LinearRegression()
score = cross_validate(model, guide_RNA_one_hot_encoded_sequences, k_values, cv = 10, scoring = spearman_rank_scorer)
score_mean = np.mean(score['test_score'])
score_mean

0.045708443218873775

In [53]:
#Optimisation study for ridge regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ridge,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_gRNA = study.best_value



[32m[I 2022-10-22 13:39:07,108][0m A new study created in memory with name: no-name-265a98ac-d536-4426-9e12-826fe5a72c87[0m
[33m[W 2022-10-22 13:39:07,111][0m Trial 0 failed because of the following error: ValueError('The `low` value must be larger than 0 for a log distribution (low=0, high=1).')[0m
Traceback (most recent call last):
  File "C:\Users\dasak\anaconda3\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\dasak\AppData\Local\Temp\ipykernel_18164\2526002156.py", line 19, in objective_ridge
    alpha = trial.suggest_float('alpha', 0, 1, log = True)
  File "C:\Users\dasak\anaconda3\lib\site-packages\optuna\trial\_trial.py", line 153, in suggest_float
    distribution = FloatDistribution(low, high, log=log, step=step)
  File "C:\Users\dasak\anaconda3\lib\site-packages\optuna\distributions.py", line 151, in __init__
    raise ValueError(
ValueError: The `low` value must be larger than 0 for a log distribu

ValueError: The `low` value must be larger than 0 for a log distribution (low=0, high=1).

In [20]:
#Optimisation study for lasso regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lasso,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lasso_gRNA = study.best_value


[32m[I 2022-10-22 12:56:42,460][0m A new study created in memory with name: no-name-8781c813-d694-4e33-87f6-3aa7f59af3b5[0m
[32m[I 2022-10-22 12:56:42,506][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.8190603690625732}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 12:56:42,566][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.7293940922395016}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 12:56:42,626][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.76733084127539}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 12:56:42,689][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.12036248231709912}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 12:56:42,746][0m Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.8255076023519347}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 12:56:42,803][0m Trial 5 finished with value: 0.0 and parameters: {'alpha': 0.737643

In [21]:
#Optimisation study for Elastic Net with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ElasticNet,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ElastiNet_gRNA = study.best_value




[32m[I 2022-10-22 12:56:48,889][0m A new study created in memory with name: no-name-7d3946af-f403-4d16-b84a-27e759656ada[0m
[32m[I 2022-10-22 12:56:48,936][0m Trial 0 finished with value: 0.03779569405718889 and parameters: {'alpha': 0.6355858435978896, 'l1_ratio': 0.02560193150803558}. Best is trial 0 with value: 0.03779569405718889.[0m
[32m[I 2022-10-22 12:56:48,998][0m Trial 1 finished with value: 0.059527568947960276 and parameters: {'alpha': 0.08428027313684039, 'l1_ratio': 0.1809042348640838}. Best is trial 1 with value: 0.059527568947960276.[0m
[32m[I 2022-10-22 12:56:49,059][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.7222446080400236, 'l1_ratio': 0.6303158339673431}. Best is trial 1 with value: 0.059527568947960276.[0m
[32m[I 2022-10-22 12:56:49,115][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.5219882656543956, 'l1_ratio': 0.5385811129808106}. Best is trial 1 with value: 0.059527568947960276.[0m
[32m[I 2022-10-22 12:56:49

In [24]:
#Optimisation study for decision tree regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_dt,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
dt_gRNA = study.best_value

[32m[I 2022-10-22 13:00:31,572][0m A new study created in memory with name: no-name-cb6cae6e-0afa-41fe-bb4a-9562046a524a[0m
[32m[I 2022-10-22 13:00:31,989][0m Trial 0 finished with value: 0.0 and parameters: {'ccp_alpha': 0.2743503168959598, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:00:32,076][0m Trial 1 finished with value: 0.0 and parameters: {'ccp_alpha': 0.9073461537183963, 'criterion': 'squared_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:00:32,162][0m Trial 2 finished with value: 0.0 and parameters: {'ccp_alpha': 0.9950708518044974, 'criterion': 'squared_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:00:32,250][0m Trial 3 finished with value: 0.0 and parameters: {'ccp_alpha': 0.9915111836810917, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:00:32,340][0m Trial 4 finished with value: 0.0 and parameters: {'ccp_alpha': 0.8643594557308283, '

In [None]:
#Optimisation study for random forest regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_rf,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
rf_gRNA = study.best_value

In [25]:
#Optimisation study for xgboost regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_xgb,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
xgb_gRNA = study.best_value

[32m[I 2022-10-22 13:00:54,112][0m A new study created in memory with name: no-name-243b55f4-ce0a-49c7-ad1e-1f1fc3ce3d01[0m
[32m[I 2022-10-22 13:00:57,812][0m Trial 0 finished with value: 0.14105894105894107 and parameters: {'eta': 0.08073733130914591, 'max_depth': 10, 'min_child_weight': 5, 'subsample': 0.9651311746738434, 'lambda': 0.9637685580275251}. Best is trial 0 with value: 0.14105894105894107.[0m
[32m[I 2022-10-22 13:01:01,687][0m Trial 1 finished with value: 0.09840159840159841 and parameters: {'eta': 0.1771057360181021, 'max_depth': 11, 'min_child_weight': 4, 'subsample': 0.8465238407931315, 'lambda': 0.9482197813441587}. Best is trial 0 with value: 0.14105894105894107.[0m
[32m[I 2022-10-22 13:01:05,671][0m Trial 2 finished with value: 0.1533966033966034 and parameters: {'eta': 0.11049534550548279, 'max_depth': 10, 'min_child_weight': 2, 'subsample': 0.6108943512917614, 'lambda': 0.8748947672438594}. Best is trial 2 with value: 0.1533966033966034.[0m
[32m[I 2022

In [26]:
#Optimisation study for lightgbm regression with guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lgbm,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lgbm_gRNA = study.best_value

[32m[I 2022-10-22 13:11:00,765][0m A new study created in memory with name: no-name-7affd964-e260-4f7e-9986-347e33ef1027[0m
[32m[I 2022-10-22 13:11:01,444][0m Trial 0 finished with value: 0.18476523476523476 and parameters: {'boosting_type': 'dart', 'num_leaves': 43, 'max_depth': 5, 'learning_rate': 0.0764750903833901, 'n_estimators': 138, 'min_child_weight': 0.002966646164343122}. Best is trial 0 with value: 0.18476523476523476.[0m
[32m[I 2022-10-22 13:11:02,122][0m Trial 1 finished with value: 0.16873126873126873 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 24, 'max_depth': 39, 'learning_rate': 0.01578867837473744, 'n_estimators': 197, 'min_child_weight': 0.0029854930805180936}. Best is trial 0 with value: 0.18476523476523476.[0m
[32m[I 2022-10-22 13:11:02,628][0m Trial 2 finished with value: 0.1670678007580884 and parameters: {'boosting_type': 'dart', 'num_leaves': 21, 'max_depth': 4, 'learning_rate': 0.0067055690975680905, 'n_estimators': 98, 'min_child_weight

In [28]:
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_svr,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
svr_gRNA = study.best_value

[32m[I 2022-10-22 13:12:01,697][0m A new study created in memory with name: no-name-32c68ea5-33a4-4a59-9c9c-2da9d3f56376[0m
[32m[I 2022-10-22 13:12:01,730][0m Trial 0 finished with value: 0.04312820512820513 and parameters: {'kernel': 'poly', 'degree': 2, 'C': 3.0719471824156908, 'epsilon': 1.3093727814420402}. Best is trial 0 with value: 0.04312820512820513.[0m
[32m[I 2022-10-22 13:12:01,761][0m Trial 1 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 9, 'C': 3.5022389469476733, 'epsilon': 3.2232722207517472}. Best is trial 0 with value: 0.04312820512820513.[0m
[32m[I 2022-10-22 13:12:01,791][0m Trial 2 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 5, 'C': 1.3218365677497328, 'epsilon': 4.9255948204827495}. Best is trial 0 with value: 0.04312820512820513.[0m
[32m[I 2022-10-22 13:12:01,828][0m Trial 3 finished with value: -0.0012991452991453073 and parameters: {'kernel': 'rbf', 'degree': 7, 'C': 2.9152318906601735, 'epsilon': 0.7

In [29]:
#One hot encode only the 20 gRNA bases that interact with the target DNA
complementary_guide_RNA_sequences = []
for i in guide_RNA_sequences:
    complementary_guide_RNA_sequence = i[21:41]
    complementary_guide_RNA_sequences.append(complementary_guide_RNA_sequence)

#One hot encode them
complementary_guide_RNA_one_hot_encoded_sequences = []
for i in complementary_guide_RNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_RNA(i).ravel()
    complementary_guide_RNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence)

#generate feature names
feature_names_complementary_gRNA = []
for i in range(20):
    feature_names_complementary_gRNA.append('guide RNA A' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA U' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA C' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA G' + str(i+1))

In [33]:
#Optimisation function for linear regression with complementary guide RNA one hot encoding
model = LinearRegression()
score = cross_validate(model, complementary_guide_RNA_one_hot_encoded_sequences, k_values, cv = 10, scoring = spearman_rank_scorer)
score_mean = np.mean(score['test_score'])
linear_regression_complementary_gRNA = score_mean
linear_regression_complementary_gRNA

0.10281942520601048

In [32]:
#Optimisation function for ridge regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ridge,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:13:43,685][0m A new study created in memory with name: no-name-c33d0a3b-fec4-4ce6-981c-81afe2e2502b[0m
[32m[I 2022-10-22 13:13:43,824][0m Trial 0 finished with value: 0.06818181818181818 and parameters: {'alpha': 0.9251223631803561, 'solver': 'saga'}. Best is trial 0 with value: 0.06818181818181818.[0m
[32m[I 2022-10-22 13:13:43,885][0m Trial 1 finished with value: 0.0524975024975025 and parameters: {'alpha': 0.44935082963315076, 'solver': 'cholesky'}. Best is trial 0 with value: 0.06818181818181818.[0m
[32m[I 2022-10-22 13:13:44,269][0m Trial 2 finished with value: 0.0524975024975025 and parameters: {'alpha': 0.3618469518786055, 'solver': 'sag'}. Best is trial 0 with value: 0.06818181818181818.[0m
[32m[I 2022-10-22 13:13:44,349][0m Trial 3 finished with value: 0.0524975024975025 and parameters: {'alpha': 0.45644093360862936, 'solver': 'auto'}. Best is trial 0 with value: 0.06818181818181818.[0m
[32m[I 2022-10-22 13:13:44,432][0m Trial 4 finished w

In [34]:
#Optimisation function for lasso regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lasso,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lasso_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:14:56,502][0m A new study created in memory with name: no-name-a54367b4-eb1f-4986-8a51-6fcf3d17eb8f[0m
[32m[I 2022-10-22 13:14:56,560][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.12606525983557282}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:14:56,613][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.5299438261838598}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:14:56,668][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.934112097790581}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:14:56,724][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.13123920287905166}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:14:56,793][0m Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.7489564885393902}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:14:56,860][0m Trial 5 finished with value: 0.0 and parameters: {'alpha': 0.0520

In [36]:
#Optimisation function for Elastic Net regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ElasticNet,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ElasticNet_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:15:41,074][0m A new study created in memory with name: no-name-5acf1a21-f44f-42c4-ba8f-c5d6c24d4ef2[0m
[32m[I 2022-10-22 13:15:41,127][0m Trial 0 finished with value: 0.11813186813186816 and parameters: {'alpha': 0.07992588471008821, 'l1_ratio': 0.005894907749072087}. Best is trial 0 with value: 0.11813186813186816.[0m
[32m[I 2022-10-22 13:15:41,181][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.11158227568537205, 'l1_ratio': 0.573509844048723}. Best is trial 0 with value: 0.11813186813186816.[0m
[32m[I 2022-10-22 13:15:41,235][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.05231053828092003, 'l1_ratio': 0.7633034413347135}. Best is trial 0 with value: 0.11813186813186816.[0m
[32m[I 2022-10-22 13:15:41,302][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.7173008153294812, 'l1_ratio': 0.9526315536500721}. Best is trial 0 with value: 0.11813186813186816.[0m
[32m[I 2022-10-22 13:15:41,355][0m Trial 4 

In [38]:
#Optimisation function for decision tree regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_dt,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
dt_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:17:24,075][0m A new study created in memory with name: no-name-1a032a36-45f1-478f-8ed2-181708613565[0m
[32m[I 2022-10-22 13:17:24,168][0m Trial 0 finished with value: 0.0 and parameters: {'ccp_alpha': 0.915278583297645, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:17:24,510][0m Trial 1 finished with value: 0.0 and parameters: {'ccp_alpha': 0.8104708359940822, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:17:24,595][0m Trial 2 finished with value: 0.0 and parameters: {'ccp_alpha': 0.06890618122796166, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:17:24,679][0m Trial 3 finished with value: 0.0 and parameters: {'ccp_alpha': 0.3103190420580785, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:17:25,023][0m Trial 4 finished with value: 0.0 and parameters: {'ccp_alpha': 0.13232251014601104, 'c

In [None]:
#Optimisation function for random forest regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_rf,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
rf_complementary_gRNA = study.best_value

In [39]:
#Optimisation function for xgboost regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_xgb,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
xgb_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:18:11,700][0m A new study created in memory with name: no-name-7a3dda77-9da1-4e84-9145-b050d5311eb5[0m
[32m[I 2022-10-22 13:18:14,700][0m Trial 0 finished with value: 0.08111888111888113 and parameters: {'eta': 0.1357572454476858, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.6576085761824267, 'lambda': 0.9543973727586079}. Best is trial 0 with value: 0.08111888111888113.[0m
[32m[I 2022-10-22 13:18:17,440][0m Trial 1 finished with value: 0.23061938061938064 and parameters: {'eta': 0.08034680199101658, 'max_depth': 11, 'min_child_weight': 5, 'subsample': 0.7860824646927512, 'lambda': 0.8955296298201152}. Best is trial 1 with value: 0.23061938061938064.[0m
[32m[I 2022-10-22 13:18:20,155][0m Trial 2 finished with value: 0.12952047952047951 and parameters: {'eta': 0.11015426461493115, 'max_depth': 7, 'min_child_weight': 5, 'subsample': 0.960918144810615, 'lambda': 0.8636252438549135}. Best is trial 1 with value: 0.23061938061938064.[0m
[32m[I 2022-

In [40]:
#Optimisation function for lightgbm regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lgbm,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lgbm_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:22:56,499][0m A new study created in memory with name: no-name-56a64985-e19f-4ec8-bdd2-9bd38d3e2cf5[0m
[32m[I 2022-10-22 13:22:57,190][0m Trial 0 finished with value: 0.12517482517482517 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 47, 'max_depth': 41, 'learning_rate': 0.09112886161245522, 'n_estimators': 153, 'min_child_weight': 0.0016536431513729938}. Best is trial 0 with value: 0.12517482517482517.[0m
[32m[I 2022-10-22 13:22:57,745][0m Trial 1 finished with value: 0.13221778221778221 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 23, 'max_depth': -1, 'learning_rate': 0.0592107767544082, 'n_estimators': 117, 'min_child_weight': 0.0038527124660021395}. Best is trial 1 with value: 0.13221778221778221.[0m
[32m[I 2022-10-22 13:22:58,094][0m Trial 2 finished with value: 0.1919080919080919 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 14, 'max_depth': 44, 'learning_rate': 0.022003565255321683, 'n_estimators': 77, 'min_child_weig

In [41]:
#Optimisation function for support vector machine regression with complementary guide RNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_svr,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
svr_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:23:48,798][0m A new study created in memory with name: no-name-f450241b-4ce7-4529-8c7e-0d24a5fa87e2[0m
[32m[I 2022-10-22 13:23:48,830][0m Trial 0 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 7, 'C': 1.704893626314294, 'epsilon': 1.9256889071154426}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:23:48,847][0m Trial 1 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 2, 'C': 0.49758223966434034, 'epsilon': 3.796533035458021}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:23:48,873][0m Trial 2 finished with value: 0.05317948717948717 and parameters: {'kernel': 'poly', 'degree': 2, 'C': 0.1642919708772983, 'epsilon': 0.6496020744541758}. Best is trial 2 with value: 0.05317948717948717.[0m
[32m[I 2022-10-22 13:23:48,898][0m Trial 3 finished with value: -0.030598290598290594 and parameters: {'kernel': 'linear', 'degree': 5, 'C': 4.697838158436915, 'epsilon': 0.8376985683954996}. Best is trial 

In [42]:
#Try 8 bit vectors where the first 4 bits represent the guide RNA and the second 4 bits represent the target DNA. Use only the 20bp matching region to begin with.
#Take the last twenty bases of the guide RNA sequences
complementary_guide_RNA_sequences = []
for i in guide_RNA_sequences:
    complementary_guide_RNA_sequence = i[21:41]
    complementary_guide_RNA_sequences.append(complementary_guide_RNA_sequence)

#One hot encode them
complementary_guide_RNA_one_hot_encoded_sequences = []
for i in complementary_guide_RNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_RNA(i)
    complementary_guide_RNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence)


#Take the middle 20 of the target complements and reorder back to fron so they become complementary to guide RNA sequence
target_DNA_sequences = df_target_DNA['Sequence']
target_DNA_complements = []
complementary_target_DNA_sequences = []
for i in target_DNA_sequences:
    complementary_target_DNA_sequence = i[39:19:-1] #This generates the complement to the guide
    complementary_target_DNA_sequences.append(complementary_target_DNA_sequence)   

complementary_target_DNA_one_hot_encoded_sequences = []
for i in complementary_target_DNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_DNA(i)
    complementary_target_DNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence)  

#Concatenate the one hot encodes sequences
concat_complementary_guide_RNAs_target_DNAs = []
# test with 1st element 
for i in range(len(complementary_guide_RNA_one_hot_encoded_sequences)):
    # print(i)
    concat_complementary_guide_RNA_target_DNA = np.hstack((complementary_guide_RNA_one_hot_encoded_sequences[i], complementary_target_DNA_one_hot_encoded_sequences[i])).ravel()
    concat_complementary_guide_RNAs_target_DNAs.append(concat_complementary_guide_RNA_target_DNA)

#Set up feature names 
feature_names_concat_guide_RNA_target_DNA = []
for i in range(20):
    feature_names_concat_guide_RNA_target_DNA.append('guide RNA A' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('guide RNA U' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('guide RNA C' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('guide RNA G' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('target DNA A' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('target DNA T' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('target DNA C' + str(i+1))
    feature_names_concat_guide_RNA_target_DNA.append('target DNA G' + str(i+1))

In [44]:
#Optimisation function for linear regression with concat guide RNA target DNA one hot encoding
model = LinearRegression()
score = cross_validate(model, concat_complementary_guide_RNAs_target_DNAs, k_values, cv = 10, scoring = spearman_rank_scorer)
score_mean = np.mean(score['test_score'])
linear_regression_concat_gRNA_target_DNA = score_mean
linear_regression_concat_gRNA_target_DNA

0.030133443845440998

In [45]:
#Optimisation function for ridge regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ridge,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:28:15,603][0m A new study created in memory with name: no-name-f688c8b3-0eb1-4fd8-a9b5-a86896afcd89[0m
[32m[I 2022-10-22 13:28:15,681][0m Trial 0 finished with value: 0.10479520479520481 and parameters: {'alpha': 0.6946887992601674, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.10479520479520481.[0m
[32m[I 2022-10-22 13:28:15,767][0m Trial 1 finished with value: 0.10424575424575426 and parameters: {'alpha': 0.3944826228776924, 'solver': 'svd'}. Best is trial 0 with value: 0.10479520479520481.[0m
[32m[I 2022-10-22 13:28:15,822][0m Trial 2 finished with value: 0.10479520479520481 and parameters: {'alpha': 0.5916145830126709, 'solver': 'auto'}. Best is trial 0 with value: 0.10479520479520481.[0m
[32m[I 2022-10-22 13:28:15,877][0m Trial 3 finished with value: 0.106993006993007 and parameters: {'alpha': 0.31037254606456566, 'solver': 'cholesky'}. Best is trial 3 with value: 0.106993006993007.[0m
[32m[I 2022-10-22 13:28:16,570][0m Trial 4 finishe

In [47]:
#Optimisation function for lasso regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lasso,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lasso_complementary_gRNA = study.best_value

[32m[I 2022-10-22 13:29:56,902][0m A new study created in memory with name: no-name-00e2aa33-a4dd-4c66-97f6-4e5f6a3fe0af[0m
[32m[I 2022-10-22 13:29:56,957][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.2571409562872282}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:29:57,004][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.43849239447032395}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:29:57,087][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.8462490436062229}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:29:57,176][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.4635867648640971}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:29:57,247][0m Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.862083163802678}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:29:57,317][0m Trial 5 finished with value: 0.0 and parameters: {'alpha': 0.47036

In [48]:
#Optimisation function for Elastic Net regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ElasticNet,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ElasticNet_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:30:11,932][0m A new study created in memory with name: no-name-24e8a9c7-a459-4a0f-8dc6-effb77f27867[0m
[32m[I 2022-10-22 13:30:11,983][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.23308211100185072, 'l1_ratio': 0.875898167497766}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:30:12,035][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.31385572542978324, 'l1_ratio': 0.3904088759580232}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:30:12,092][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.8381973313204025, 'l1_ratio': 0.15513206029444937}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:30:12,147][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.6320331994399128, 'l1_ratio': 0.885552212362645}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:30:12,205][0m Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.10312477949766663, 'l1_ratio':

In [49]:
#Optimisation function for decision tree regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_dt,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
dt_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:31:34,187][0m A new study created in memory with name: no-name-42f64680-78e4-4d13-abbb-d24bbd73c346[0m
[32m[I 2022-10-22 13:31:34,298][0m Trial 0 finished with value: 0.0 and parameters: {'ccp_alpha': 0.4881479699785317, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:31:34,887][0m Trial 1 finished with value: 0.0 and parameters: {'ccp_alpha': 0.08681413951233141, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:31:35,509][0m Trial 2 finished with value: 0.0 and parameters: {'ccp_alpha': 0.2574878768912062, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:31:35,621][0m Trial 3 finished with value: 0.0 and parameters: {'ccp_alpha': 0.13757254083673154, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:31:35,730][0m Trial 4 finished with value: 0.0 and parameters: {'ccp_alpha': 0.4615996255853966,

In [None]:
#Optimisation function for Randome Forest regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_rf,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
rf_concat_gRNA_target_DNA = study.best_value

In [50]:
#Optimisation function for xgboost regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_xgb,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
xgb_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:32:40,889][0m A new study created in memory with name: no-name-84971ab9-5fb7-4ad0-8e18-59ad509a0169[0m
[32m[I 2022-10-22 13:32:44,774][0m Trial 0 finished with value: 0.21123876123876123 and parameters: {'eta': 0.027726588485435945, 'max_depth': 7, 'min_child_weight': 4, 'subsample': 0.8603668707148974, 'lambda': 0.8987907218740773}. Best is trial 0 with value: 0.21123876123876123.[0m
[32m[I 2022-10-22 13:32:48,554][0m Trial 1 finished with value: 0.213036963036963 and parameters: {'eta': 0.11078836923869217, 'max_depth': 8, 'min_child_weight': 6, 'subsample': 0.8346537024501571, 'lambda': 0.8750573679164355}. Best is trial 1 with value: 0.213036963036963.[0m
[32m[I 2022-10-22 13:32:52,714][0m Trial 2 finished with value: 0.15014985014985016 and parameters: {'eta': 0.09008890514509538, 'max_depth': 9, 'min_child_weight': 4, 'subsample': 0.9869549886855347, 'lambda': 0.9495806979993842}. Best is trial 1 with value: 0.213036963036963.[0m
[32m[I 2022-10-2

In [54]:
#Optimisation function for lightgbm regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lgbm,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lgbm_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:39:10,660][0m A new study created in memory with name: no-name-41af0cc3-1ccb-4f0e-a27b-a1cf267cc721[0m
[32m[I 2022-10-22 13:39:11,216][0m Trial 0 finished with value: 0.15794205794205796 and parameters: {'boosting_type': 'gbdt', 'num_leaves': 18, 'max_depth': 33, 'learning_rate': 0.06546041068296811, 'n_estimators': 116, 'min_child_weight': 0.00423433444415523}. Best is trial 0 with value: 0.15794205794205796.[0m
[32m[I 2022-10-22 13:39:11,606][0m Trial 1 finished with value: 0.15604395604395602 and parameters: {'boosting_type': 'goss', 'num_leaves': 12, 'max_depth': 45, 'learning_rate': 0.037035997004261564, 'n_estimators': 125, 'min_child_weight': 0.004272133260975098}. Best is trial 0 with value: 0.15794205794205796.[0m
[32m[I 2022-10-22 13:39:12,516][0m Trial 2 finished with value: 0.17647352647352646 and parameters: {'boosting_type': 'dart', 'num_leaves': 20, 'max_depth': 13, 'learning_rate': 0.01086650841497578, 'n_estimators': 179, 'min_child_weig

In [55]:
#Optimisation function for support vector machine  regression with concat guide RNA target DNA one hot encoding
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_svr,
                x = concat_complementary_guide_RNAs_target_DNAs,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
svr_concat_gRNA_target_DNA = study.best_value

[32m[I 2022-10-22 13:40:01,498][0m A new study created in memory with name: no-name-df481f19-0ddc-44e2-bcc3-549ee87f112c[0m
[32m[I 2022-10-22 13:40:01,533][0m Trial 0 finished with value: 0.0 and parameters: {'kernel': 'sigmoid', 'degree': 5, 'C': 1.2251746388028235, 'epsilon': 4.1897593733224445}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:40:01,560][0m Trial 1 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 9, 'C': 3.7241920515406535, 'epsilon': 4.835332009918722}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-22 13:40:01,590][0m Trial 2 finished with value: 0.04832478632478633 and parameters: {'kernel': 'rbf', 'degree': 3, 'C': 1.4822244402116063, 'epsilon': 0.9141069230783971}. Best is trial 2 with value: 0.04832478632478633.[0m
[32m[I 2022-10-22 13:40:01,618][0m Trial 3 finished with value: 0.0 and parameters: {'kernel': 'rbf', 'degree': 3, 'C': 4.3471562363567715, 'epsilon': 1.5638669071315432}. Best is trial 2 with value: 0.0