This notebook takes in sequence features and runs them through a different models. All model hyperparameters are 
selected using optuna. Models are run across 10 folds and benchmarking results are plotted
for mean squared error and pearson's correlation coefficient



In [1]:
#import libraries
import pandas as pd
import numpy as np 
import sklearn
import os
ROOT_PATH = os.path.abspath('')
ROOT_PATH

'C:\\Users\\ad3913\\Documents\\Github\\cas12-collateral-cleavage-prediction'

In [2]:
#Load up guide RNA And target DNA as dataframes
df_guide_RNA = pd.read_csv(ROOT_PATH + "/full_guide_RNA_sequences.csv")
df_target_DNA = pd.read_csv(ROOT_PATH + "/target_DNA_sequences.csv")

# # Load k_value_sigmoid from experiment on 24_05_2022 
from numpy import genfromtxt
k_values = genfromtxt(ROOT_PATH + '/k_values_sigmoid.csv', delimiter=',')
k_values

array([ 0.11984317,  0.13894219,  0.15437609,  0.06970868,  0.13297348,
        0.12425794,  0.18793106,  0.27095051,  0.16818124,  0.1347531 ,
        0.29226458,  0.12293203,  0.38708549,  0.19778096,  0.3622934 ,
        0.34890034,  0.20684573,  0.11417769, -0.07936754,  0.31408586,
        0.18672599,  0.13433442,  0.1847865 ,  0.24778192,  0.42079963,
        0.35078867,  0.09680571,  0.5202575 ,  0.22697745,  0.3248318 ,
        0.1702902 ,  0.24634137,  0.14220321, -2.51561011,  0.17162254,
        0.04222993,  0.24852536,  0.296885  ,  0.27256568,  0.32140478,
        0.21027936,  0.1127019 ,  0.20870236,  0.28398043,  0.32744511,
        0.2282549 ,  0.14870497,  0.32030304,  0.2445419 ,  0.08811278,
        0.44583873,  0.15249895,  0.23265821,  0.22829356,  0.2489641 ,
        0.16632398,  0.27098826,  0.18329734,  0.11389634,  0.29132781,
        0.35664269, -0.2822598 ,  0.14580803,  0.06875822,  0.09644608,
        0.10429324,  0.16959348,  0.32049426,  0.3353713 ,  0.18

In [3]:
def one_hot_encode_DNA(DNA_sequence):
    mapping = dict(zip("atcg", range(4)))    
    one_hot_encoded_sequence = [mapping[i] for i in DNA_sequence]
    return np.eye(4)[one_hot_encoded_sequence]

In [4]:
def one_hot_encode_RNA(gRNA_sequence):
    mapping = dict(zip("aucg", range(4)))    
    one_hot_encoded_sequence = [mapping[i] for i in gRNA_sequence]
    return np.eye(4)[one_hot_encoded_sequence]

In [7]:
guide_RNA_sequences = df_guide_RNA['Sequence']
guide_RNA_one_hot_encoded_sequences = []
for i in guide_RNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_RNA(i)
    guide_RNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence.ravel())

#generate feature names
feature_names_gRNA = []
for i in range(41):
    feature_names_gRNA.append('guide RNA A' + str(i+1))
    feature_names_gRNA.append('guide RNA U' + str(i+1))
    feature_names_gRNA.append('guide RNA C' + str(i+1))
    feature_names_gRNA.append('guide RNA G' + str(i+1))

In [8]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats.mstats import spearmanr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.metrics import make_scorer

def spearman_rank_scorer(model,X,Y):
    Y_predict = model.predict(X)
    spearman_rank = spearmanr(Y,Y_predict)
    rho = spearman_rank[0]
    return(rho)

model = LinearRegression()
score = cross_validate(model, guide_RNA_one_hot_encoded_sequences, k_values, cv = 10, scoring = spearman_rank_scorer)
score_mean = np.mean(score['test_score'])

In [9]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.linear_model import Ridge
from functools import *

def objective_ridge(trial,x,y):
    
    alpha = trial.suggest_float('alpha', 0, 1)
    solver = trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'])
    
    
    model = Ridge(
        alpha = alpha,
        solver = solver,
        )
    
    #Use 5 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ridge,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_gRNA = study.best_value



[32m[I 2022-10-21 17:50:11,375][0m A new study created in memory with name: no-name-3a276eab-6911-4e2e-8ceb-baeafd73be6b[0m
[32m[I 2022-10-21 17:50:11,429][0m Trial 0 finished with value: 0.05924075924075924 and parameters: {'alpha': 0.7437351920730865, 'solver': 'sparse_cg'}. Best is trial 0 with value: 0.05924075924075924.[0m
[32m[I 2022-10-21 17:50:11,474][0m Trial 1 finished with value: 0.06818181818181818 and parameters: {'alpha': 0.9368039554853894, 'solver': 'sparse_cg'}. Best is trial 1 with value: 0.06818181818181818.[0m
[32m[I 2022-10-21 17:50:11,510][0m Trial 2 finished with value: 0.05544455544455547 and parameters: {'alpha': 0.5240030272411542, 'solver': 'lsqr'}. Best is trial 1 with value: 0.06818181818181818.[0m
[32m[I 2022-10-21 17:50:11,812][0m Trial 3 finished with value: 0.05924075924075924 and parameters: {'alpha': 0.7407020257910297, 'solver': 'sag'}. Best is trial 1 with value: 0.06818181818181818.[0m
[32m[I 2022-10-21 17:50:11,865][0m Trial 4 fin

In [34]:
import optuna
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.linear_model import Lasso
from functools import *

def objective_lasso(trial,x,y):
    alpha = trial.suggest_float('alpha', 0, 1)
    
    model = Lasso(
        alpha = alpha
        )
    
    #Use 5 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lasso,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lasso_gRNA = study.best_value


[32m[I 2022-10-21 17:41:16,268][0m A new study created in memory with name: no-name-34fa6dc9-7fcc-4971-bc9c-ed8af945c330[0m
[32m[I 2022-10-21 17:41:16,309][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.20075400307127644}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:16,340][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.09657195313414302}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:16,370][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.9688264698851147}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:16,396][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.6994257066999374}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:16,436][0m Trial 4 finished with value: 0.0 and parameters: {'alpha': 0.28556410045426805}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:16,460][0m Trial 5 finished with value: 0.0 and parameters: {'alpha': 0.26

In [35]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from sklearn.linear_model import ElasticNet

def objective_ElasticNet(trial,x,y):
    alpha = trial.suggest_float('alpha', 0, 1)
    l1_ratio = trial.suggest_float('l1_ratio',0,1)
    
    model = ElasticNet(
        alpha = alpha,
        l1_ratio = l1_ratio
        )
    
    #Use 5 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_ElasticNet,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ElastiNet_gRNA = study.best_value




[32m[I 2022-10-21 17:41:44,769][0m A new study created in memory with name: no-name-23efb0a2-c324-44f1-812a-ead9d9164c1d[0m
[32m[I 2022-10-21 17:41:44,809][0m Trial 0 finished with value: 0.0 and parameters: {'alpha': 0.4794608202864947, 'l1_ratio': 0.45431800220532237}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:44,847][0m Trial 1 finished with value: 0.0 and parameters: {'alpha': 0.7608482039900188, 'l1_ratio': 0.14652568340580507}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:44,877][0m Trial 2 finished with value: 0.0 and parameters: {'alpha': 0.30963725073070447, 'l1_ratio': 0.3501238510122474}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:44,904][0m Trial 3 finished with value: 0.0 and parameters: {'alpha': 0.3530221017304439, 'l1_ratio': 0.1178233956438739}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:41:44,946][0m Trial 4 finished with value: 0.15264735264735263 and parameters: {'alpha': 0.006504050258

In [37]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_validate
from functools import *

def objective_dt(trial,x,y):
    
    ccp_alpha = trial.suggest_float('ccp_alpha', 0, 1)
    criterion = trial.suggest_categorical('criterion', ['squared_error', 'friedman_mse', 'absolute_error'])
    
    model =  DecisionTreeRegressor(
        ccp_alpha = ccp_alpha,
        criterion = criterion,
        random_state = 42)
    
    #Use 5 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_dt,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
dt_gRNA = study.best_value

[32m[I 2022-10-21 17:44:39,002][0m A new study created in memory with name: no-name-65f7b49d-0de7-477e-94a1-a962db6a5e2c[0m
[32m[I 2022-10-21 17:44:39,064][0m Trial 0 finished with value: 0.0 and parameters: {'ccp_alpha': 0.4403694069937888, 'criterion': 'friedman_mse'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:44:39,237][0m Trial 1 finished with value: 0.0 and parameters: {'ccp_alpha': 0.1512997270545553, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:44:39,438][0m Trial 2 finished with value: 0.0 and parameters: {'ccp_alpha': 0.6582767879310082, 'criterion': 'absolute_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:44:39,511][0m Trial 3 finished with value: 0.0 and parameters: {'ccp_alpha': 0.28411140577887906, 'criterion': 'squared_error'}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-10-21 17:44:39,566][0m Trial 4 finished with value: 0.0 and parameters: {'ccp_alpha': 0.8288276880535123,

In [39]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_validate

def objective_rf(trial,x,y):
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 30)
    max_samples = trial.suggest_float("max_samples", 0.01, 1.0)
    max_features = trial.suggest_float("max_features", 0.01, 1.0)
    
    model = RandomForestRegressor(
        n_estimators = n_estimators,
        min_samples_leaf = min_samples_leaf,
        max_samples = max_samples,
        max_features = max_features, 
        random_state = 42)
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_rf,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
rf_gRNA = study.best_value

[32m[I 2022-10-21 17:47:26,648][0m A new study created in memory with name: no-name-84dd8b55-2daa-49f4-9783-a1287f0e7338[0m
[33m[W 2022-10-21 17:47:31,107][0m Trial 0 failed because of the following error: KeyboardInterrupt()[0m
Traceback (most recent call last):
  File "C:\Users\ad3913\Anaconda3\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\ad3913\AppData\Local\Temp\ipykernel_14024\2979855102.py", line 23, in objective_rf
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
  File "C:\Users\ad3913\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 267, in cross_validate
    results = parallel(
  File "C:\Users\ad3913\Anaconda3\lib\site-packages\joblib\parallel.py", line 1046, in __call__
    while self.dispatch_one_batch(iterator):
  File "C:\Users\ad3913\Anaconda3\lib\site-packages\joblib\parallel.py", line 861, in dispatch_one_batch
    self._dispatch(t

KeyboardInterrupt: 

In [10]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from functools import *
from xgboost import XGBRegressor 

def objective_xgb(trial,x,y):
    eta = trial.suggest_float("eta", 0, 0.2)
    max_depth = trial.suggest_int("max_depth", 7, 11)
    min_child_weight = trial.suggest_int("min_child_weight", 2, 6)
    subsample = trial.suggest_float("subsample", 0.6, 1.0)
    reg_lambda = trial.suggest_float("lambda", 0.8, 1.0)
    
    model = XGBRegressor(
        eta = eta,
        max_depth = max_depth,
        min_child_weight = min_child_weight,
        subsample = subsample,
        reg_lambda = reg_lambda,
        nthread = 1)
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_xgb,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
xgb_gRNA = study.best_value

[32m[I 2022-10-21 17:50:23,991][0m A new study created in memory with name: no-name-41272816-163d-44b1-9ed3-5d9ee1df6110[0m
[32m[I 2022-10-21 17:50:25,620][0m Trial 0 finished with value: 0.14600399600399602 and parameters: {'eta': 0.14891251309042328, 'max_depth': 10, 'min_child_weight': 6, 'subsample': 0.8540727311209435, 'lambda': 0.9706339287047402}. Best is trial 0 with value: 0.14600399600399602.[0m
[32m[I 2022-10-21 17:50:27,468][0m Trial 1 finished with value: 0.14460539460539462 and parameters: {'eta': 0.1040081362144557, 'max_depth': 7, 'min_child_weight': 2, 'subsample': 0.9556125549774399, 'lambda': 0.8113841627412532}. Best is trial 0 with value: 0.14600399600399602.[0m
[32m[I 2022-10-21 17:50:29,200][0m Trial 2 finished with value: 0.23791208791208787 and parameters: {'eta': 0.06961797347975562, 'max_depth': 10, 'min_child_weight': 3, 'subsample': 0.615622929179774, 'lambda': 0.8420123297401936}. Best is trial 2 with value: 0.23791208791208787.[0m
[32m[I 2022

In [11]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from functools import *
from lightgbm import LGBMRegressor

def objective_lgbm(trial,x,y):
    boosting_type = trial.suggest_categorical("boosting_type", ['gbdt', 'dart', 'goss'])
    num_leaves = trial.suggest_int("num_leaves", 2, 50)
    max_depth = trial.suggest_int("max_depth", -1, 50)
    learning_rate = trial.suggest_float("learning_rate", 0.001, 0.1)
    n_estimators = trial.suggest_int("n_estimators", 50, 200)
    min_child_weight = trial.suggest_float("min_child_weight", 0.001, 0.005)
        
    model = LGBMRegressor(
        boosting_type = boosting_type,
        num_leaves = num_leaves,
        max_depth = max_depth,
        learning_rate = learning_rate,
        n_estimators = n_estimators,
        min_child_weight = min_child_weight,
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 10, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize')
optimization_function = partial(
                objective_lgbm,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lgbm_gRNA = study.best_value

ModuleNotFoundError: No module named 'lightgbm'

In [13]:
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from scipy.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_validate
from functools import *
from sklearn.svm import SVR

def objective_svr(trial,x,y):
    kernel = trial.suggest_categorical("kernel", ["linear", "poly", "rbf", "sigmoid"])
    degree = trial.suggest_int("degree", 1, 10)
    C = trial.suggest_float("C", 0, 5)
    epsilon = trial.suggest_float("epsilon", 0, 5)
    
        
    model = SVR(
        kernel = kernel,
        degree = degree,
        C = C,
        epsilon = epsilon
        )
    
    #Use 10 way cross validation for training.
    score = cross_validate(model, x, y, cv = 5, scoring = spearman_rank_scorer)
    score_mean = np.mean(score['test_score'])
    return(score_mean)
    
study = optuna.create_study(direction = 'maximize)
optimization_function = partial(
                objective_svr,
                x = guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
svr_gRNA = study.best_value

[32m[I 2022-10-19 18:17:01,340][0m A new study created in memory with name: no-name-1dbf6793-ab47-43fc-af05-fc5f6e9f9e46[0m
[32m[I 2022-10-19 18:17:01,361][0m Trial 0 finished with value: 1.2047362142133398 and parameters: {'kernel': 'linear', 'degree': 5, 'C': 3.022632875724411, 'epsilon': 3.2048251889986643}. Best is trial 0 with value: 1.2047362142133398.[0m
[32m[I 2022-10-19 18:17:01,385][0m Trial 1 finished with value: 0.08019004365017482 and parameters: {'kernel': 'poly', 'degree': 9, 'C': 1.1475622362476896, 'epsilon': 0.055910266778556594}. Best is trial 1 with value: 0.08019004365017482.[0m
[32m[I 2022-10-19 18:17:01,402][0m Trial 2 finished with value: 1.2047362142133398 and parameters: {'kernel': 'linear', 'degree': 5, 'C': 0.6114053351710419, 'epsilon': 1.9243457775956319}. Best is trial 1 with value: 0.08019004365017482.[0m
[32m[I 2022-10-19 18:17:01,432][0m Trial 3 finished with value: 0.09194829482143656 and parameters: {'kernel': 'rbf', 'degree': 4, 'C': 3

In [18]:
#One hot encode only the 20 gRNA bases that interact with the target DNA
complementary_guide_RNA_sequences = []
for i in guide_RNA_sequences:
    complementary_guide_RNA_sequence = i[21:41]
    complementary_guide_RNA_sequences.append(complementary_guide_RNA_sequence)

#One hot encode them
complementary_guide_RNA_one_hot_encoded_sequences = []
for i in complementary_guide_RNA_sequences:
    one_hot_encoded_sequence = one_hot_encode_RNA(i).ravel()
    complementary_guide_RNA_one_hot_encoded_sequences.append(one_hot_encoded_sequence)

#generate feature names
feature_names_complementary_gRNA = []
for i in range(20):
    feature_names_complementary_gRNA.append('guide RNA A' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA U' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA C' + str(i+1))
    feature_names_complementary_gRNA.append('guide RNA G' + str(i+1))

In [40]:
model = LinearRegression()
score = cross_validate(model, complementary_guide_RNA_one_hot_encoded_sequences, k_values, cv = 5, scoring = 'neg_mean_squared_error')
score_mean = -np.mean(score['test_score'])
linear_regression_complementary_gRNA = score_mean
linear_regression_complementary_gRNA

0.23937805191439362

In [41]:
study = optuna.create_study(direction = 'minimize')
optimization_function = partial(
                objective_ridge,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_complementary_gRNA = study.best_value

[32m[I 2022-10-21 10:44:32,756][0m A new study created in memory with name: no-name-5d24517b-0656-45eb-8088-8b3c34838de8[0m
[32m[I 2022-10-21 10:44:32,787][0m Trial 0 finished with value: 0.1788140460866425 and parameters: {'alpha': 0.7689143113528994, 'solver': 'saga'}. Best is trial 0 with value: 0.1788140460866425.[0m
[32m[I 2022-10-21 10:44:32,798][0m Trial 1 finished with value: 0.17952142127725873 and parameters: {'alpha': 0.7733514192347101, 'solver': 'auto'}. Best is trial 0 with value: 0.1788140460866425.[0m
[32m[I 2022-10-21 10:44:32,813][0m Trial 2 finished with value: 0.19770467955079038 and parameters: {'alpha': 0.31865918691020667, 'solver': 'lsqr'}. Best is trial 0 with value: 0.1788140460866425.[0m
[32m[I 2022-10-21 10:44:32,825][0m Trial 3 finished with value: 0.17345282400868942 and parameters: {'alpha': 0.9686490114380973, 'solver': 'cholesky'}. Best is trial 3 with value: 0.17345282400868942.[0m
[32m[I 2022-10-21 10:44:32,838][0m Trial 4 finished wi

In [42]:
study = optuna.create_study(direction = 'minimize')
optimization_function = partial(
                objective_lasso,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
lasso_complementary_gRNA = study.best_value

[32m[I 2022-10-21 10:44:39,364][0m A new study created in memory with name: no-name-47e75886-1c23-4d63-ae5c-c24aea8d135f[0m
[32m[I 2022-10-21 10:44:39,371][0m Trial 0 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.13860243753468438}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:39,379][0m Trial 1 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.8133508828699847}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:39,389][0m Trial 2 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.6155536553921392}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:39,397][0m Trial 3 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.5769101483981451}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:39,405][0m Trial 4 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.51301278645520

In [43]:
study = optuna.create_study(direction = 'minimize')
optimization_function = partial(
                objective_ElasticNet,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ElasticNet_complementary_gRNA = study.best_value

[32m[I 2022-10-21 10:44:49,899][0m A new study created in memory with name: no-name-08b412b1-dbd0-4bbb-8096-fa907c32d82a[0m
[32m[I 2022-10-21 10:44:49,909][0m Trial 0 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.9821956938875107, 'l1_ratio': 0.10772130833366445}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:49,935][0m Trial 1 finished with value: 0.18302909765585584 and parameters: {'alpha': 0.0009105393164136055, 'l1_ratio': 0.6007350717355755}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:49,941][0m Trial 2 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.7535819421062441, 'l1_ratio': 0.6573705512435053}. Best is trial 0 with value: 0.07605086973235141.[0m
[32m[I 2022-10-21 10:44:49,949][0m Trial 3 finished with value: 0.07605086973235141 and parameters: {'alpha': 0.18370192568227517, 'l1_ratio': 0.8090638013050026}. Best is trial 0 with value: 0.07605086973235141.

In [44]:
study = optuna.create_study(direction = 'minimize')
optimization_function = partial(
                objective_dt,
                x = complementary_guide_RNA_one_hot_encoded_sequences,
                y = k_values
            )
study.optimize(optimization_function, n_trials=100) 
ridge_gRNA = study.best_value

[32m[I 2022-10-21 10:45:26,383][0m A new study created in memory with name: no-name-f6ac92c5-6da0-476b-9c8a-5c53a51ba03f[0m


NameError: name 'objective_dt' is not defined