In [None]:
% pip install optuna

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
import optuna
import pandas as pd

# Read the first sheet of each cleaned workbook into a DataFrame.
# Both workbooks are expected to list the samples in the same row order.
predictors_df = pd.read_excel('Predictors_Cleaned.xlsx', sheet_name=0)  # rows: samples, columns: predictors
outcomes_df = pd.read_excel('Outcomes_Cleaned.xlsx', sheet_name=0)  # rows: samples, columns: mechanisms

# Remove the metadata column from each frame before modelling.
X = predictors_df.drop(columns='SAMPLE NAME')
Y = outcomes_df.drop(columns='MECHANISM')  # Full DF must iterate through

# First split: hold out 30% of the samples as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    shuffle=True,
    random_state=42,
)

# Second split: carve a validation set out of the training portion
# (no test_size given, so scikit-learn's default split fraction applies).
x_trainval, x_valid, y_trainval, y_valid = train_test_split(
    x_train,
    y_train,
    shuffle=True,
    random_state=42,
)



In [None]:
# XGBoost

def xgb_objective(trial):
    """Optuna objective: train an XGBRegressor and return its validation R^2.

    The model is fitted on ``x_trainval``/``y_trainval`` with early stopping
    monitored on ``x_valid``/``y_valid``, then scored on that same validation
    split.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The signed R^2 of the validation predictions. The sign is kept so that
        a model worse than the mean predictor (negative R^2) is ranked as bad
        when the study maximizes this value.
    """
    param = {
        'tree_method': 'gpu_hist',  # uses GPU for faster training
        'sampling_method': 'gradient_based',
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'lambda': trial.suggest_float('lambda', 7.0, 17.0, log=True),
        'alpha': trial.suggest_float('alpha', 7.0, 17.0, log=True),
        # 'eta' is XGBoost's alias for 'learning_rate'; only 'learning_rate' is
        # tuned so that two conflicting values are never passed for one setting.
        'gamma': trial.suggest_categorical('gamma', [18, 19, 20, 21, 22, 23, 24, 25]),
        'learning_rate': trial.suggest_categorical(
            'learning_rate', [0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]
        ),
        'colsample_bytree': trial.suggest_categorical(
            'colsample_bytree', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        ),
        'colsample_bynode': trial.suggest_categorical(
            'colsample_bynode', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        ),
        'n_estimators': trial.suggest_int('n_estimators', 400, 1000),
        'min_child_weight': trial.suggest_int('min_child_weight', 8, 600),
        'max_depth': trial.suggest_categorical('max_depth', [3, 4, 5, 6, 7]),
        'subsample': trial.suggest_categorical('subsample', [0.5, 0.6, 0.7, 0.8, 1.0]),
        'random_state': 42
    }

    model = XGBRegressor(**param)

    # NOTE(review): newer XGBoost releases expect `early_stopping_rounds` (and
    # a `device` setting instead of 'gpu_hist') on the constructor -- confirm
    # against the installed XGBoost version.
    model.fit(x_trainval, y_trainval, eval_set=[(x_valid, y_valid)], early_stopping_rounds=10, verbose=False)

    predict = model.predict(x_valid)

    # r2_score is not symmetric: ground truth first, predictions second.
    return r2_score(y_valid, predict)


# Run the XGBoost hyperparameter search. The sampler is seeded so that the
# sequence of sampled hyperparameters is reproducible across notebook runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(xgb_objective, n_trials=50, show_progress_bar=True)

# Report the outcome of the search.
print("Number of finished trials: {}".format(len(study.trials)))
print("Best trial:")
trial = study.best_trial

print("  Value: {}".format(trial.value))

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

# Adapted from: https://www.kaggle.com/code/alisultanov/regression-xgboost-optuna

In [None]:
from sklearn.ensemble import RandomForestRegressor
import numpy as np

def rf_objective(trial):
    """Optuna objective: train a RandomForestRegressor and return its validation R^2.

    Hyperparameters are scored on the validation split (``x_valid``/``y_valid``)
    rather than on the test split, so the test set stays untouched by model
    selection and remains usable for a final unbiased evaluation.

    Args:
        trial: The ``optuna`` trial used to sample hyperparameters.

    Returns:
        The signed R^2 of the validation predictions (higher is better).
    """
    # Hyperparameters
    n_estimators = trial.suggest_int("n_estimators", 10, 200, log=True)
    max_depth = trial.suggest_int("max_depth", 2, 32)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 10)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)

    # Model random forest
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        random_state=42,
    )
    model.fit(x_trainval, y_trainval)

    # Calculate r2 on the validation split. The sign is kept: wrapping the
    # score in abs() would rank a strongly negative R^2 as a good trial.
    y_pred = model.predict(x_valid)
    return r2_score(y_valid, y_pred)

# Create the study object. The sampler is seeded so that the sequence of
# sampled hyperparameters is reproducible across notebook runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(rf_objective, n_trials=50, show_progress_bar=True)

print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)

# Adapted from: https://www.kaggle.com/code/mustafagerme/optimization-of-random-forest-model-using-optuna

In [None]:
from sklearn import linear_model

def lasso_objective(trial):
    """Optuna objective: train a Lasso regression and return its validation R^2.

    The regularization strength is scored on the validation split
    (``x_valid``/``y_valid``) rather than on the test split, so the test set
    stays untouched by model selection.

    Args:
        trial: The ``optuna`` trial used to sample the ``alpha`` hyperparameter.

    Returns:
        The signed R^2 of the validation predictions (higher is better).
    """
    # Hyperparams
    _alpha = trial.suggest_float("alpha", 0.0001, 0.01)

    # Model Lasso
    lasso = linear_model.Lasso(alpha=_alpha, random_state=0)
    lasso.fit(x_trainval, y_trainval)

    # Calculating r2 on the validation split. The sign is kept: wrapping the
    # score in abs() would rank a strongly negative R^2 as a good trial.
    pred = lasso.predict(x_valid)
    return r2_score(y_valid, pred)

# Run the Lasso hyperparameter search. The sampler is seeded so that the
# sequence of sampled hyperparameters is reproducible across notebook runs.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(lasso_objective, n_trials=50, show_progress_bar=True)

print("Best trial:", study.best_trial)
print("Best hyperparameters:", study.best_params)