In [1]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import optuna
import pickle

In [3]:
# Read the fingerprint table from the CSV file
df = pd.read_csv('Selected_fingerprints.csv')

# Build the modelling inputs: every column except the label and the
# 'smiles' column is a feature; 'IC50_label' is the target
X = df.drop(columns=['IC50_label', 'smiles'])
y = df['IC50_label']

In [5]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
from sklearn.metrics import accuracy_score

In [None]:
import optuna
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

def objective_ada(trial):
    """Optuna objective: mean cross-validated accuracy of an AdaBoost model.

    Hyperparameters for both the boosting ensemble and its decision-tree base
    estimator are sampled from ``trial``. The candidate model is scored with
    cross-validation on the training data only (module-level ``X_train`` /
    ``y_train``), so the hold-out test split is never used for model selection
    and stays available for an unbiased final evaluation.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean accuracy across the cross-validation folds (to be maximized).
    """
    # Boosting hyperparameters
    n_estimators = trial.suggest_int('n_estimators', 50, 500)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)
    # NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases --
    # confirm the installed version still accepts it.
    algorithm = trial.suggest_categorical('algorithm', ['SAMME', 'SAMME.R'])

    # Base estimator hyperparameters
    base_max_depth = trial.suggest_int('base_max_depth', 1, 10)
    base_min_samples_split = trial.suggest_int('base_min_samples_split', 2, 20)
    base_min_samples_leaf = trial.suggest_int('base_min_samples_leaf', 1, 20)

    # Define the base estimator with the suggested hyperparameters
    base_estimator = DecisionTreeClassifier(
        max_depth=base_max_depth,
        min_samples_split=base_min_samples_split,
        min_samples_leaf=base_min_samples_leaf,
        random_state=42
    )

    # `estimator` is the current keyword for the base learner
    model = AdaBoostClassifier(
        estimator=base_estimator,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        algorithm=algorithm,
        random_state=42
    )

    # Score on the training data with k-fold cross-validation instead of the
    # test split: tuning against the test split leaks it into model selection
    # and makes any later test-set score optimistic.
    cv_folds = 5
    fold_scores = cross_val_score(
        model, X_train, y_train, cv=cv_folds, scoring='accuracy'
    )

    return float(fold_scores.mean())

# Create the study with a seeded sampler so the hyperparameter search is
# reproducible across kernel restarts (same seed as the random_state=42 used
# for the data split and the models)
study_ada = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Run the search: 100 trials of the AdaBoost objective
study_ada.optimize(objective_ada, n_trials=100)

# Retrieve the best parameters
best_params_ada = study_ada.best_params
print("Best Parameters:", best_params_ada)