# Cargar datos
<hr>

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("Datos_con_ECFP.csv")

In [3]:
df.head()

Unnamed: 0,SMILES,BBB+/BBB-,ECFP,0,1,2,3,4,5,6,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,O=C(O)c1cc(N=Nc2ccc(S(=O)(=O)Nc3ccccn3)cc2)ccc1O,BBB-,[0 0 0 ... 0 0 0],0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,COC1(NC(=O)C(C(=O)O)c2ccc(O)cc2)C(=O)N2C(C(=O)...,BBB-,[0 1 0 ... 0 0 0],0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Oc1c(I)cc(Cl)c2cccnc12,BBB-,[0 0 0 ... 0 0 0],0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,CCNC(=NCCSCc1ncccc1Br)NC#N,BBB-,[0 0 0 ... 0 0 0],0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,CN1CC[C@]23c4c5ccc(OC6O[C@H](C(=O)O)[C@@H](O)[...,BBB-,[0 0 0 ... 0 0 0],0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
X = df.drop(columns=["SMILES", "BBB+/BBB-", "ECFP"])
y = df["BBB+/BBB-"]

# Modelo Gradient Boosting con PCA
<hr>

In [6]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
pipe = Pipeline([
    ('pca', PCA()),
    ('GB', GradientBoostingClassifier())
])

In [10]:
param_grid = {
    'pca__n_components': [150],  # Ejemplo de opciones para el número de componentes en PCA
    'GB__learning_rate': [0.05, 0.1, 0.2],
    'GB__n_estimators': [50, 100, 200],
    'GB__max_depth': [10, 20, 30],
    'GB__max_features': ['sqrt', 'log2'],
}


In [11]:
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5, scoring='accuracy', verbose=2)

In [12]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=50, pca__n_components=150; total time=   7.3s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=50, pca__n_components=150; total time=   6.5s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=50, pca__n_components=150; total time=   6.3s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=50, pca__n_components=150; total time=   6.7s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=50, pca__n_components=150; total time=   6.0s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=100, pca__n_components=150; total time=  11.1s
[CV] END GB__learning_rate=0.05, GB__max_depth=10, GB__max_features=sqrt, GB__n_estimators=100, pca__n_components=1

KeyboardInterrupt: 