In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn import linear_model
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import roc_auc_score
from sklearn.neighbors import KNeighborsClassifier

## Datos

In [4]:
# Read the raw CSV and one-hot encode the `smoking_history` and `gender` columns.
target = 'diabetes'
df = pd.get_dummies(
    pd.read_csv('diabetes.csv'),
    columns=['smoking_history', 'gender'],
)

# Separate the label from the predictors. `drop` returns a new frame,
# so `df` itself still contains the target column afterwards.
y = df[target]
X = df.drop(columns=[target])

# Hold out 20% of the rows for the final evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Modelos con todas las variables

### Clasificador KNN

In [None]:
# KNN pipeline: the scaler runs before the classifier inside the pipeline,
# so it is re-fit on each cross-validation training fold.
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardize the features
    ('knn', KNeighborsClassifier())
])

# Candidate hyperparameters from which the randomized search samples.
params_grid = {
    'knn__n_neighbors': np.arange(1, 30),  # k = 1 .. 29
    'knn__weights': ['uniform', 'distance'],
    'knn__metric': ['euclidean', 'manhattan']
}

# random_state pins which 15 candidates are drawn, so the selected model and
# the reported AUC are reproducible after a kernel restart (same seed as the
# train/test split).
random_search_knn = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=params_grid,
    n_iter=15,
    cv=5,
    n_jobs=-1,
    scoring='roc_auc',
    random_state=42,
)
random_search_knn.fit(X_train, y_train)

# Evaluate the best pipeline on the held-out test set, using the predicted
# probability of the second class (column 1 of predict_proba) as the score.
random_knn = random_search_knn.best_estimator_.predict_proba(X_test)[:, 1]
roc_auc_score(y_score=random_knn, y_true=y_test)

0.8127733435381588

### Regresión Logística

In [5]:
# Logistic-regression pipeline: polynomial feature expansion, then
# standardization, then the classifier.
pipeline = Pipeline([
    ('poly', PolynomialFeatures(include_bias=False)),
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(max_iter=10000, random_state=42))
])

# Candidate hyperparameters from which the randomized search samples.
# NOTE(review): scikit-learn documents that C is ignored when penalty is None,
# so the three C values paired with penalty=None are presumably equivalent
# candidates — confirm and consider splitting the search space.
params_grid = {
    'poly__degree': [1, 2, 3],
    'logreg__penalty': ['l2', None],
    'logreg__C': [0.01, 0.1, 1]
}

# random_state pins which 15 of the 18 combinations are drawn, so the selected
# model and the reported AUC are reproducible after a kernel restart.
random_search_log_reg = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=params_grid,
    n_iter=15,
    cv=5,
    n_jobs=-1,
    scoring='roc_auc',
    random_state=42,
)
random_search_log_reg.fit(X_train, y_train)

# Evaluate the best pipeline on the held-out test set, using the predicted
# probability of the second class (column 1 of predict_proba) as the score.
random_log_reg = random_search_log_reg.best_estimator_.predict_proba(X_test)[:, 1]
roc_auc_score(y_score=random_log_reg, y_true=y_test)





0.8385106381208098