In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
diabetes = pd.read_csv('diabetes_dataset.csv')

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


X = diabetes.drop(columns=['Outcome','Pregnancies','SkinThickness' ])
y = diabetes['Outcome']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.7597402597402597
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        99
           1       0.65      0.71      0.68        55

    accuracy                           0.76       154
   macro avg       0.74      0.75      0.74       154
weighted avg       0.77      0.76      0.76       154

Confusion Matrix:
[[78 21]
 [16 39]]


In [3]:
X = diabetes.drop(columns=['Outcome','Pregnancies','SkinThickness' ])
y = diabetes['Outcome']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Accuracy: 0.7597402597402597
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        99
           1       0.65      0.71      0.68        55

    accuracy                           0.76       154
   macro avg       0.74      0.75      0.74       154
weighted avg       0.77      0.76      0.76       154

Confusion Matrix:
[[78 21]
 [16 39]]


In [7]:
def eval(model, X_train, X_test):
    y_pred = model.predict(X_test)
    y_pred_train = model.predict(X_train)
    print("Test_Set")
    print(classification_report(y_test,y_pred))
    print("Train_Set")
    print(classification_report(y_train,y_pred_train))   
    plt.figure(figsize=(12,8))


from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

#max_depth: Profundidad máxima del árbol.
#max_features: Número máximo de características a considerar en cada división de nodo.
#n_estimators: Número de estimadores en el Random Forest.
rf_params = {'n_estimators': [10, 15, 20],
    'max_depth': [None, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]}
rf = RandomForestClassifier()
rf_model_cv = GridSearchCV(rf,rf_params,cv=5,n_jobs = -1)
rf_model_cv.fit(X_train,y_train)

rf_model_cv.best_params_

rf_tuned = RandomForestClassifier(max_depth=10,
                                 max_features=6,
                                 min_samples_split=3,
                                 n_estimators=10)
rf_tuned.fit(X_train,y_train)

  

eval(rf_tuned,X_train,X_test)

Test_Set
              precision    recall  f1-score   support

           0       0.80      0.77      0.78        99
           1       0.61      0.65      0.63        55

    accuracy                           0.73       154
   macro avg       0.71      0.71      0.71       154
weighted avg       0.73      0.73      0.73       154

Train_Set
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       401
           1       0.97      0.95      0.96       213

    accuracy                           0.97       614
   macro avg       0.97      0.97      0.97       614
weighted avg       0.97      0.97      0.97       614



<Figure size 1200x800 with 0 Axes>

In [9]:
from joblib import dump, load
dump(rf_tuned,'diabetesCLasificacion.joblib')

['diabetesCLasificacion.joblib']

In [8]:
# Crear una nueva instancia de datos manualmente
##nueva_instancia = [[148, 72, 0, 33.6, 0.627, 50]]
nueva_instancia = pd.DataFrame([[148, 72, 0, 33.6, 0.627, 50]],
                               columns=['Glucose', 'BloodPressure', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age'])
nueva_instancia1 = pd.DataFrame([[85, 66, 0, 26.6, 0.351, 31]],
                               columns=['Glucose', 'BloodPressure', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age'])

# Realizar la predicción utilizando el modelo entrenado
prediccion = rf_tuned.predict(nueva_instancia1)

# Imprimir la predicción
if prediccion[0] == 1:
    print("El modelo predice que tiene diabetes.")
else:
    print("El modelo predice que no tiene diabetes.")


El modelo predice que no tiene diabetes.
