# ü§ñ Modelo Predicci√≥n de Diabetes 
Pima Dataset de `statsmodels.api.datasets.get_rdataset("Pima.tr", "MASS").data` para aprender Streamlit

In [1]:
import statsmodels.api as sm
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import joblib

# Cargar dataset
df = sm.datasets.get_rdataset("Pima.tr", "MASS").data
df

Unnamed: 0,npreg,glu,bp,skin,bmi,ped,age,type
0,5,86,68,28,30.2,0.364,24,No
1,7,195,70,33,25.1,0.163,55,Yes
2,5,77,82,41,35.8,0.156,35,No
3,0,165,76,43,47.9,0.259,26,No
4,0,107,60,25,26.4,0.133,23,No
...,...,...,...,...,...,...,...,...
195,2,141,58,34,25.4,0.699,24,No
196,7,129,68,49,38.5,0.439,43,Yes
197,0,106,70,37,39.4,0.605,22,No
198,1,118,58,36,33.3,0.261,23,No


In [2]:
# Codificar variable objetivo
df['type'] = LabelEncoder().fit_transform(df['type'])  # Yes=1, No=0

# Separar X e y
X = df.drop('type', axis=1)
y = df['type']

# Entrenar modelo
modelo = LogisticRegression(max_iter=1000)
modelo.fit(X, y)

In [3]:
# --- Evaluaci√≥n del modelo ---
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Predicciones sobre el mismo conjunto (no recomendado)
y_pred = modelo.predict(X)

# Exactitud
acc = accuracy_score(y, y_pred)
print(f"‚úÖ Exactitud del modelo: {acc:.3f}")

# Matriz de confusi√≥n
cm = confusion_matrix(y, y_pred)
print("\nMatriz de confusi√≥n:")
print(cm)

# Reporte de clasificaci√≥n
print("\nReporte de clasificaci√≥n:")
print(classification_report(y, y_pred, target_names=["No diab√©tica", "Diab√©tica"]))



‚úÖ Exactitud del modelo: 0.765

Matriz de confusi√≥n:
[[115  17]
 [ 30  38]]

Reporte de clasificaci√≥n:
              precision    recall  f1-score   support

No diab√©tica       0.79      0.87      0.83       132
   Diab√©tica       0.69      0.56      0.62        68

    accuracy                           0.77       200
   macro avg       0.74      0.72      0.72       200
weighted avg       0.76      0.77      0.76       200



In [4]:
from pathlib import Path

# --- Guardar en carpeta artefacto/ ---
path_ar="artefactos"
ART_DIR = Path(path_ar)
ART_DIR.mkdir(parents=True, exist_ok=True)

MODEL_PATH = ART_DIR / "modelo_pima.pkl"
joblib.dump(modelo, MODEL_PATH)
print(f"‚úÖ Modelo guardado en {MODEL_PATH.resolve()}")

# (Opcional) Ver qu√© hay en artefactos/
print("Contenido de "+path_ar+"/:", list(ART_DIR.iterdir()))

‚úÖ Modelo guardado en /opt/notebooks/252ml/vide/artefactos/modelo_pima.pkl
Contenido de artefactos/: [PosixPath('artefactos/modelo_pima.pkl')]


In [5]:
# Revisar archivos para streamlit_app.py
from pathlib import Path

# Ruta al modelo dentro de artefactos/
MODEL_PATH = Path("artefactos") / "modelo_pima.pkl"
modelo = joblib.load(MODEL_PATH)
print(f"‚úÖ Modelo le√≠do de {MODEL_PATH.resolve()}")
modelo

‚úÖ Modelo le√≠do de /opt/notebooks/252ml/vide/artefactos/modelo_pima.pkl


# Deploy

In [6]:
# streamlit_app.py
import streamlit as st
import pandas as pd
import joblib
from pathlib import Path

# Ruta al modelo dentro de artefactos/
MODEL_PATH = Path("artefactos") / "modelo_pima.pkl"
modelo = joblib.load(MODEL_PATH)

st.title("ü§ñ Predicci√≥n de Diabetes (Pima Dataset)")
st.write("Ingrese los valores cl√≠nicos para predecir si la paciente probablemente tiene diabetes.")

data = {
    'npreg': st.slider("N√∫mero de embarazos", 0, 20, 2),
    'glu':   st.slider("Nivel de glucosa (mg/dl)", 50, 200, 100),
    'bp':    st.slider("Presi√≥n arterial (mmHg)", 40, 130, 70),
    'skin':  st.slider("Espesor del pliegue cut√°neo (mm)", 7, 100, 20),
    'bmi':   st.slider("IMC", 10.0, 50.0, 25.0),
    'ped':   st.slider("Pedigree de diabetes", 0.0, 2.5, 0.5),
    'age':   st.slider("Edad (a√±os)", 18, 90, 35)
}
data

2025-11-11 15:33:03.172 
  command:

    streamlit run /opt/conda/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]


{'npreg': 2,
 'glu': 100,
 'bp': 70,
 'skin': 20,
 'bmi': 25.0,
 'ped': 0.5,
 'age': 35}

In [7]:
#if st.button("Predecir"):

In [8]:
    entrada = pd.DataFrame([data])
    pred = modelo.predict(entrada)[0]
    prob = modelo.predict_proba(entrada)[0][1]
    resultado = "Diab√©tica" if pred == 1 else "No diab√©tica"
    st.write(f"Resultado: **{resultado}**")
    st.write(f"Probabilidad estimada: **{prob:.2f}**")

In [9]:
entrada

Unnamed: 0,npreg,glu,bp,skin,bmi,ped,age
0,2,100,70,20,25.0,0.5,35


In [10]:
resultado

'No diab√©tica'

In [11]:
prob

0.09206579282945723