In [1]:
# ============================================================================
# DEPLOYMENT ON AMAZON SAGEMAKER
# Heart Disease Prediction Model
# ============================================================================

import json
import time

import boto3
import pandas as pd
import sagemaker
from sagemaker import get_execution_role
from sagemaker.deserializers import JSONDeserializer
from sagemaker.serializers import JSONSerializer
from sagemaker.sklearn import SKLearn
from sklearn.model_selection import train_test_split

# 1. INITIAL CONFIGURATION
separator = "=" * 70
print(separator)
print("CONFIGURACIÓN DE SAGEMAKER")
print(separator)

# Resolve the execution role, open a SageMaker session and take that
# session's default bucket. `prefix` namespaces the S3 keys this notebook
# uploads.
role = get_execution_role()
sess = sagemaker.Session()
bucket = sess.default_bucket()
prefix = 'heart-disease-model'

# Echo the resolved settings so the run is self-describing.
for label, value in (("Role", role), ("Bucket", bucket), ("Prefix", prefix)):
    print(f"{label}: {value}")

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
CONFIGURACIÓN DE SAGEMAKER


In [None]:
# 2. UPLOAD TRAINING DATA TO S3
print("\n" + "="*70)
print("SUBIENDO DATOS A S3")
print("="*70)

# Load the raw dataset from the notebook's working directory.
data = pd.read_csv('Heart_Disease_Prediction.csv')

# Encode the target as 0/1. Series.map() turns every value that is missing
# from the mapping into NaN, so validate the labels first and fail loudly
# instead of letting NaN targets reach the split and the uploaded CSVs.
label_encoding = {'Absence': 0, 'Presence': 1}
unexpected_labels = set(data['Heart Disease'].unique()) - set(label_encoding)
if unexpected_labels:
    raise ValueError(
        f"Unexpected 'Heart Disease' labels: {sorted(unexpected_labels, key=str)}; "
        f"expected only {list(label_encoding)}"
    )
data['Heart Disease'] = data['Heart Disease'].map(label_encoding)

# Stratified 70/30 split with a fixed seed: keeps the class balance in both
# partitions and makes the split reproducible.
train_data, test_data = train_test_split(
    data,
    test_size=0.3,
    random_state=42,
    stratify=data['Heart Disease'],
)

# Write both partitions to local CSV files (without the index column) ...
train_data.to_csv('train.csv', index=False)
test_data.to_csv('test.csv', index=False)

# ... and upload each one under its own key prefix in the session bucket.
train_input = sess.upload_data('train.csv', bucket=bucket, key_prefix=f'{prefix}/train')
test_input = sess.upload_data('test.csv', bucket=bucket, key_prefix=f'{prefix}/test')

print(f"Train data uploaded to: {train_input}")
print(f"Test data uploaded to: {test_input}")

In [None]:
# 3. CREATE THE ESTIMATOR AND TRAIN
separator = "=" * 70
print("\n" + separator)
print("CREANDO ESTIMATOR")
print(separator)

# Hyperparameters handed to the training script.
training_hyperparameters = {
    'alpha': 0.01,
    'num_iters': 1500,
}

# Estimator settings: training script, IAM role, training instance type and
# the framework / Python versions requested for the container.
estimator_settings = dict(
    entry_point='train.py',
    role=role,
    instance_type='ml.m5.large',
    framework_version='1.2-1',
    py_version='py3',
    hyperparameters=training_hyperparameters,
)
sklearn_estimator = SKLearn(**estimator_settings)

# Start training with the uploaded training CSV bound to the 'train' channel.
print("Iniciando entrenamiento...")
sklearn_estimator.fit({'train': train_input})
print("✓ Entrenamiento completado")

In [None]:
# 4. DEPLOY THE MODEL (CREATE THE ENDPOINT)
print("\n" + "="*70)
print("DEPLOYANDO MODELO - CREANDO ENDPOINT")
print("="*70)

# Single source of truth for the endpoint name (used by deploy and the log line).
endpoint_name = 'heart-disease-endpoint'

# The test cells further down send a plain dict to the endpoint and read the
# reply as a dict (pred['prediction'], json.dumps(prediction)). The predictor
# returned for a scikit-learn estimator exchanges NumPy payloads by default,
# so JSON serialization/deserialization is requested explicitly here.
# NOTE(review): this assumes the inference handlers in train.py accept and
# emit application/json - confirm against that script.
predictor = sklearn_estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.t2.medium',
    endpoint_name=endpoint_name,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)

print("✓ Endpoint creado exitosamente")
print(f"Endpoint name: {endpoint_name}")

In [None]:
# 5. TEST THE ENDPOINT
print("\n" + "="*70)
print("PROBANDO EL ENDPOINT")
print("="*70)

# One hand-written patient record used as a smoke test for the endpoint.
test_patient = {
    'Age': 60,
    'Cholesterol': 300,
    'BP': 150,
    'Max HR': 120,
    'ST depression': 2.5,
    'Number of vessels fluro': 2,
    'Chest pain type': 3,
    'Thallium': 7
}

print("Input:")
print(json.dumps(test_patient, indent=2))

# Time a single request/response round trip. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals; time.time() is
# wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()

prediction = predictor.predict(test_patient)

latency = (time.perf_counter() - start_time) * 1000  # seconds -> milliseconds

print("\nRespuesta del endpoint:")
print(json.dumps(prediction, indent=2))
print(f"\nLatencia: {latency:.2f} ms")

In [None]:
# 6. TEST WITH SEVERAL PATIENTS FROM THE TEST SET
separator = "=" * 70
print("\n" + separator)
print("EVALUACIÓN CON DATOS DE PRUEBA")
print(separator)

# Feature columns sent to the endpoint, each paired with the built-in type its
# value is cast to, so the payload holds plain Python int/float values.
feature_casts = {
    'Age': int,
    'Cholesterol': int,
    'BP': int,
    'Max HR': int,
    'ST depression': float,
    'Number of vessels fluro': int,
    'Chest pain type': int,
    'Thallium': int,
}

# Evaluate on the first ten rows of the held-out split.
test_samples = test_data.head(10)

results = []
for _, sample in test_samples.iterrows():
    # Build the request payload for this patient and query the endpoint.
    patient_data = {column: cast(sample[column]) for column, cast in feature_casts.items()}
    pred = predictor.predict(patient_data)

    true_label = int(sample['Heart Disease'])
    predicted_label = pred['prediction']

    results.append({
        'True': true_label,
        'Predicted': predicted_label,
        'Probability': pred['probability'],
        'Correct': predicted_label == true_label,
    })

results_df = pd.DataFrame(results)
print(results_df)

# Share of sampled patients whose predicted label matches the true label.
correct_count = results_df['Correct'].sum()
accuracy = correct_count / len(results_df)
print(f"\nAccuracy en muestra: {accuracy:.2%}")

In [None]:
# 7. CLEAN UP RESOURCES
separator = "=" * 70
print("\n" + separator)
print("LIMPIANDO RECURSOS")
print(separator)

# Delete the endpoint through the predictor.
predictor.delete_endpoint()
print("✓ Endpoint eliminado")

# Optional: also delete the model
# predictor.delete_model()