In [0]:
# Location and format of the input file.
file_location = "/FileStore/tables/datos_limpios-2.csv"
file_type = "csv"

# CSV reader settings: schema inference is off and the first line is not
# treated as a header, so it is loaded as an ordinary data row.
infer_schema = "false"
first_row_is_header = "false"
delimiter = ","

# These options are specific to CSV input; other file types ignore them.
csv_options = {
    "inferSchema": infer_schema,
    "header": first_row_is_header,
    "sep": delimiter,
}
df = spark.read.format(file_type).options(**csv_options).load(file_location)

# Convert the loaded Spark DataFrame to pandas and build the feature matrix and target
import pandas as pd

# Materialize the Spark DataFrame as a pandas DataFrame.
db = df.toPandas()
# The file is read without a header (see the CSV options above), so the row
# labeled 0 presumably holds the column names as text; discard it.
db = db.drop(0)

# Convert every column to a numeric dtype; values that cannot be parsed
# become NaN instead of raising.
db = db.apply(pd.to_numeric, errors="coerce")

# The last column is the regression target.
target_column = db.columns[-1]

# Features are the columns at positions 1..20. The target is excluded
# explicitly: when the frame has 21 columns or fewer, the positional slice
# would otherwise include the label itself and leak it into the features.
feature_columns = [name for name in db.columns[1:21] if name != target_column]

# Keep only rows that are complete in the columns actually used. NaN values
# (possible after the coercion above) would otherwise turn the training loss
# into NaN and make the R2 computation fail.
complete_rows = db[feature_columns + [target_column]].notna().all(axis=1)

X = db.loc[complete_rows, feature_columns]
y = db.loc[complete_rows, target_column]

import mlflow
import mlflow.tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only
# and then applied unchanged to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train).astype(float)
X_test_scaled = scaler.transform(X_test).astype(float)

# Use the Databricks backend for MLflow tracking.
mlflow.set_tracking_uri("databricks")

# Close any run that is still active before a new one is started.
if mlflow.active_run() is not None:
    mlflow.end_run()
      
# Start a new MLflow run. The context manager ends the run when the block
# exits, so no explicit mlflow.end_run() call is needed inside it.
with mlflow.start_run():
    # Hyperparameters, logged so the run records what was actually trained.
    optimizer = 'adam'
    num_epochs = 200
    batch_size = 32
    validation_split = 0.2
    mlflow.log_params({
        "optimizer": optimizer,
        "num_epochs": num_epochs,
        "batch_size": batch_size,
        "validation_split": validation_split,
    })

    # Fully connected network: three ReLU hidden layers (128, 64, 32 units)
    # followed by a single linear output unit for regression.
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1),
    ])

    # NOTE(review): this import is not used in this block; it is kept in case
    # other cells rely on the name being bound.
    from keras.losses import mean_squared_error

    # Compile with the same optimizer value that was logged above, so the
    # recorded hyperparameter cannot drift from the one actually used.
    model.compile(optimizer=optimizer, loss="mse")

    # Train; Keras holds out `validation_split` of the training data for validation.
    history = model.fit(
        X_train_scaled,
        y_train,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_split=validation_split,
    )

    # Evaluate on the held-out test set and log the R2 score to the run.
    from sklearn.metrics import r2_score
    y_pred = model.predict(X_test_scaled)
    r2 = r2_score(y_test, y_pred)
    mlflow.log_metric("R2", r2)








   


Epoch 1/200
Epoch 2/200


Epoch 200/200
