<a href="https://colab.research.google.com/github/JuanQuiroga12/DeepLearning/blob/main/HomeWork2_JuanQuiroga%26MarielbyPaz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# Team members: Juan Quiroga and Marielby Paz
# Homework 2
import pandas as pd
import numpy as np
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from google.colab import files
from sklearn.preprocessing import RobustScaler

def prepare_data():
    """Upload the Excel workbook, encode it, split it 80/20 and scale it.

    The user is prompted (via ``google.colab.files.upload``) for a workbook
    whose sheet ``'Datos'`` holds the dataset.  The ``'GENERO'`` column is
    mapped to 0/1, ``'EVOLUCION'`` is used as the regression target and every
    remaining column is used as a feature.

    The split is positional (first 80% of the rows for training, the rest for
    testing).  Both ``RobustScaler`` instances are fitted on the *training*
    rows only and then applied to the test rows, so no statistics of the
    held-out data leak into preprocessing.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test, scaler_X, scaler_y)`` where
        the four arrays are ``float32`` and the targets are column vectors of
        shape ``(n, 1)``.

    Raises:
        ValueError: if no file was uploaded.
    """
    print("Please upload your Excel file")
    uploaded = files.upload()
    if not uploaded:
        # Fail with an explicit message instead of an opaque IndexError.
        raise ValueError("No file was uploaded; cannot prepare the dataset.")
    file_name = next(iter(uploaded))
    df = pd.read_excel(file_name, sheet_name='Datos')

    print("\nValores nulos:", df.isnull().sum().sum())
    df['GENERO'] = df['GENERO'].map({'MASCULINO': 0, 'FEMENINO': 1})

    print("\nEstadísticas del DataFrame:")
    print(df.describe())

    # Separate the label (as a column vector) from the features.
    y = df['EVOLUCION'].values.reshape(-1, 1)
    X = df.drop('EVOLUCION', axis=1).values

    # Split 80/20 BEFORE scaling so the scalers never see the test rows.
    train_size = int(0.8 * len(X))
    X_train_raw, X_test_raw = X[:train_size], X[train_size:]
    y_train_raw, y_test_raw = y[:train_size], y[train_size:]

    # RobustScaler (median / IQR) is less sensitive to outliers.
    scaler_X = RobustScaler()
    scaler_y = RobustScaler()

    # Fit on the training split only; reuse the fitted scalers for the test split.
    X_train = scaler_X.fit_transform(X_train_raw).astype(np.float32)
    y_train = scaler_y.fit_transform(y_train_raw).astype(np.float32)
    X_test = scaler_X.transform(X_test_raw).astype(np.float32)
    y_test = scaler_y.transform(y_test_raw).astype(np.float32)

    return X_train, y_train, X_test, y_test, scaler_X, scaler_y

class AdvancedRegression(tf.keras.Model):
    """Fully connected regressor with three hidden stages and a scalar output.

    Every hidden stage applies BatchNormalization, a Dense layer (128, 64 and
    32 units, He-normal initialised, no built-in activation) and a LeakyReLU
    with negative slope 0.1.  The first two stages are followed by Dropout
    (rates 0.3 and 0.2).  The output layer is a single-unit Dense layer.
    """

    def __init__(self, num_features):
        # ``num_features`` is accepted for the caller's convenience; the layers
        # below do not read it.
        super().__init__()
        layers = tf.keras.layers

        # Stage 1: 128 units, dropout 0.3
        self.batch_norm1 = layers.BatchNormalization()
        self.dense1 = layers.Dense(128, kernel_initializer='he_normal')
        self.activation1 = layers.LeakyReLU(negative_slope=0.1)
        self.dropout1 = layers.Dropout(0.3)

        # Stage 2: 64 units, dropout 0.2
        self.batch_norm2 = layers.BatchNormalization()
        self.dense2 = layers.Dense(64, kernel_initializer='he_normal')
        self.activation2 = layers.LeakyReLU(negative_slope=0.1)
        self.dropout2 = layers.Dropout(0.2)

        # Stage 3: 32 units, no dropout
        self.batch_norm3 = layers.BatchNormalization()
        self.dense3 = layers.Dense(32, kernel_initializer='he_normal')
        self.activation3 = layers.LeakyReLU(negative_slope=0.1)

        # Single-unit regression head
        self.output_layer = layers.Dense(1, kernel_initializer='he_normal')

    def call(self, x, training=False):
        """Run the forward pass; ``training`` toggles batch-norm and dropout."""
        stages = (
            (self.batch_norm1, self.dense1, self.activation1, self.dropout1),
            (self.batch_norm2, self.dense2, self.activation2, self.dropout2),
            (self.batch_norm3, self.dense3, self.activation3, None),
        )
        hidden = x
        for norm, dense, activation, dropout in stages:
            hidden = norm(hidden, training=training)
            hidden = activation(dense(hidden))
            if dropout is not None:
                hidden = dropout(hidden, training=training)
        return self.output_layer(hidden)

def custom_loss(y_true, y_pred, epsilon=1e-7):
    """Return MSE plus 0.1 times the mean absolute percentage error.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of predictions, broadcastable against ``y_true``.
        epsilon: lower bound applied to ``|y_true|`` in the percentage term.
            The targets used in this file are centred by ``RobustScaler``, so
            values at (or extremely close to) zero are expected; dividing by
            them directly yields inf/NaN losses and gradients.  Raise this
            value to further damp the weight of near-zero targets.

    Returns:
        Scalar tensor ``mean((y_true - y_pred)^2)
        + 0.1 * mean(|y_true - y_pred| / max(|y_true|, epsilon))``.
    """
    residual = y_true - y_pred
    mse = tf.reduce_mean(tf.square(residual))
    # Clamp the denominator's magnitude so a zero target cannot produce inf/NaN.
    safe_denominator = tf.maximum(tf.abs(y_true), epsilon)
    absolute_percentage = tf.reduce_mean(tf.abs(residual) / safe_denominator)
    return mse + 0.1 * absolute_percentage

def train_model(X_train, y_train, X_test, y_test, scaler_y, *,
                num_epochs=2000, batch_size=6, initial_learning_rate=0.001,
                patience=200, min_delta=0.0001):
    """Train an ``AdvancedRegression`` model with a manual mini-batch loop.

    Each epoch shuffles the training rows, runs one optimiser step per
    mini-batch using ``custom_loss``, and then measures the mean absolute
    percentage error of both splits in the target's original units (via
    ``scaler_y.inverse_transform``).  Training stops early when the test
    error has not improved by more than ``min_delta`` for ``patience``
    consecutive epochs.

    NOTE(review): early stopping monitors the *test* split, so the reported
    test error is optimistically biased; a separate validation split would
    be cleaner.

    Args:
        X_train, y_train: scaled training features / targets (targets 2-D).
        X_test, y_test: scaled evaluation features / targets (targets 2-D).
        scaler_y: fitted scaler used to map targets back to original units.
        num_epochs: maximum number of epochs (must be >= 1).
        batch_size: mini-batch size (must be >= 1).
        initial_learning_rate: starting rate of the exponential decay schedule.
        patience: epochs without improvement tolerated before stopping.
        min_delta: minimum decrease in test error that counts as improvement.

    Returns:
        Tuple ``(model, train_error, test_error, scaler_y)`` where the errors
        are the percentage errors of the last epoch that was run.

    Raises:
        ValueError: if ``num_epochs`` or ``batch_size`` is smaller than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Model initialization
    num_features = X_train.shape[1]
    model = AdvancedRegression(num_features)

    # Learning rate drops by 5% every 100 optimiser steps (staircase decay).
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate,
        decay_steps=100,
        decay_rate=0.95,
        staircase=True)

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    # Convert to tensors
    X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
    y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
    X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)
    y_test = tf.convert_to_tensor(y_test, dtype=tf.float32)

    # The targets never change, so map them back to original units once
    # instead of on every epoch.
    y_train_original = scaler_y.inverse_transform(y_train.numpy())
    y_test_original = scaler_y.inverse_transform(y_test.numpy())
    num_samples = len(X_train)

    # Training metrics
    train_errors = []
    test_errors = []
    best_error = float('inf')
    patience_counter = 0

    print("\nIniciando entrenamiento...")

    @tf.function
    def train_step(x, y):
        # One optimiser step on a single mini-batch; returns the batch loss.
        with tf.GradientTape() as tape:
            predictions = model(x, training=True)
            loss = custom_loss(y, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return loss

    def percentage_error(features, targets_original):
        # Mean absolute percentage error in the target's original units.
        # NOTE(review): a target equal to zero makes this inf/NaN.
        predicted = model(features, training=False)
        predicted_original = scaler_y.inverse_transform(predicted.numpy())
        relative = (targets_original - predicted_original) / targets_original
        return np.mean(np.abs(relative)) * 100

    for epoch in range(num_epochs):
        # Training: fresh permutation of the rows every epoch
        indices = tf.random.shuffle(tf.range(num_samples))
        X_shuffled = tf.gather(X_train, indices)
        y_shuffled = tf.gather(y_train, indices)

        for start in range(0, num_samples, batch_size):
            stop = start + batch_size
            train_step(X_shuffled[start:stop], y_shuffled[start:stop])

        # Evaluate
        train_error = percentage_error(X_train, y_train_original)
        test_error = percentage_error(X_test, y_test_original)

        train_errors.append(train_error)
        test_errors.append(test_error)

        # Early stopping check
        if test_error < best_error - min_delta:
            best_error = test_error
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"\nDetención temprana en época {epoch + 1}")
            break

        if (epoch + 1) % 50 == 0:
            print(f'Época {epoch + 1}:')
            print(f'  Error porcentual entrenamiento: {train_error:.2f}%')
            print(f'  Error porcentual test: {test_error:.2f}%')

    return model, train_errors[-1], test_errors[-1], scaler_y

if __name__ == "__main__":
    # Load, encode, scale and split the dataset.
    X_train, y_train, X_test, y_test, scaler_X, scaler_y = prepare_data()

    # Fit the network and collect the last-epoch percentage errors.
    training_result = train_model(X_train, y_train, X_test, y_test, scaler_y)
    model, final_train_error, final_test_error, scaler_y = training_result

    print("\nResultados Finales:")
    print(f"Error Porcentual Final Entrenamiento: {final_train_error:.2f}%")
    print(f"Error Porcentual Final Test: {final_test_error:.2f}%")

    # Compare the first five test rows against the model, in original units.
    print("\nComparación de algunas predicciones:")
    X_sample = X_test[:5]
    y_sample = scaler_y.inverse_transform(y_test[:5])
    scaled_predictions = model(X_sample, training=False).numpy()
    predictions = scaler_y.inverse_transform(scaled_predictions)

    print("Real vs Predicho (Error %):")
    # Both arrays are single-column, so flattening yields one value per row.
    for real, pred in zip(y_sample.ravel(), predictions.ravel()):
        error_percent = abs((real - pred) / real) * 100
        print(f"Real: {real:.3f}, Predicho: {pred:.3f}, Error: {error_percent:.2f}%")

Please upload your Excel file


Saving Diagnostico.xlsx to Diagnostico (43).xlsx

Valores nulos: 0

Estadísticas del DataFrame:
             EDAD      GENERO    ESTATURA        PESO  PRESION_SANG  \
count  442.000000  442.000000  442.000000  442.000000    442.000000   
mean    48.518100    0.468326  170.217195   76.692081     94.647014   
std     13.109028    0.499561   11.432455   16.273156     13.831283   
min     19.000000    0.000000  150.000000   42.400000     62.000000   
25%     38.250000    0.000000  160.000000   64.750000     84.000000   
50%     50.000000    0.000000  170.500000   75.050000     93.000000   
75%     59.000000    1.000000  180.000000   85.575000    105.000000   
max     79.000000    1.000000  190.000000  141.200000    133.000000   

       RESULTADO1  RESULTADO2  RESULTADO3  RESULTADO4  RESULTADO5  RESULTADO6  \
count  442.000000  442.000000  442.000000  442.000000  442.000000  442.000000   
mean    91.260181   49.788462  189.140271    4.641411  115.439140    4.070249   
std     11.496335   1