<a href="https://colab.research.google.com/github/Joelss23/Proyectos-y-Actividades/blob/main/CasoPracticoDeepLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Caso Práctico Final Deep Learning - Joel Adrian Yari

## Librerías

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

## 1. Cargar ambos ficheros dados

In [None]:
# Both CSV files are read from the notebook's working directory
TRAIN_CSV = 'train_v2.csv'
TEST_CSV = 'test_v2.csv'

train = pd.read_csv(TRAIN_CSV)
test = pd.read_csv(TEST_CSV)

## 2. Análisis exploratorio: identificar valores desconocidos, filas duplicadas y estadísticos de las variables del conjunto de datos

In [None]:
# Missing values: number of null entries in each column of the training frame
null_counts = train.isnull().sum()
print("Valores nulos por columna:", null_counts, sep="\n")

In [None]:
# Duplicated rows: count rows that repeat an earlier row exactly
duplicate_count = train.duplicated().sum()
print("Filas duplicadas:", duplicate_count)

In [None]:
# Data types: one "<column>: <dtype>" line per column of the training frame
print("Tipo de datos:")
for name in train:  # iterating a DataFrame yields its column labels
    column_dtype = train[name].dtype
    print(f"{name}: {column_dtype}")

In [None]:
# Preliminary look at the first rows; the display option is lifted only
# inside the context manager so that no column is elided from the printout
preview = train.head()
with pd.option_context('display.max_columns', None):
    print(preview)

In [None]:
# Descriptive statistics of the training frame
summary_stats = train.describe()
print("Estadisticas descriptivas:", summary_stats, sep="\n")

## 3. Ingeniería de variables: crear una variable

In [None]:
# Engineered feature: TotalSF = basement + first floor + second floor area.
# The same column is added to both frames so they stay aligned.
for frame in (train, test):
    frame['TotalSF'] = frame['TotalBsmtSF'] + frame['1stFlrSF'] + frame['2ndFlrSF']

## 4. Eliminar las variables de entrada no numéricas

In [None]:
# Keep only the numeric columns of each frame; non-numeric inputs are dropped
train_numeric, test_numeric = (
    frame.select_dtypes(include=[np.number]) for frame in (train, test)
)

## 5. Eliminar la(s) variable(s) de entrada que no tengan sentido lógico para realizar la predicción

In [None]:
# 'Id' is removed from the inputs; errors='ignore' makes the drop a no-op
# when the column is absent, so the cell can be re-run safely
NON_PREDICTIVE_COLUMNS = ['Id']

train_numeric = train_numeric.drop(columns=NON_PREDICTIVE_COLUMNS, errors='ignore')
test_numeric = test_numeric.drop(columns=NON_PREDICTIVE_COLUMNS, errors='ignore')

## 6. Normalizar variables de entrada mediante min-max

In [None]:
# Split the predictors from the target
X = train_numeric.drop(columns=['SalePrice'])
y = train_numeric['SalePrice']

# Impute missing values with the training-set column means. The means are
# computed once and reused for the test set, so the test data is filled with
# training statistics only.
train_means = X.mean()
X = X.fillna(train_means)
test_numeric = test_numeric.fillna(train_means)

# Min-max normalisation, fitted on the training predictors
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Align the test predictors with the training columns (same set, same order)
# before transforming. reindex drops any column the scaler was not fitted on
# (including SalePrice when the test file carries it) and creates columns the
# test file lacks as NaN, which are then imputed with the training means.
X_test = test_numeric.reindex(columns=X.columns).fillna(train_means)
test_scaled = scaler.transform(X_test)

## 7. Dividir los datos de entrenamiento en Train (80%) y Validation (20%)

In [None]:
# Hold out 20% of the training rows for validation; the fixed random_state
# keeps the split identical between runs
VALIDATION_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    test_size=VALIDATION_FRACTION,
    random_state=SPLIT_SEED,
)

## 8. Crear una Red Neuronal con 2 capas ocultas, 200 neuronas en cada capa y función de activación ReLU

In [None]:
def build_model(dropout_rate=0.0, input_dim=None):
    """Build and compile the two-hidden-layer regression network.

    Args:
        dropout_rate: Rate passed to a Dropout layer placed after each hidden
            layer. No Dropout layers are added unless it is greater than 0.
        input_dim: Number of input features. When None, the column count of
            the notebook-level ``X_train`` array is used, which is what this
            function always did before the parameter existed.

    Returns:
        A compiled Sequential model: Dense(200, relu) -> Dense(200, relu) ->
        Dense(1), with a Dropout layer after each hidden layer when
        ``dropout_rate`` is greater than 0.
    """
    if input_dim is None:
        input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(200, input_dim=input_dim, activation='relu'))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(200, activation='relu'))
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    model.add(Dense(1))
    # MSE stays the optimised loss so history['loss'] / ['val_loss'] keep
    # their meaning; RMSE is tracked next to it because the exercise is
    # stated in terms of RMSE (available as 'rmse' / 'val_rmse' in history).
    model.compile(
        optimizer='adam',
        loss='mse',
        metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')],
    )
    return model

## 9. Entrenar el algoritmo utilizando la métrica RMSE como función de coste

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before building the
# model so the run can be reproduced after Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Baseline network (no dropout), trained for 100 epochs with the validation
# split monitored at every epoch
model = build_model()
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=0)

## 10. Evaluar la predicción en Test

In [None]:
# Predictions of the baseline model on every split. The test matrix was
# prepared during normalisation; without this call it would never be used.
train_preds = model.predict(X_train)
val_preds = model.predict(X_val)
test_preds = model.predict(test_scaled)

rmse_train = np.sqrt(mean_squared_error(y_train, train_preds))
rmse_val = np.sqrt(mean_squared_error(y_val, val_preds))
print(f"RMSE en entrenamiento: {rmse_train:.2f}")
print(f"RMSE en validacion: {rmse_val:.2f}")

# A test RMSE can only be computed when the test file carries the target.
# Rows with a missing SalePrice are excluded from the score.
has_test_labels = (
    'SalePrice' in test_numeric.columns
    and test_numeric['SalePrice'].notna().any()
)
if has_test_labels:
    labelled = test_numeric['SalePrice'].notna().to_numpy()
    rmse_test = np.sqrt(
        mean_squared_error(test_numeric.loc[labelled, 'SalePrice'], test_preds[labelled])
    )
    print(f"RMSE en test: {rmse_test:.2f}")
else:
    print(f"Test sin SalePrice: {len(test_preds)} predicciones generadas (sin RMSE)")

In [None]:
# Learning curves of the baseline model: training vs validation loss per epoch
baseline_curves = (
    ('loss', 'Perdida de entrenamiento'),
    ('val_loss', 'Perdida de validacion'),
)

plt.figure(figsize=(10, 5))
for history_key, curve_label in baseline_curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Curva de perdida durante el entrenamiento')
plt.xlabel('Epoca')
plt.ylabel('MSE')
plt.grid(True)
plt.legend()
plt.show()

## 11. Crear una arquitectura que produzca overfit

In [None]:
# Wider network (400 units per hidden layer, no regularisation) used for the
# overfitting experiment; same optimiser, loss and epoch count as the baseline
overfit_model = Sequential()
overfit_model.add(Dense(400, input_dim=X_train.shape[1], activation='relu'))
overfit_model.add(Dense(400, activation='relu'))
overfit_model.add(Dense(1))
overfit_model.compile(loss='mse', optimizer='adam')

overfit_history = overfit_model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    verbose=0,
)

In [None]:
# Loss curves of the overfitting experiment: training vs validation per epoch
overfit_curves = (
    ('loss', 'Overfit - entrenamiento'),
    ('val_loss', 'Overfit - validacion'),
)

plt.figure(figsize=(10, 5))
for history_key, curve_label in overfit_curves:
    plt.plot(overfit_history.history[history_key], label=curve_label)
plt.title('Curva de perdida - Overfitting')
plt.xlabel('Epoca')
plt.ylabel('MSE')
plt.grid(True)
plt.legend()
plt.show()

## 12. Probar 3 ejemplos con distintas regularizaciones e identificar la que mejor funciona

In [None]:
# Regularisation with Dropout: train one model per rate, record the
# validation RMSE of each and report the rate with the lowest one.
dropouts = [0.2, 0.4, 0.6]
dropout_rmse = {}  # dropout rate -> validation RMSE
for rate in dropouts:
    print(f"Entrenando modelo con Dropout = {rate}")
    # Re-seed before every run so the comparison between rates is repeatable
    # and each model is built from the same seed.
    tf.keras.utils.set_random_seed(42)
    reg_model = build_model(dropout_rate=rate)
    reg_history = reg_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=0)
    # NOTE: val_preds / rmse_val are the names used by the baseline
    # evaluation too; after this loop they hold the last dropout run.
    val_preds = reg_model.predict(X_val)
    rmse_val = np.sqrt(mean_squared_error(y_val, val_preds))
    dropout_rmse[rate] = rmse_val
    print(f"RMSE en validacion con Dropout {rate}: {rmse_val:.2f}")

    # Loss curves for this dropout rate
    plt.figure(figsize=(10, 5))
    plt.plot(reg_history.history['loss'], label='Entrenamiento')
    plt.plot(reg_history.history['val_loss'], label='Validacion')
    plt.title(f'Curva de perdida - Dropout {rate}')
    plt.xlabel('Epoca')
    plt.ylabel('MSE')
    plt.legend()
    plt.grid(True)
    plt.show()

# Identify the best-performing regularisation: lowest validation RMSE
best_rate = min(dropout_rmse, key=dropout_rmse.get)
print(f"Mejor Dropout: {best_rate} (RMSE en validacion: {dropout_rmse[best_rate]:.2f})")
