In [9]:
!pip install pandas numpy matplotlib seaborn scikit-learn tensorflow optuna
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import optuna
import joblib

# Cargar los datos preprocesados
X_train_scaled_imputed = np.load('X_train.npy')
X_test_scaled_imputed = np.load('X_test.npy')
y_train = np.load('y_train.npy')

# Cargar test_data y sus índices
loaded_test_data = np.load('test_data.npy')
columns = ['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'ocean_proximity', 'median_house_value']

# Asegurarse de que las dimensiones coincidan
if loaded_test_data.shape[1] == len(columns):
    test_data = pd.DataFrame(loaded_test_data, columns=columns)
    test_data_index = np.load('test_data_index.npy')

    # Obtener el índice real de test_data
    test_data.index = test_data_index

    # Separar la variable objetivo 'median_house_value'
    y_test = test_data['median_house_value'].copy()
    test_data.drop('median_house_value', axis=1, inplace=True)
else:
    print("Error: El número de columnas especificado no coincide con el conjunto de datos.")

# Cargar el objeto scaler
scaler = StandardScaler()
X_train_scaled_imputed = scaler.fit_transform(X_train_scaled_imputed)
X_test_scaled_imputed = scaler.transform(X_test_scaled_imputed)

# Modelo de Deep Learning
model_dl = Sequential()
model_dl.add(Dense(30, activation='relu', input_shape=(X_train_scaled_imputed.shape[1],)))
model_dl.add(Dense(1))
model_dl.compile(optimizer='adam', loss='mean_squared_error')

model_dl.fit(X_train_scaled_imputed, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Modelo de Machine Learning
model_ml = RandomForestRegressor(n_estimators=100, random_state=42)
model_ml.fit(X_train_scaled_imputed, y_train)

# Evaluación del modelo de Deep Learning
X_test_scaled_imputed_dl = scaler.transform(X_test_scaled_imputed)  # Corregir esta línea
y_pred_dl_imputed = model_dl.predict(X_test_scaled_imputed_dl)
mse_dl_imputed = mean_squared_error(y_test, y_pred_dl_imputed)
r2_dl_imputed = r2_score(y_test, y_pred_dl_imputed)
print(f"Deep Learning con Imputación - Mean Squared Error: {mse_dl_imputed}, R^2 Score: {r2_dl_imputed}")

# Evaluación del modelo de Machine Learning
y_pred_ml_imputed = model_ml.predict(X_test_scaled_imputed)
mse_ml_imputed = mean_squared_error(y_test, y_pred_ml_imputed)
r2_ml_imputed = r2_score(y_test, y_pred_ml_imputed)
print(f"Machine Learning con Imputación - Mean Squared Error: {mse_ml_imputed}, R^2 Score: {r2_ml_imputed}")

# Ajuste de Hiperparámetros (utilizando Optuna)
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 10, 200)
    max_depth = trial.suggest_int('max_depth', 5, 30)
    model = RandomForestRegressor(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train_scaled_imputed, y_train)
    y_pred = model.predict(X_test_scaled_imputed)
    return mean_squared_error(y_test, y_pred)

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

best_params = study.best_params
best_model = RandomForestRegressor(n_estimators=best_params['n_estimators'],
                                    max_depth=best_params['max_depth'], random_state=42)
best_model.fit(X_train_scaled_imputed, y_train)

# Imprimir los mejores hiperparámetros
print(f"Best Hyperparameters: {best_params}")

# Evaluación del mejor modelo
y_pred_best = best_model.predict(X_test_scaled_imputed)
mse_best = mean_squared_error(y_test, y_pred_best)
r2_best = r2_score(y_test, y_pred_best)
print(f"Best Model - Mean Squared Error: {mse_best}, R^2 Score: {r2_best}")

# Ruta donde se guardarán los modelos
ruta_modelos = 'modelos/'

# Guardar el modelo de Deep Learning
model_dl.save(f'{ruta_modelos}modelo_dl')

# Guardar el modelo de Machine Learning
ruta_modelo_ml = f'{ruta_modelos}modelo_ml.joblib'
joblib.dump(model_ml, ruta_modelo_ml)

# Guardar el mejor modelo
ruta_mejor_modelo = f'{ruta_modelos}mejor_modelo.joblib'
joblib.dump(best_model, ruta_mejor_modelo)

# Puedes imprimir un mensaje indicando la ubicación de los modelos guardados
print(f"Modelos guardados en: {ruta_modelos}")


Epoch 1/50



[notice] A new release of pip available: 22.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Deep Learning con Imputación - Mean Squared Error: 44817280443.00663, R^2 Score: -2.420098701030696


[I 2024-01-31 20:03:57,964] A new study created in memory with name: no-name-7b207fb0-8063-4d53-9a14-07b72bc22462


Machine Learning con Imputación - Mean Squared Error: 2494274212.183793, R^2 Score: 0.8096568129797065


[I 2024-01-31 20:04:33,055] Trial 0 finished with value: 2560698030.6396503 and parameters: {'n_estimators': 192, 'max_depth': 14}. Best is trial 0 with value: 2560698030.6396503.
[I 2024-01-31 20:04:38,724] Trial 1 finished with value: 5223704457.159868 and parameters: {'n_estimators': 111, 'max_depth': 5}. Best is trial 0 with value: 2560698030.6396503.
[I 2024-01-31 20:05:14,256] Trial 2 finished with value: 2520824553.285361 and parameters: {'n_estimators': 180, 'max_depth': 15}. Best is trial 2 with value: 2520824553.285361.
[I 2024-01-31 20:05:47,173] Trial 3 finished with value: 2482713760.7763057 and parameters: {'n_estimators': 136, 'max_depth': 20}. Best is trial 3 with value: 2482713760.7763057.
[I 2024-01-31 20:06:15,553] Trial 4 finished with value: 2482764117.0780544 and parameters: {'n_estimators': 113, 'max_depth': 23}. Best is trial 3 with value: 2482713760.7763057.
[I 2024-01-31 20:06:23,394] Trial 5 finished with value: 5220615906.011435 and parameters: {'n_estimator

Best Hyperparameters: {'n_estimators': 136, 'max_depth': 20}
Best Model - Mean Squared Error: 2482713760.7763057, R^2 Score: 0.8105390147655189
INFO:tensorflow:Assets written to: modelos/modelo_dl\assets


INFO:tensorflow:Assets written to: modelos/modelo_dl\assets


Modelos guardados en: modelos/
