# In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the dataset
df = pd.read_csv('auto-mpg.csv')

# Convert horsepower to numeric; non-numeric placeholders such as "?" become NaN
df['horsepower'] = pd.to_numeric(df['horsepower'], errors='coerce')

# Impute missing horsepower values with the column median.
# The result is assigned back rather than using fillna(inplace=True) on the
# column selection: that chained in-place call raises a FutureWarning on
# pandas 2.x and leaves `df` unmodified under Copy-on-Write (pandas 3).
median_horsepower = df['horsepower'].median()
df['horsepower'] = df['horsepower'].fillna(median_horsepower)

# Drop the 'car name' column
df = df.drop('car name', axis=1)

# Select the predictor columns and the regression target
selected_features = ['weight', 'horsepower', 'model year', 'origin']
X = df[selected_features]
y = df['mpg']

# Scale the predictors to the [0, 1] range.
# NOTE: this scaler is fitted on every row, so anything cross-validated on
# X_scaled_df has seen the min/max of its own test folds. For a strict
# evaluation, fit a scaler on the training rows of each fold instead.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
X_scaled_df = pd.DataFrame(X_scaled, columns=selected_features)

# Configure k-fold cross-validation (k=5); shuffled with a fixed seed so the
# folds are the same on every run
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Model 1: linear regression, scored with k-fold cross-validation
lr_mse_scores = []
for train_index, test_index in kf.split(X):
    # Fit the scaler on the training rows only and reuse it for the test rows,
    # so no statistic of the held-out fold leaks into preprocessing.
    fold_scaler = MinMaxScaler()
    X_train = fold_scaler.fit_transform(X.iloc[train_index])
    X_test = fold_scaler.transform(X.iloc[test_index])
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # A fresh model per fold: nothing learned on one split carries over
    model_lr = LinearRegression()
    model_lr.fit(X_train, y_train)
    y_pred = model_lr.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    lr_mse_scores.append(mse)

# Report per-fold MSE plus its mean and spread across folds
print("Regresión Lineal - MSE por fold:", lr_mse_scores)
print("Regresión Lineal - MSE promedio:", np.mean(lr_mse_scores))
print("Regresión Lineal - Desviación estándar MSE:", np.std(lr_mse_scores))

# Model 2: neural network
def create_nn_model(input_dim: int) -> Sequential:
    """Build and compile a small fully connected regression network.

    The network has two ReLU hidden layers (64 and 32 units) followed by a
    single linear output unit, and is compiled with the Adam optimizer and
    mean-squared-error loss.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled, untrained model.
    """
    model = Sequential([
        # Declare the input explicitly; passing `input_dim=` to the first
        # Dense layer is deprecated in current Keras and emits a warning.
        tf.keras.Input(shape=(input_dim,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1),  # linear output for regression
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling (and therefore the reported MSEs) are repeatable, as far as
# the backend allows. Note that this also reseeds the global NumPy/`random`
# generators.
tf.keras.utils.set_random_seed(42)

nn_mse_scores = []
for train_index, test_index in kf.split(X):
    # Fit the scaler on the training rows only and reuse it for the test rows,
    # so no statistic of the held-out fold leaks into preprocessing.
    fold_scaler = MinMaxScaler()
    X_train = fold_scaler.fit_transform(X.iloc[train_index])
    X_test = fold_scaler.transform(X.iloc[test_index])
    y_train = y.iloc[train_index].to_numpy()
    y_test = y.iloc[test_index].to_numpy()

    # A fresh network per fold: weights from one split never carry over
    model_nn = create_nn_model(input_dim=X_train.shape[1])
    model_nn.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

    # The single-unit output layer yields shape (n_samples, 1); flatten it so
    # predictions and targets are both 1-D when scored.
    y_pred = model_nn.predict(X_test, verbose=0).ravel()
    mse = mean_squared_error(y_test, y_pred)
    nn_mse_scores.append(mse)

# Report per-fold MSE plus its mean and spread across folds
print("\nRed Neuronal - MSE por fold:", nn_mse_scores)
print("Red Neuronal - MSE promedio:", np.mean(nn_mse_scores))
print("Red Neuronal - Desviación estándar MSE:", np.std(nn_mse_scores))

# Output: ModuleNotFoundError: No module named 'tensorflow'