1. 

In [None]:
import time

import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import max_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, PolynomialFeatures, StandardScaler

# Step 1: Build the dataset
data = {
    'num1': [10, 20, 30, 40, np.nan, 60, 70, 80, 90, 100],
    'num2': [5, 15, 25, 35, 45, 55, 65, 75, 85, 95],
    'num3': [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
    'cat1': ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'B', 'A', 'C'],
    'num4': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19],
    'target': [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]
}
df = pd.DataFrame(data)

# Step 2: Data preprocessing
# perf_counter() is a monotonic, high-resolution clock, which makes it the
# appropriate timer for measuring a short stretch of code.
start_time = time.perf_counter()

# Choose the train/test rows *before* fitting any transformer, so the imputer,
# encoder and scaler only learn statistics from training rows. Fitting them on
# the full frame would leak information from the test rows into training.
# The split depends only on the number of rows and random_state, so splitting
# row positions selects the same rows as splitting X and y directly.
train_rows, test_rows = train_test_split(
    np.arange(len(df)), test_size=0.25, random_state=42
)

# Impute missing values: mean learned from training rows, applied to all rows
imputer = SimpleImputer(strategy='mean')
imputer.fit(df.iloc[train_rows][['num1']])
df[['num1']] = imputer.transform(df[['num1']])

# One-hot encode the categorical column (categories learned from training rows).
# The encoder's default sparse output is densified explicitly: the `sparse=`
# keyword was renamed to `sparse_output=` in scikit-learn 1.2 and removed in
# 1.4, whereas .toarray() works on either side of that rename.
ohe = OneHotEncoder(drop='first')
ohe.fit(df.iloc[train_rows][['cat1']])
ohe_features = ohe.transform(df[['cat1']]).toarray()
ohe_df = pd.DataFrame(
    ohe_features,
    columns=ohe.get_feature_names_out(['cat1']),
    index=df.index,  # keep rows aligned with df for the concat below
)
df = pd.concat([df.drop(columns=['cat1']), ohe_df], axis=1)

# Train/test split, using the row positions chosen above
X = df.drop(columns=['target'])
y = df['target']
X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# Standardize features (scaler fitted on the training split only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

preprocessing_time = time.perf_counter() - start_time

# Step 3: Model training and evaluation
def train_and_evaluate(model, model_name, X_train, X_test, y_train, y_test):
    """Fit ``model``, predict on the test split and collect metrics and timings.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label stored under the ``'Técnica'`` key of the result.
        X_train: Training features passed to ``model.fit``.
        X_test: Test features passed to ``model.predict``.
        y_train: Training targets passed to ``model.fit``.
        y_test: Test targets the predictions are scored against.

    Returns:
        A dict with the model label (``'Técnica'``), the maximum error
        (``'M'``), the mean absolute error (``'MAE'``), the R^2 score
        (``'R^2'``) and the elapsed seconds for fitting
        (``'Tiempo de Entrenamiento'``) and predicting (``'Tiempo de Prueba'``).
    """
    # perf_counter() is monotonic and high-resolution; fit/predict on small
    # inputs are too brief to time reliably with the wall clock time.time().
    fit_started = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - fit_started

    predict_started = time.perf_counter()
    y_pred = model.predict(X_test)
    test_time = time.perf_counter() - predict_started

    return {
        'Técnica': model_name,
        'M': max_error(y_test, y_pred),
        'MAE': mean_absolute_error(y_test, y_pred),
        'R^2': r2_score(y_test, y_pred),
        'Tiempo de Entrenamiento': train_time,
        'Tiempo de Prueba': test_time,
    }

# Each entry is (estimator, display name). The polynomial model is a pipeline
# so that the degree-4 feature expansion happens inside fit()/predict(): it is
# then handled like every other estimator, and the expansion cost is included
# in the reported training and prediction times.
models = [
    (LinearRegression(), "Regresión Lineal"),
    (
        make_pipeline(PolynomialFeatures(degree=4), LinearRegression()),
        "Regresión Polinómica Grado 4",
    ),
    (KNeighborsRegressor(n_neighbors=4), "KNN con K=4"),
]

# Evaluate every candidate on the same train/test split
results = [
    train_and_evaluate(model, name, X_train, X_test, y_train, y_test)
    for model, name in models
]

# Tabulate the per-model metrics
results_df = pd.DataFrame(results)
print(results_df)

# Assessment: pick the row with the highest R^2 and report it
r2_scores = results_df['R^2']
best_model = results_df.loc[r2_scores.idxmax()]
best_name, best_r2 = best_model['Técnica'], best_model['R^2']
print("El mejor modelo es:", best_name, "con R^2 de", best_r2)
