In [1]:
# ========================
# CELDA 1: Librerías
# ========================
import pandas as pd
import numpy as np
from sklearn.preprocessing import PolynomialFeatures


In [None]:
# ========================
# CELDA 2: Carga de Datos
# ========================
# Make sure the three CSV files have been uploaded first through the file
# side panel in Colab; they are read from the current working directory.

# Read the splits in order (training, validation, test) and bind each one
# to its own module-level name.
train_set, val_set, test_set = (
    pd.read_csv(csv_name)
    for csv_name in (
        "training_set_air_quality.csv",
        "validation_set_air_quality.csv",
        "test_set_air_quality.csv",
    )
)

In [None]:

# ========================
# CELDA 3: Función de hipótesis
# ========================
def get_hypothesis(real_feature_names, categorical_feature_names=None, degree=3, data=None):
    """
    Build a design matrix with polynomial numeric features and dummy-encoded
    categorical features.

    Parameters
    ----------
    real_feature_names : list of str
        Names of the numeric columns to expand with ``PolynomialFeatures``
        (no bias column is generated).
    categorical_feature_names : list of str, optional
        Names of the columns to encode with ``pd.get_dummies(drop_first=True)``.
        ``None`` or an empty list skips the categorical part.
    degree : int, default 3
        Degree of the polynomial expansion.
    data : pandas.DataFrame, optional
        Frame the columns are read from. When omitted, the module-level
        ``train_set`` is used, so existing calls keep their behavior. Pass
        ``val_set`` or ``test_set`` to build the matrix for another split.

    Returns
    -------
    pandas.DataFrame
        The polynomial columns followed by the dummy columns, labelled with
        their feature names and carrying the index of ``data``.

    Notes
    -----
    The dummy columns are derived only from the values present in ``data``,
    so two splits with different category values produce different column
    sets. Align them afterwards if needed, e.g. with
    ``X_val.reindex(columns=X_train.columns, fill_value=0)``.
    """
    # Resolve the default at call time so the function follows whatever
    # `train_set` currently is in the notebook namespace.
    if data is None:
        data = train_set

    # Expand the numeric columns into polynomial terms.
    poly = PolynomialFeatures(degree=degree, include_bias=False)
    X_real_poly = poly.fit_transform(data[real_feature_names])
    feature_names = list(poly.get_feature_names_out(real_feature_names))

    # Append the dummy-encoded categorical columns, if any were requested.
    if categorical_feature_names:
        X_cat = pd.get_dummies(data[categorical_feature_names], drop_first=True)
        X = np.hstack((X_real_poly, X_cat.values))
        feature_names += list(X_cat.columns)
    else:
        X = X_real_poly

    # Keep the source index so rows stay aligned with the target column(s)
    # of the same frame.
    return pd.DataFrame(X, columns=feature_names, index=data.index)


In [None]:
# ========================
# CELDA 4: Ejemplo de uso
# ========================
# Column selections for the design matrix (adjust them to your dataset).
real_features = ["TEMP", "PRES", "DEWP"]
categorical_features = ["WD"]  # example

# Build the design matrix with a degree-2 polynomial expansion.
X_poly = get_hypothesis(real_features, categorical_features, degree=2)

# Show the first rows.
X_poly.head(n=5)