In [29]:
# -*- coding: utf-8 -*-
"""
Created on Wed Mar  1 09:05:03 2023 - 2024/06/27

@author: Anahí Romo
"""
# https://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputRegressor.html
# https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html

# example of making a prediction with the direct multioutput regression model
from sklearn.datasets import make_regression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
import numpy as np


In [30]:
from sklearn.datasets import make_regression

# Step 1: build a synthetic multi-output regression dataset.
# Arguments are grouped by role: size, signal structure, then randomness.
X, y = make_regression(
    n_samples=1000,    # 1000 rows (observations)
    n_features=10,     # 10 input features (columns) per row
    n_targets=2,       # 2 output columns -> multi-output regression
    n_informative=2,   # only 2 of the 10 features actually influence the outputs
    noise=0.5,         # Gaussian noise added to the outputs for more realistic data
    random_state=1,    # fixed seed so the dataset is reproducible
)

# Step 2: check the shape of the input matrix (1000 rows x 10 features).
print(X.shape)

# Step 3: show the first sample of X, i.e. the 10 feature values of row 0.
print("Primera muestra de X:", X[0])


(1000, 10)
Primera muestra de X: [ 0.72667997  0.98199366 -0.37195994  0.22445073  0.74205658 -1.09330391
 -1.40525766  0.438562    0.92781985  1.96427946]


In [42]:
# Shape of the target matrix: one row per sample, one column per target.
print(y.shape)
# Target values of the first sample (shown as the cell's output).
y[0]

(1000, 2)


array([-53.3740129 ,  15.62916171])

In [43]:
# Column labels x1..x10, one per input feature.
names = [f'x{idx}' for idx in range(1, 11)]
names

['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10']

In [33]:
# Wrap the raw feature matrix in a DataFrame so every column carries its label.
XX = pd.DataFrame(data=X, columns=names)
XX

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
0,0.726680,0.981994,-0.371960,0.224451,0.742057,-1.093304,-1.405258,0.438562,0.927820,1.964279
1,-0.489084,-0.090965,0.578705,0.990631,-0.550144,-1.468319,0.311875,-1.902717,0.600032,-1.010139
2,1.175245,1.558688,1.119899,0.861289,0.483884,-1.987830,-2.048200,2.369730,1.562420,-0.870802
3,-0.443988,0.351517,0.305031,0.202462,0.623651,-2.087238,-0.214191,-0.293695,1.336392,-0.008131
4,1.202059,-0.594676,-1.271874,-0.091764,-0.013095,1.040153,1.083952,-1.251391,-0.769384,-0.254311
...,...,...,...,...,...,...,...,...,...,...
995,0.283732,0.450391,2.365601,-1.582294,-0.159635,0.982684,-0.006196,-0.120499,0.385565,0.456093
996,1.923815,0.612233,-0.605981,-0.452525,0.204377,1.803589,-0.446699,0.744885,-0.036281,-0.832395
997,0.241136,-0.320525,-0.277095,0.155405,0.600008,0.095703,0.247136,1.702785,1.186874,0.908461
998,-0.169968,1.096016,0.769551,-0.715846,0.943203,-0.988518,-1.268590,-0.296347,0.228370,-0.045193


In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Why PolynomialFeatures?
# - It gives a linear model the ability to learn curves and more complex
#   relationships between features.
# - For simpler problems it is a lightweight alternative to training neural
#   networks or other heavy non-linear models.
# - It is commonly used in polynomial regression and generalized linear models.
#
# Reminder: X holds 1000 samples with 10 variables each, shape (1000, 10).

# Step 1: degree-2 feature generator.
#   degree=2          -> produce terms up to degree 2 (quadratic)
#   include_bias=True -> also emit the constant term (a column of ones)
poly = PolynomialFeatures(include_bias=True, degree=2)

# Step 2: fit on X and expand it into every feature combination up to degree 2.
Xp = poly.fit_transform(X)

# Step 3: check the shape of the expanded array.
# Expected (1000, 66): 1 constant + 10 linear + 55 squared/interaction columns.
print("Forma de Xp:", Xp.shape)

# Step 4: show the second row (index 1) with all of its polynomial features.
print("Segunda fila de Xp (66 features):\n", Xp[1])


Forma de Xp: (1000, 66)
Segunda fila de Xp (66 features):
 [ 1.         -0.48908438 -0.09096474  0.57870467  0.99063126 -0.55014423
 -1.46831866  0.3118749  -1.9027168   0.60003224 -1.01013865  0.23920353
  0.04448943 -0.28303541 -0.48450227  0.26906695  0.71813172 -0.15253314
  0.93058906 -0.29346639  0.49404303  0.00827458 -0.05264172 -0.09011251
  0.05004372  0.13356522 -0.02836962  0.17308013 -0.05458177  0.091887
  0.3348991   0.57328294 -0.31837104 -0.84972287  0.18048346 -1.10111111
  0.34724146 -0.58457195  0.98135029 -0.54499007 -1.45456236  0.30895302
 -1.88489074  0.59441069 -1.00067492  0.30265867  0.80778704 -0.17157617
  1.04676867 -0.33010427  0.55572195  2.15595969 -0.45793173  2.79379458
 -0.88103853  1.48320542  0.09726595 -0.59340961  0.18713499 -0.31503689
  3.62033123 -1.14169142  1.92200777  0.36003869 -0.60611575  1.02038008]


In [35]:
# Readable label for each expanded column, derived from the x1..x10 input names.
names2 = poly.get_feature_names_out(input_features=names)
names2

array(['1', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8', 'x9', 'x10',
       'x1^2', 'x1 x2', 'x1 x3', 'x1 x4', 'x1 x5', 'x1 x6', 'x1 x7',
       'x1 x8', 'x1 x9', 'x1 x10', 'x2^2', 'x2 x3', 'x2 x4', 'x2 x5',
       'x2 x6', 'x2 x7', 'x2 x8', 'x2 x9', 'x2 x10', 'x3^2', 'x3 x4',
       'x3 x5', 'x3 x6', 'x3 x7', 'x3 x8', 'x3 x9', 'x3 x10', 'x4^2',
       'x4 x5', 'x4 x6', 'x4 x7', 'x4 x8', 'x4 x9', 'x4 x10', 'x5^2',
       'x5 x6', 'x5 x7', 'x5 x8', 'x5 x9', 'x5 x10', 'x6^2', 'x6 x7',
       'x6 x8', 'x6 x9', 'x6 x10', 'x7^2', 'x7 x8', 'x7 x9', 'x7 x10',
       'x8^2', 'x8 x9', 'x8 x10', 'x9^2', 'x9 x10', 'x10^2'], dtype=object)

In [36]:
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor

# Step 1: base learner, a plain linear regression.
# The intercept is disabled because the polynomial expansion already contains
# a constant column, so the regression should not add a second one.
model = LinearRegression(fit_intercept=False)

# Step 2: multi-output wrapper around the base learner.
# It allows predicting more than one target at once; here y has 2 columns
# (n_targets=2).
wrapper = MultiOutputRegressor(estimator=model)

# Step 3: fit on the WHOLE dataset (no train/test split).
#   Xp -> polynomial features (66 columns produced by PolynomialFeatures)
#   y  -> output variables (2 columns)
wrapper.fit(Xp, y)

print("Modelo entrenado sobre features polinómicas para 2 targets.")


Modelo entrenado sobre features polinómicas para 2 targets.


In [37]:
# Labelled view of the expanded feature matrix: one column per polynomial term.
X_df = pd.DataFrame(data=Xp, columns=names2)
X_df

Unnamed: 0,1,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x7^2,x7 x8,x7 x9,x7 x10,x8^2,x8 x9,x8 x10,x9^2,x9 x10,x10^2
0,1.0,0.726680,0.981994,-0.371960,0.224451,0.742057,-1.093304,-1.405258,0.438562,0.927820,...,1.974749,-0.616293,-1.303826,-2.760319,0.192337,0.406907,0.861458,0.860850,1.822497,3.858394
1,1.0,-0.489084,-0.090965,0.578705,0.990631,-0.550144,-1.468319,0.311875,-1.902717,0.600032,...,0.097266,-0.593410,0.187135,-0.315037,3.620331,-1.141691,1.922008,0.360039,-0.606116,1.020380
2,1.0,1.175245,1.558688,1.119899,0.861289,0.483884,-1.987830,-2.048200,2.369730,1.562420,...,4.195125,-4.853682,-3.200148,1.783576,5.615621,3.702513,-2.063565,2.441155,-1.360557,0.758295
3,1.0,-0.443988,0.351517,0.305031,0.202462,0.623651,-2.087238,-0.214191,-0.293695,1.336392,...,0.045878,0.062907,-0.286243,0.001742,0.086257,-0.392492,0.002388,1.785945,-0.010866,0.000066
4,1.0,1.202059,-0.594676,-1.271874,-0.091764,-0.013095,1.040153,1.083952,-1.251391,-0.769384,...,1.174952,-1.356447,-0.833975,-0.275661,1.565979,0.962800,0.318242,0.591951,0.195663,0.064674
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1.0,0.283732,0.450391,2.365601,-1.582294,-0.159635,0.982684,-0.006196,-0.120499,0.385565,...,0.000038,0.000747,-0.002389,-0.002826,0.014520,-0.046460,-0.054959,0.148661,0.175854,0.208021
996,1.0,1.923815,0.612233,-0.605981,-0.452525,0.204377,1.803589,-0.446699,0.744885,-0.036281,...,0.199540,-0.332739,0.016207,0.371830,0.554853,-0.027025,-0.620038,0.001316,0.030200,0.692882
997,1.0,0.241136,-0.320525,-0.277095,0.155405,0.600008,0.095703,0.247136,1.702785,1.186874,...,0.061076,0.420820,0.293319,0.224514,2.899478,2.020992,1.546915,1.408670,1.078229,0.825302
998,1.0,-0.169968,1.096016,0.769551,-0.715846,0.943203,-0.988518,-1.268590,-0.296347,0.228370,...,1.609320,0.375943,-0.289708,0.057331,0.087821,-0.067677,0.013393,0.052153,-0.010321,0.002042


In [38]:
import numpy as np

# Step 1: a single example with the 10 original input features
# (same layout as the rows generated by make_regression).
row = [
    0.21947749, 0.32948997, 0.81560036, 0.440956, -0.0606303,
    -0.29257894, -0.2820059, -0.00290545, 0.96402263, 0.04992249,
]

# Step 2: expand the example into polynomial features.
# Wrapping the list in another list yields a 2-D array of 1 row x 10 columns,
# which is the input layout poly.transform expects; the result has 66 columns
# (1, x1, x2, ..., x1^2, x1*x2, ...).
rowp = poly.transform(np.array([row]))
print("Shape de rowp (fila expandida a polinomios):", rowp.shape)

# Step 3: predict both outputs with the fitted wrapper.
yhat = wrapper.predict(rowp)

# Step 4: show the result.
# yhat has one row per input sample; yhat[0] holds the 2 predicted targets.
print(f'Predicted: yhat={yhat[0]}')


Shape de rowp (fila expandida a polinomios): (1, 66)
Predicted: yhat=[-30.11406321  -2.5434302 ]


In [39]:
# Score of the fitted wrapper on the same (Xp, y) it was trained on.
# NOTE(review): this is an in-sample score, not a held-out estimate, so it is
# expected to look optimistic.
r2=wrapper.score(Xp, y)
print(r2)

0.9998215439244125


In [40]:
# Gather the hyper-parameters first, then print everything in order.
#   z -> parameters of the wrapper (nested estimator settings are included
#        under 'estimator__*' keys)
#   v -> parameters of the base model passed to the wrapper
z = wrapper.get_params(deep=True)
v = model.get_params(deep=True)

print(z)
print(v)

# Fitted inner models held by the wrapper (one per output).
print(wrapper.estimators_)


{'estimator__copy_X': True, 'estimator__fit_intercept': False, 'estimator__n_jobs': None, 'estimator__positive': False, 'estimator': LinearRegression(fit_intercept=False), 'n_jobs': None}
{'copy_X': True, 'fit_intercept': False, 'n_jobs': None, 'positive': False}
[LinearRegression(fit_intercept=False), LinearRegression(fit_intercept=False)]


In [41]:
# Collect the fitted coefficients of each per-target estimator.
#
# What happens here:
# - wrapper.estimators_ is the list of fitted models, one per target.
# - estimator.coef_ holds one weight per polynomial feature (66 here, because
#   PolynomialFeatures(degree=2) expanded the 10 inputs into 66 columns).
# - X_df was built from Xp using the names2 labels, so X_df.columns contains
#   those same 66 names ('1', 'x1', ..., 'x10^2') and matches coef_ one-to-one.
#   (It is XX, not X_df, that has only the 10 original columns.)
coeficientes = [
    pd.DataFrame(estimator.coef_, index=X_df.columns, columns=['Coefficients'])
    for estimator in wrapper.estimators_
]
print(coeficientes)


[        Coefficients
1          -0.039435
x1          0.002178
x2          0.010606
x3         -0.010485
x4         -0.019024
...              ...
x8 x9       0.017510
x8 x10      0.054531
x9^2       -0.008369
x9 x10      0.034686
x10^2       0.002210

[66 rows x 1 columns],         Coefficients
1          -0.014075
x1         -0.013917
x2          0.006262
x3         -0.027721
x4          0.017521
...              ...
x8 x9       0.009130
x8 x10     -0.029899
x9^2        0.000577
x9 x10     -0.018558
x10^2      -0.010158

[66 rows x 1 columns]]
