In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_friedman1
import matplotlib.pyplot as plt

In [None]:
# Generate the Friedman #1 synthetic regression dataset: 100 samples, 7 input features.
# The fixed random_state makes the generated data reproducible across runs.
X_F1, y_F1 = make_friedman1(
    n_samples=100,
    n_features=7,
    random_state=0,
)

# Plot the target against a single input column (index 2); the other six features
# are not shown in this view.
fig, ax = plt.subplots()
ax.set_title("Complex regression problem with one input variable")
ax.scatter(X_F1[:, 2], y_F1, marker='o', s=50)
# Label both axes so the figure can be understood without reading the code.
ax.set_xlabel("Feature value (X_F1[:, 2])")
ax.set_ylabel("Target value (y_F1)")
ax.grid(True)
plt.show()

In [None]:
# Hold out part of the data for evaluation; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_F1, y_F1, random_state=0)

# Baseline: ordinary least-squares linear regression on the untransformed features.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Report the learned parameters, then the R-squared score on the train and test sets.
print(f"linear model coef (w): {linreg.coef_}")
print(f"linear model intercept (b): {linreg.intercept_}")
print(f"R-squared score (train): {linreg.score(X_train, y_train):.3f}")
print(f"R-squared score (test): {linreg.score(X_test, y_test):.3f}")

# Now With Polynomial Feature Transformation

In [None]:
# Polynomial features let a linear model fit nonlinear structure: new columns are built
# from powers and products of the existing input features. For two inputs (x0, x1) and
# degree 2, PolynomialFeatures with its default settings produces six columns:
# (1, x0, x1, x0^2, x0*x1, x1^2), where the leading 1 is the bias column.
poly = PolynomialFeatures(degree=2)
# Expand the existing features of X_F1 into their degree-2 polynomial terms.
X_F1_poly = poly.fit_transform(X_F1)

# Split the expanded feature matrix; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_F1_poly, y_F1, random_state=0)

# Plain (unpenalized) least squares on the expanded features.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Compare the train and test scores printed below: a noticeably higher train score is a
# sign of overfitting. For that reason polynomial expansion is typically combined with a
# penalized regression technique such as ridge regression.
print(f"(poly degree 2) linear model coef (w): {linreg.coef_}")
print(f"(poly degree 2) linear model intercept (b): {linreg.intercept_}")
print(f"(poly degree 2) R-squared score (train): {linreg.score(X_train, y_train):.3f}")
print(f"(poly degree 2) R-squared score (test): {linreg.score(X_test, y_test):.3f}")

# Polynomial Transformation with Ridge

In [None]:
# Split the polynomial-expanded features; random_state=0 keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_F1_poly, y_F1, random_state=0)

# Ridge regression with the estimator's default settings. Note that the fitted model is
# stored under the name `linreg` even though it is a Ridge estimator.
linreg = Ridge()
linreg.fit(X_train, y_train)

# Report the learned parameters, then the R-squared score on the train and test sets.
print(f"(poly deg 2 + ridge) linear model coef (w): {linreg.coef_}")
print(f"(poly deg 2 + ridge) linear model intercept (b): {linreg.intercept_}")
print(f"(poly deg 2 + ridge) R-squared score (train): {linreg.score(X_train, y_train):.3f}")
print(f"(poly deg 2 + ridge) R-squared score (test): {linreg.score(X_test, y_test):.3f}")