## Make Regression 

In [7]:
from sklearn.datasets import make_regression

# Dataset configuration
n_samples, n_features, n_informative = 100, 12, 8
bias = 3.0

# Draw a synthetic regression problem. coef=True makes make_regression also
# return the coefficients it generated the target with, so later cells can
# compare fitted coefficients against them.
X, y, coefficients = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    n_informative=n_informative,
    bias=bias,
    coef=True,
    random_state=42,
)

# Quick report: feature matrix shape, target shape, and the generating coefficients
print("Shape of X (features):", X.shape)
print("Shape of y (target):", y.shape)
print("True coefficients:", coefficients)


Shape of X (features): (100, 12)
Shape of y (target): (100,)
True coefficients: [ 0.          0.         63.31513756 95.14033422  4.34125329 22.80797719
  0.         81.91888594  0.         88.42064633 21.20448399 60.16118201]


In [None]:
# Cell-local imports so this cell runs on a fresh kernel: no earlier visible
# cell imports numpy, so the original `np.concatenate` call raised NameError.
import numpy as np
import pandas as pd

# Turn the 1-D target into a single column so it can sit beside the features.
y = y.reshape(-1, 1)

# One row per sample: all feature columns first, the target column last.
data = np.concatenate([X, y], axis=1)

column_names = [f"feature_{i}" for i in range(n_features)] + ["target"]
df = pd.DataFrame(data, columns=column_names)

# Write the table to CSV without the index column.
csv_file = "regression_data.csv"
df.to_csv(csv_file, index=False)
print(f"Data saved to {csv_file}")

# Preview the first rows; this must stay the last expression to be rendered.
df.head()

## Linear Regression 

In [25]:
# Positional column selectors: everything but the last column is a feature,
# the last column is the target (kept 2-D, i.e. a one-column DataFrame).
feature_cols = slice(None, -1)
target_col = slice(-1, None)

X = df.iloc[:, feature_cols].copy()
y = df.iloc[:, target_col].copy()

In [26]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split


# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ordinary least-squares baseline
model = LinearRegression()
model.fit(X_train, y_train)

# Score on the held-out rows
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# y is a one-column DataFrame, so coef_ comes back 2-D (one row per target).
# Flatten it so it prints in the same 1-D layout as the true coefficients
# and can be compared position by position.
print("Estimated coefficients:", model.coef_.ravel())
print("True coefficients:", coefficients)


Mean Squared Error: 6.108497087924591e-26
Estimated coefficients: [[ 3.89372680e-14  3.81916720e-14  6.33151376e+01  9.51403342e+01
   4.34125329e+00  2.28079772e+01  1.06414877e-13  8.19188859e+01
  -4.62963001e-14  8.84206463e+01  2.12044840e+01  6.01611820e+01]]
True coefficients: [ 0.          0.         63.31513756 95.14033422  4.34125329 22.80797719
  0.         81.91888594  0.         88.42064633 21.20448399 60.16118201]


## Ridge Regression 

In [27]:
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ridge regression model; alpha controls the strength of the regularization
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Score on the held-out rows
y_pred = ridge_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# y is a one-column DataFrame, so coef_ comes back 2-D (one row per target).
# Flatten it so the printout uses the same 1-D layout as the true coefficients.
print("Ridge Regression coefficients:", ridge_model.coef_.ravel())


Mean Squared Error: 4.8527497648597535
Ridge Regression coefficients: [[-7.20403679e-02 -3.16609250e-01  6.24175830e+01  9.40147306e+01
   4.21042332e+00  2.29278570e+01 -6.12598005e-02  8.10183044e+01
  -1.60756843e-01  8.75522908e+01  2.07726721e+01  5.95315506e+01]]


In [9]:
# Re-print the generating coefficients so they can be read against the Ridge estimates above.
print("True coefficients:", coefficients)

True coefficients: [ 0.          0.         63.31513756 95.14033422  4.34125329 22.80797719
  0.         81.91888594  0.         88.42064633 21.20448399 60.16118201]


## Lasso Regression 

In [28]:
from sklearn.datasets import make_regression
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# 80/20 train/test split with the seed fixed at 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Lasso model; alpha sets the regularization strength and can be tuned
lasso_model = Lasso(alpha=0.1)

# Train, then predict the held-out rows in one chained call (fit returns the estimator)
y_pred = lasso_model.fit(X_train, y_train).predict(X_test)

# Held-out error
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Fitted coefficients, for comparison with the true ones
print("Lasso Regression coefficients:", lasso_model.coef_)


Mean Squared Error: 0.09766838029211339
Lasso Regression coefficients: [-0.         -0.         63.20761134 95.02945814  4.18019413 22.72127733
 -0.         81.85761803 -0.         88.34942434 21.09388946 60.08988342]


In [29]:
# Re-print the generating coefficients so they can be read against the Lasso estimates above.
print("True coefficients:", coefficients)

True coefficients: [ 0.          0.         63.31513756 95.14033422  4.34125329 22.80797719
  0.         81.91888594  0.         88.42064633 21.20448399 60.16118201]


## Polynomial Regression 

In [30]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline


# Hold out 20% of the rows; the seed pins the split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Highest polynomial degree to try; every degree from 1 up to this value is evaluated
max_degree = 10

for degree in range(1, max_degree + 1):
    # Expand the features to the current degree, then fit a linear regression on them
    polynomial_regression = Pipeline(
        steps=[
            ("poly_features", PolynomialFeatures(degree=degree)),
            ("lin_reg", LinearRegression()),
        ]
    )

    # Pipeline.fit returns the pipeline itself, so training and prediction chain
    y_pred = polynomial_regression.fit(X_train, y_train).predict(X_test)

    # Report the held-out error for this degree
    mse = mean_squared_error(y_test, y_pred)
    print(degree, "Mean Squared Error:", mse)


1 Mean Squared Error: 4.440340263308031e-26
2 Mean Squared Error: 2113.1971602835642
3 Mean Squared Error: 3856.025578881459
4 Mean Squared Error: 8335.186506895845
5 Mean Squared Error: 13722.152867429098
6 Mean Squared Error: 17609.36805224184
7 Mean Squared Error: 54045.366707631285
8 Mean Squared Error: 61723.10430043525
9 Mean Squared Error: 414384.3618531814
10 Mean Squared Error: 652428.877242517
