# Checking correlated features in LinearRegression

In [89]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LinearRegression

# Linear regression model

In [161]:
# Synthetic regression problem: 50,000 samples, 2 features, Gaussian noise
# (std 30) on the target. random_state pins the generated data.
X, y = datasets.make_regression(
    n_samples=50000,
    n_features=2,
    noise=30,
    random_state=42,
)

In [162]:
# Hold out 20% of the rows for evaluation. random_state is fixed so the split,
# and therefore the metrics printed further down, are reproducible across
# kernel restarts instead of changing on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [163]:
# Build the linear model and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
regr = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and compute both evaluation metrics up front.
y_pred = regr.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)  # 1.0 would be a perfect prediction

# Report the learned coefficients followed by the two metrics.
print("Coefficients: \n", regr.coef_)
print("Mean squared error: %.2f" % test_mse)
print("Coefficient of determination: %.2f" % test_r2)

Coefficients: 
 [68.08657704 29.56215236]
Mean squared error: 914.22
Coefficient of determination: 0.85


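# Creating correlated features

Each added column is an exact shifted copy of an original column (`X + 0.01`), so it is perfectly correlated with that column. Because the model also fits an intercept, the coefficients are then no longer uniquely determined: they can become huge with opposite signs that cancel out, while the predictions (and hence the MSE and R²) stay essentially unchanged.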

In [164]:
# creating correlated features
# X is overwritten by this cell, so always start from its first two columns
# (the features produced by make_regression with n_features=2). Without the
# slice, re-running the cell would append to the already-widened matrix and
# double the column count each time (2 -> 4 -> 8 ...).
X_base = X[:, :2]
# Append a copy of each original feature shifted by 0.01: columns 2-3 are
# exact affine functions of columns 0-1.
X = np.append(X_base, X_base + 0.01, axis=1)

In [165]:
# Hold out 20% of the rows of the widened feature matrix. random_state is
# fixed so the split is reproducible; with an unseeded split the metrics
# printed below change from run to run purely because different rows land in
# the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [166]:
# Fresh estimator for the widened feature matrix, fitted on the training rows.
regr = LinearRegression()
regr.fit(X_train, y_train)

# Evaluate on the test rows.
y_pred = regr.predict(X_test)
mse_value = mean_squared_error(y_test, y_pred)
r2_value = r2_score(y_test, y_pred)  # coefficient of determination, 1 is perfect

# Print the coefficients first, then the error metrics.
print("Coefficients: \n", regr.coef_)
print("Mean squared error: %.2f" % mse_value)
print("Coefficient of determination: %.2f" % r2_value)

Coefficients: 
 [-2.73423252e+13  3.88918934e+13  2.73423252e+13 -3.88918934e+13]
Mean squared error: 892.86
Coefficient of determination: 0.86
