In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# Synthesize a small, noisy single-feature regression dataset and eyeball it
X_R1, y_R1 = make_regression(
    n_samples=100,     # number of data points to generate
    n_features=1,      # each sample has exactly one input value (x)
    n_informative=1,   # that single feature is the one actually driving y
    bias=150.0,        # intercept (b) of the underlying line y = mx + b
    noise=30,          # standard deviation of the noise applied to y
    random_state=0,    # fixed seed so the same data comes out on every run
)

# scatter plot of the generated samples
plt.figure()
plt.scatter(X_R1, y_R1)
plt.title("Example regression problem with 1 input variable")
plt.show()

In [None]:
# Hold out part of the generated data so the model can later be scored on
# samples it never saw during fitting (seeded so the split is repeatable).
X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# With a single input variable, fitting means finding the w and b in
# y = wx + b that best match the training samples.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

In [None]:
# scikit-learn convention: an attribute whose name ends in an underscore
# (intercept_, coef_) holds a value that was derived from the training data
fitted_b = linreg.intercept_
fitted_w = linreg.coef_

# R-squared on the data used for fitting vs. on the held-out (unseen) data
r2_train = linreg.score(X_train, y_train)
r2_test = linreg.score(X_test, y_test)

print("linear model intercept (b): {}".format(fitted_b))
print("linear model coeff (w): {}".format(fitted_w))
print('R-squared score (training): {:.3f}'.format(r2_train))
print('R-squared score (test): {:.3f}'.format(r2_test))

In [None]:
# Overlay the fitted least-squares line on top of the generated data points.
plt.figure()
# the original samples
plt.scatter(X_R1, y_R1, marker='o', s=50, alpha=0.8)
# Draw the line from the model's own predictions rather than re-deriving
# coef_ * x + intercept_ by hand (which only works for one feature through
# broadcasting). The x values are sorted first so the line is traced once,
# left to right, instead of zig-zagging back over itself in sample order.
x_sorted = X_R1[X_R1[:, 0].argsort()]
plt.plot(x_sorted, linreg.predict(x_sorted), 'r-')
plt.title("Least-squares linear regression")
plt.xlabel("Feature value (x)")
plt.ylabel("Target value (y)")
plt.grid(True)
plt.show()