In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
# Inputs (regressors). This array is required to be two-dimensional: one column and
# as many rows as necessary, which is exactly what .reshape(-1, 1) specifies.
x = np.array([5, 15, 25, 35, 45, 55]).reshape(-1, 1)

# Outputs (responses), kept as a plain one-dimensional array.
y = np.array([5, 20, 14, 32, 22, 38])

In [3]:
# Show both arrays: x has two dimensions and x.shape is (6, 1), while y has a
# single dimension and y.shape is (6,).
print(x, y, sep='\n')

[[ 5]
 [15]
 [25]
 [35]
 [45]
 [55]]
[ 5 20 14 32 22 38]


In [4]:
model = LinearRegression()
# This statement creates the variable model as an instance of LinearRegression, using the default settings.
#
# Optional constructor parameters:
#fit_intercept is a Boolean (True by default) that decides whether to calculate the intercept 𝑏₀ (True) or consider it equal to zero (False).
#normalize is a Boolean (False by default) that decides whether to normalize the input variables (True) or not (False).
#    NOTE(review): normalize was deprecated in scikit-learn 1.0 and removed in 1.2, so a current release
#    presumably rejects it as an unexpected keyword argument - confirm against the installed version.
#copy_X is a Boolean (True by default) that decides whether to copy (True) or overwrite the input variables (False).
#n_jobs is an integer or None (default) and represents the number of jobs used in parallel computation. None usually means one job and -1 means using all processors.

In [7]:
# Fit the model: .fit() calculates the optimal values of the weights 𝑏₀ and 𝑏₁, using the
# existing input and output (x and y) as the arguments.
#
# .fit() returns self, which is the estimator itself. That is why creating the estimator and
# fitting it can be chained into one statement, equivalent to running
# model = LinearRegression() followed by model.fit(x, y).
#
# The data is fitted exactly once here. Calling model.fit(x, y) and then running the chained
# statement as well would only repeat the work, because the estimator fitted first would be
# replaced straight away by the new one.
model = LinearRegression().fit(x, y)

In [11]:
# .score() takes the inputs x and the observed outputs y and returns the
# coefficient of determination (𝑅²) of the fitted model on that data.
r_sq = model.score(X=x, y=y)
print('coefficient of determination:', r_sq)

coefficient of determination: 0.715875613747954


In [12]:
print('intercept:', model.intercept_)
print('slope:', model.coef_)

# .intercept_ holds 𝑏₀ and prints as a single number here; .coef_ holds 𝑏₁ and prints as a one-element array.
# The value 𝑏₀ = 5.63 (approximately) means that the model predicts the response 5.63 when 𝑥 is zero.
# The value 𝑏₁ = 0.54 means that the predicted response rises by 0.54 when 𝑥 is increased by one.

intercept: 5.633333333333333
slope: [0.54]


In [13]:
# Once there is a satisfactory model, it can be used for predictions with either
# existing or new data. Here .predict() receives the existing inputs x (the
# regressor) and returns the corresponding predicted response.
y_pred = model.predict(x)
print('predicted response:')
print(y_pred)

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


In [15]:
# New inputs: arange() from numpy generates an array with the elements from 0 (inclusive)
# to 5 (exclusive), that is 0, 1, 2, 3, and 4, reshaped here into a single column.
x_new = np.arange(5).reshape(-1, 1)

# .predict() is applied to the new regressor x_new and yields the response y_new.
y_new = model.predict(x_new)

print(x_new, y_new, sep='\n')



[[0]
 [1]
 [2]
 [3]
 [4]]
[5.63333333 6.17333333 6.71333333 7.25333333 7.79333333]
