### Simple Linear Regression in Python

In [1]:
import numpy as np 
from sklearn.linear_model import LinearRegression


In [2]:
# Reshape the six input values into a single column (shape (6, 1)).
# Passing -1 as the row count lets NumPy work out that dimension automatically
# from the number of elements.
x = np.array([5, 15, 25, 35, 45, 55]).reshape(-1, 1)
# The responses stay a flat 1-D array, one value per row of x.
y = np.array([5, 20, 14, 32, 22, 38])

In [3]:
# Instantiate a linear regression estimator with its default settings.
# Nothing is fitted yet at this point.
model = LinearRegression()

In [4]:
# Fit the model to the data in x and y.
model.fit(x, y)

In [11]:
# Equivalent shorthand: construct the estimator and fit it in one chained call.
# This rebinds `model` to a new estimator fitted on the same x and y.
model = LinearRegression().fit(x, y)

#### In Python, prefixing a string literal with `f` creates a formatted string literal, also known as an f-string. F-strings provide a concise and convenient way to embed variables and expressions inside a string: anything placed in curly braces is evaluated and inserted into the text, e.g. `f"slope: {value}"`.

In [5]:
# Coefficient of determination of the fitted model, scored on the same
# x and y that were used for fitting.
r_sq = model.score(x, y)

print(f"coefficient of determination: {r_sq}")



coefficient of determination: 0.7158756137479542


In [14]:
# Estimated intercept of the fitted line (the predicted response at x = 0).
print(f"intercept: {model.intercept_}")

intercept: 5.633333333333329


In [15]:
# Estimated slope. coef_ is an array, which is why the printed value
# appears inside square brackets.
print(f"slope: {model.coef_}")

slope: [0.54]


#### Note: In scikit-learn, by convention, a trailing underscore indicates that an attribute is estimated from the data during fitting. In this example, `.intercept_` and `.coef_` are estimated values.

In [6]:
# Alternative to an f-string: convert the array to text with str() and
# concatenate it onto the label. The printed result is the same.
print("slope: " + str(model.coef_))

slope: [0.54]


In [7]:
# Another alternative: str.format() substitutes the array for the {} placeholder.
print("slope: {}".format(model.coef_))

slope: [0.54]


In [11]:
# Predict the response for the same inputs the model was fitted on.
# The result is a 1-D array with one prediction per row of x.
y_pred = model.predict(x)

print(f"predicted response:\n{y_pred}")

predicted response:
[ 8.33333333 13.73333333 19.13333333 24.53333333 29.93333333 35.33333333]


#### In practice, you use the fitted model to predict responses for new inputs, as shown below

In [12]:
# New inputs 0..4 arranged as a single column (shape (5, 1)).
x_new = np.arange(5).reshape(-1, 1)

In [13]:
# Predict responses for the new inputs. The bare name on the last line
# makes the notebook display the resulting array.
y_new = model.predict(x_new)
y_new

array([5.63333333, 6.17333333, 6.71333333, 7.25333333, 7.79333333])

In [14]:
# Reproduce the prediction by hand: slope * x + intercept.
# Because x is a (6, 1) column, the result is also a (6, 1) array rather than
# the 1-D array that model.predict(x) returns.
y_pred = model.coef_ * x + model.intercept_

print(f"predicted response:\n {y_pred}")

predicted response:
 [[ 8.33333333]
 [13.73333333]
 [19.13333333]
 [24.53333333]
 [29.93333333]
 [35.33333333]]


### Multiple Linear Regression with scikit-learn

In [15]:
# Eight observations with two input values each; every inner list is one row.
x = [
    [0, 1],
    [5, 1],
    [15, 2],
    [25, 5],
    [35, 11],
    [45, 15],
    [55, 34],
    [60, 35],
]

In [18]:
# Observed responses: eight values, one per observation.
y = [
    4, 5, 20, 14,
    32, 22, 38, 43,
]

In [19]:
# Convert both Python lists to NumPy arrays, rebinding the same names.
x = np.array(x)
y = np.array(y)

In [22]:
# Construct a new estimator and fit it on the two-column inputs in one step.
# This rebinds `model`, replacing the earlier single-input fit.
model = LinearRegression().fit(x, y)

In [23]:
# Score the fitted model on the same x and y it was fitted on.
r_sq = model.score(x, y)


In [28]:
# Report the score, the intercept and the coefficient array on separate lines.
# The adjacent f-string literals are joined into one string before printing.
print(
    f"r_sq: {r_sq}\n"
    f"intercept: {model.intercept_}\n"
    f"coefficient: {model.coef_}"
)


r_sq: 0.8615939258756776
intercept: 5.52257927519819
coefficient: [0.44706965 0.25502548]


In [29]:
# Predict the response for each row of the inputs the model was fitted on.
y_pred_mlr = model.predict(x)

print(f"predict response:\n  {y_pred_mlr}")

predict response:
  [ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [31]:
# Same prediction computed by hand: multiply each column of x by its
# coefficient, add the products across each row (axis=1), then add the intercept.
y_pred = model.intercept_ + (model.coef_ * x).sum(axis=1)

print(f"predicted response:\n{y_pred}")

predicted response:
[ 5.77760476  8.012953   12.73867497 17.9744479  23.97529728 29.4660957
 38.78227633 41.27265006]


In [32]:
# New inputs: the integers 0..9 laid out two per row (shape (5, 2)).
# The bare name on the last line makes the notebook display the array.
x_new = np.arange(10).reshape(-1, 2)
x_new

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [33]:
# Predict a response for each row of the new two-column inputs and display it.
y_new = model.predict(x_new)
y_new

array([ 5.77760476,  7.18179502,  8.58598528,  9.99017554, 11.3943658 ])