In [1]:
import numpy as np

In [20]:
def predict(x, w):
    """Return the linear model output for inputs ``x`` and weights ``w``.

    This is simply ``np.dot(x, w)``: no intercept is added here, so any
    bias term must already be represented as a column of ``x`` with a
    matching entry in ``w``.
    """
    linear_output = np.dot(x, w)
    return linear_output

In [25]:
def update_weights(x_train, y_train, weights, learning_rate):
    """Apply one batch gradient-descent step to ``weights`` and return it.

    The step direction is ``x_train.T @ (y_train - predictions)`` divided by
    the number of rows in ``x_train``, scaled by ``learning_rate``.

    Note: ``weights`` is updated IN PLACE (``+=``), so the caller's array
    changes; the same object is also returned for convenience.
    """
    # Residuals of the current model on the training data.
    residuals = y_train - predict(x_train, weights)
    # Average the per-sample contributions over the number of rows.
    step_direction = np.dot(x_train.T, residuals) / x_train.shape[0]
    # In-place update: callers may rely on their array being mutated.
    weights += learning_rate * step_direction
    return weights

In [5]:
def compute_cost(x, y, weights):
    """Return the mean of the halved squared errors for ``weights`` on ``(x, y)``.

    Computed as ``mean((y - predict(x, weights)) ** 2 / 2)``.
    """
    residuals = y - predict(x, weights)
    half_squared = (residuals ** 2) / 2.0
    return np.mean(half_squared)

In [47]:
def train_lr(x_train, y_train, max_iter, learning_rate, fit_intercept = False):
    """Fit linear-regression weights with batch gradient descent.

    Parameters
    ----------
    x_train : 2-D array of training features (rows are observations).
    y_train : training targets passed through to ``update_weights``.
    max_iter : number of gradient-descent steps to run.
    learning_rate : step-size multiplier handed to ``update_weights``.
    fit_intercept : when True, a column of ones is prepended to ``x_train``
        so that ``weights[0]`` plays the role of the intercept.

    Returns
    -------
    The weight vector after ``max_iter`` updates; its length is the number
    of columns of ``x_train`` (plus one when ``fit_intercept`` is True).

    Side effects
    ------------
    Prints the current cost every 100 iterations (starting at iteration 0,
    after that iteration's update has been applied).
    """
    if fit_intercept:
        # Prepend a constant column so the first weight acts as the bias.
        x0 = np.ones((x_train.shape[0], 1))
        x_train = np.hstack((x0, x_train))

    # Start from all-zero weights, one per (possibly augmented) column.
    weights = np.zeros(x_train.shape[1])

    for iteration in range(max_iter):
        # Rebind to the returned array instead of relying on update_weights
        # mutating its argument: if the helper ever returns a fresh array,
        # training must not silently keep the initial zeros.
        weights = update_weights(x_train, y_train, weights, learning_rate)
        # Display cost once in 100 iterations.
        if iteration % 100 == 0:
            print(f"Cost: {compute_cost(x_train, y_train, weights):.2f}")
    return weights


In [48]:
def y_predict(x, weights):
    """Predict targets for ``x``, adding an intercept column when needed.

    If ``x`` has exactly one column fewer than ``weights`` has entries, the
    weights are taken to include a leading intercept term and a column of
    ones is prepended to ``x`` before calling ``predict``. Otherwise ``x`` is
    passed through unchanged.

    A single 1-D sample is accepted and treated as a one-row matrix, in
    which case the result has one element.
    """
    # Promote a 1-D sample to shape (1, n_features); without this,
    # ``x.shape[1]`` raises IndexError for 1-D input. 2-D input is unchanged.
    x = np.atleast_2d(x)
    if x.shape[1] == weights.shape[0] - 1:
        # Weights carry one extra entry: prepend the constant column that
        # pairs with it.
        intercept = np.ones((x.shape[0], 1))
        x = np.hstack((intercept, x))
    return predict(x, weights)

In [49]:
# Try the implementation on scikit-learn's diabetes dataset.
from sklearn.datasets import load_diabetes
data = load_diabetes()
# Feature matrix and regression target as provided by the dataset object.
x = data.data
y = data.target
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing; fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=9)

In [50]:
# Fit with an intercept term; train_lr prints the cost every 100 iterations.
weights = train_lr(x_train,y_train, max_iter = 20000, learning_rate = 1, fit_intercept = True)

Cost: 3023.83
Cost: 2110.61
Cost: 1858.50
Cost: 1753.96
Cost: 1697.64
Cost: 1663.90
Cost: 1642.87
Cost: 1629.49
Cost: 1620.85
Cost: 1615.17
Cost: 1611.38
Cost: 1608.81
Cost: 1607.02
Cost: 1605.75
Cost: 1604.82
Cost: 1604.12
Cost: 1603.58
Cost: 1603.16
Cost: 1602.81
Cost: 1602.52
Cost: 1602.26
Cost: 1602.04
Cost: 1601.84
Cost: 1601.65
Cost: 1601.48
Cost: 1601.32
Cost: 1601.17
Cost: 1601.03
Cost: 1600.88
Cost: 1600.75
Cost: 1600.62
Cost: 1600.49
Cost: 1600.36
Cost: 1600.24
Cost: 1600.11
Cost: 1599.99
Cost: 1599.88
Cost: 1599.76
Cost: 1599.64
Cost: 1599.53
Cost: 1599.42
Cost: 1599.31
Cost: 1599.20
Cost: 1599.09
Cost: 1598.98
Cost: 1598.87
Cost: 1598.77
Cost: 1598.66
Cost: 1598.56
Cost: 1598.46
Cost: 1598.36
Cost: 1598.26
Cost: 1598.16
Cost: 1598.06
Cost: 1597.96
Cost: 1597.86
Cost: 1597.76
Cost: 1597.67
Cost: 1597.57
Cost: 1597.48
Cost: 1597.39
Cost: 1597.29
Cost: 1597.20
Cost: 1597.11
Cost: 1597.02
Cost: 1596.93
Cost: 1596.84
Cost: 1596.75
Cost: 1596.66
Cost: 1596.57
Cost: 1596.49
Cost: 

In [51]:
# x_test has no intercept column; y_predict prepends one because weights has one extra entry.
y_pred = y_predict(x_test, weights)

In [52]:
from sklearn.metrics import r2_score, mean_squared_error
# Test-set R^2 (first line printed) and mean squared error (second line printed).
print(r2_score(y_test, y_pred))
print(mean_squared_error(y_test, y_pred))

0.6022765141795811
2197.928491521614


In [54]:
# Show the first five rows of the feature matrix.
x[:5,:]

array([[ 0.03807591,  0.05068012,  0.06169621,  0.02187239, -0.0442235 ,
        -0.03482076, -0.04340085, -0.00259226,  0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, -0.02632753, -0.00844872,
        -0.01916334,  0.07441156, -0.03949338, -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, -0.00567042, -0.04559945,
        -0.03419447, -0.03235593, -0.00259226,  0.00286131, -0.02593034],
       [-0.08906294, -0.04464164, -0.01159501, -0.03665608,  0.01219057,
         0.02499059, -0.03603757,  0.03430886,  0.02268774, -0.00936191],
       [ 0.00538306, -0.04464164, -0.03638469,  0.02187239,  0.00393485,
         0.01559614,  0.00814208, -0.00259226, -0.03198764, -0.04664087]])