In [20]:
###############################################################################
## Import stuff
 
import numpy as np
from sklearn import datasets, linear_model, metrics
 
###############################################################################
## Load the diabetes dataset
 
diabetes = datasets.load_diabetes()
diabetes_X = diabetes.data  # feature matrix, 442 samples x 10 features

# Hold out the final 20 rows for testing; every row before them is training data.
# Features and targets are cut at the same boundary so the rows stay aligned.
diabetes_X_train, diabetes_X_test = diabetes_X[:-20], diabetes_X[-20:]
diabetes_y_train, diabetes_y_test = diabetes.target[:-20], diabetes.target[-20:]
 
###############################################################################
## Our own implementation
 
# Linear regression fitted by batch gradient descent on the training split,
# then scored on the held-out test split.

# train
X = diabetes_X_train
y = diabetes_y_train

# train: init
# A seeded generator makes the initial weights (and therefore every number
# printed below) reproducible across "Restart & Run All".
SEED = 42
rng = np.random.default_rng(SEED)
W = rng.uniform(low=-0.1, high=0.1, size=X.shape[1])  # one weight per feature column
b = 0.0
learning_rate = 0.1
epochs = 100000

# Lowest training cost seen so far and the parameters that produced it.
# Only this single best candidate is kept, rather than a snapshot per epoch.
# W is rebound to a new array on every update (never modified in place), so
# holding a reference here is safe.
best_cost = np.inf
best_W, best_b = W, b

# train: gradient descent
for i in range(epochs):

    # calculate predictions
    y_predict = X.dot(W) + b

    # calculate error and cost (mean squared error)
    error = y - y_predict
    mean_squared_error = np.mean(np.square(error))
    if mean_squared_error < best_cost:
        best_cost, best_W, best_b = mean_squared_error, W, b

    # calculate gradients (of 0.5 * MSE; the factor 2 is absorbed by the learning rate)
    W_gradient = -(1.0/len(X)) * error.dot(X)
    b_gradient = -(1.0/len(X)) * np.sum(error)

    # update parameters
    W = W - (learning_rate * W_gradient)
    b = b - (learning_rate * b_gradient)

    # diagnostic output
    if i % 5000 == 0: print("Epoch %d: %f" % (i, mean_squared_error))

# The parameters produced by the last update have not been scored yet;
# score them so they can be selected if they are the best.
final_cost = np.mean(np.square(y - (X.dot(W) + b)))
if final_cost < best_cost:
    best_cost, best_W, best_b = final_cost, W, b

# Evaluate and report the same parameters: the ones with the lowest training cost.
# (If training diverged to NaN, the comparisons above are False and the last
# finite-cost parameters are kept.)
W, b = best_W, best_b

# test
X = diabetes_X_test
y = diabetes_y_test

y_predict = X.dot(W) + b
error = y - y_predict
mean_squared_error = np.mean(np.square(error))
print("Mean squared error: %f" % mean_squared_error)
print('Optimal Parameters Obtained are: ', [W, b])
print("="*120)

###############################################################################

Epoch 0: 29468.864346
Epoch 5000: 3048.212223
Epoch 10000: 2941.417938
Epoch 15000: 2927.459482
Epoch 20000: 2924.754068
Epoch 25000: 2923.796558
Epoch 30000: 2923.196808
Epoch 35000: 2922.695534
Epoch 40000: 2922.232378
Epoch 45000: 2921.790534
Epoch 50000: 2921.364394
Epoch 55000: 2920.951575
Epoch 60000: 2920.550731
Epoch 65000: 2920.160918
Epoch 70000: 2919.781383
Epoch 75000: 2919.411494
Epoch 80000: 2919.050705
Epoch 85000: 2918.698544
Epoch 90000: 2918.354591
Epoch 95000: 2918.018481
Mean squared error: 1993.529194
Optimal Parameters Obtained are:  [array([   3.66176486, -234.66419181,  519.3952477 ,  325.58124255,
       -176.07597911,  -16.45955744, -180.0799883 ,  108.04651727,
        502.77483651,   78.97037491]), 152.72872039776334]


In [None]:
# Reference solution: ordinary least squares from scikit-learn, fitted on the
# training split (fit() returns the estimator itself, so it can be chained).
regr = linear_model.LinearRegression().fit(diabetes_X_train, diabetes_y_train)

# Score the fitted model on the held-out test split
diabetes_y_pred = regr.predict(diabetes_X_test)
mean_squared_error = metrics.mean_squared_error(diabetes_y_test, diabetes_y_pred)

# Report the learned coefficients, then the test-set mean squared error
print('Coefficients: \n', regr.coef_)
print("Mean squared error: %.2f" % mean_squared_error)
print("="*120)