In [1]:
import numpy as np
from sklearn.utils import shuffle
from sklearn.datasets import load_diabetes

class LinerRegression:
    def __init__(self):
        pass
    def load_data(self):
        data = load_diabetes().data
        target = load_diabetes().target
        X, y = shuffle(data, target, random_state=42)
        X = X.astype(np.float32)
        y = y.reshape((-1, 1))
        data = np.concatenate((X, y), axis=1)
        return data
    def initialize_params(self, dims):
        w = np.zeros((dims, 1))
        b = 1
        return w, b
    def costFunction(self,X_train,y_train,w,b,type):
        self._type=type
        m,n=np.shape(X_train)
        X=np.zeros((1,m))
        if type == 'linear':
            X=np.vstack((X,X_train.T))
        if type == 'poly':
            for i in range(n):
                X=np.vstack((X,X_train[:,i]**(i+1)))
        if type == 'sigmoid':
            for i in range(n):
                X=np.vstack((X,1/(1+np.exp(-(X_train[:,i]-[np.mean(X_train[:,i])]*m)/np.std(X_train[:,i])))))
        if type == 'Gaussia':
            for i in range(n):
                X=np.vstack((X,np.exp(-(X_train[:,i]-[np.mean(X_train[:,i])]*m)**2/(np.std(X_train[:,i]))**2)))
        y_hat = np.dot(np.delete(X.T, 0, axis=1), w) + b
        loss = np.sum((y_hat - y_train) ** 2) / m
        dw = np.dot(X_train.T, (y_hat - y_train)) / m
        db = np.sum((y_hat - y_train)) / m
        return y_hat, loss, dw, db
    def linear_train(self, X, y, learning_rate, epochs,type):
        w, b = self.initialize_params(X.shape[1])
        for i in range(1, epochs):
            y_hat, loss, dw, db = self.costFunction(X, y,w,b,type)
            w += -learning_rate * dw
            b += -learning_rate * db
            if i % 10000 == 0:
                print('epoch %d loss %f' % (i, loss))

            params = {
                'w': w,
                'b': b
            }
            grads = {
                'dw': dw,
                'db': db
            }
        return loss, params, grads
    def predict(self, X_pre, params):
        w = params['w']
        b = params['b']
        m, n = np.shape(X_pre)
        X = np.zeros((1, m))
        if self._type == 'linear':
            X = np.vstack((X, X_pre.T))
        if self._type == 'poly':
            for i in range(n):
                X = np.vstack((X, X_pre[:, i] ** (i + 1)))
        if self._type == 'sigmoid':
            for i in range(n):
                X = np.vstack(
                    (X, 1 / (1 + np.exp(-(X_pre[:, i] - [np.mean(X_pre[:, i])] * m) / np.std(X_pre[:, i])))))
        if self._type == 'Gaussia':
            for i in range(n):
                X = np.vstack(
                    (X, np.exp(-(X_pre[:, i] - [np.mean(X_pre[:, i])] * m) ** 2 / (np.std(X_pre[:, i])) ** 2)))
        y_pred = np.dot(np.delete(X.T, 0, axis=1), w) + b
        return y_pred
    def linear_cross_validation(self, data, k, randomize=True):
        if randomize:
            data = list(data)
            shuffle(data)

        slices = [data[i::k] for i in range(k)]
        for i in range(k):
            validation = slices[i]
            train = [data
                     for s in slices if s is not validation for data in s]
            train = np.array(train)
            validation = np.array(validation)
            yield train, validation
if __name__ == '__main__':
    lr = LinerRegression()
    data = lr.load_data()

    for train, validation in lr.linear_cross_validation(data, 5):
        X_train = train[:, :10]
        y_train = train[:, -1].reshape((-1, 1))
        X_valid = validation[:, :10]
        y_valid = validation[:, -1].reshape((-1, 1))

        loss5 = []
        loss, params, grads = lr.linear_train(X_train, y_train, 0.001, 100000,type='linear')
        loss5.append(loss)
        score = np.mean(loss5)
        print('five kold cross validation score is', score)
        y_pred = lr.predict(X_valid, params)
        valid_score = np.sum(((y_pred - y_valid) ** 2)) / len(X_valid)
        print('valid score is', valid_score)


epoch 10000 loss 5611.700413
epoch 20000 loss 5258.723274
epoch 30000 loss 4960.269687
epoch 40000 loss 4707.233542
epoch 50000 loss 4492.066889
epoch 60000 loss 4308.511332
epoch 70000 loss 4151.375861
epoch 80000 loss 4016.353054
epoch 90000 loss 3899.867025
five kold cross validation score is 3798.95702810396
valid score is 4214.093759681286
epoch 10000 loss 5421.160329
epoch 20000 loss 5106.633201
epoch 30000 loss 4838.960980
epoch 40000 loss 4610.435973
epoch 50000 loss 4414.655301
epoch 60000 loss 4246.298198
epoch 70000 loss 4100.941431
epoch 80000 loss 3974.906301
epoch 90000 loss 3865.131792
five kold cross validation score is 3769.078413755655
valid score is 4615.044916771039
epoch 10000 loss 5586.655556
epoch 20000 loss 5295.084975
epoch 30000 loss 5044.858907
epoch 40000 loss 4829.542060
epoch 50000 loss 4643.730696
epoch 60000 loss 4482.885755
epoch 70000 loss 4343.193070
epoch 80000 loss 4221.446262
epoch 90000 loss 4114.948617
five kold cross validation score is 4021.439