## 导包
- pandas用于快速读取数据集

In [6]:
import os
import pickle
import pandas as pd
from tqdm import trange
import numpy as np

## 需要使用的工具类函数
- 分批加载数据
- 保存模型
- 加载模型

In [7]:
def dataloader(X, y, BATCH_SIZE):
    n = len(X)
    for t in range(0, n, BATCH_SIZE):
        yield X[t:t+BATCH_SIZE, ...], y[t:t+BATCH_SIZE, ...]
        
def save_params_to_file(model):
    terminal_path = ["src/slow/save_weights/", "slow/save_weights/", "save_weights/", "../save_weights/"]
    dirPath = None
    for path in terminal_path:
        if os.path.isdir(path):
            dirPath = path
    if dirPath == None:
        raise FileNotFoundError("save_params_to_file(): Impossible to find save_weights/ from current folder. You need to manually add the path to it in the \'terminal_path\' list and the run the function again.")

    weights = model.get_params()
    with open(dirPath + "final_weights.pkl","wb") as f:
	    pickle.dump(weights, f)
        
def load_params_from_file(model):
    terminal_path = ["src/slow/save_weights/final_weights.pkl", "slow/save_weights/final_weights.pkl",
    "save_weights/final_weights.pkl", "../save_weights/final_weights.pkl"]

    filePath = None
    for path in terminal_path:
        if os.path.isfile(path):
            filePath = path
    if filePath == None:
        raise FileNotFoundError('load_params_from_file(): Cannot find final_weights.pkl from your current folder. You need to manually add it to terminal_path list and the run the function again.')

    pickle_in = open(filePath, 'rb')
    params = pickle.load(pickle_in)
    model.set_params(params)
    return model

## Layers
- 定义Fc层
- 定义Loss函数：MSELoss
- 定义Softmax
- 定义TanH/Relu
- 定义Adam

In [8]:
class Fc():

    def __init__(self, row, column):
        self.row = row
        self.col = column
        
        #Initialize Weight/bias.
        bound = 1 / np.sqrt(self.row)
        self.W = {'val': np.random.uniform(low=-bound, high=bound, size=(self.row, self.col)), 'grad': 0}
        self.b = {'val': np.random.uniform(low=-bound, high=bound, size=(1, self.row)), 'grad': 0}
        
        self.cache = None

    def forward(self, fc):
        self.cache = fc
        A_fc = np.dot(fc, self.W['val'].T) + self.b['val']
        return A_fc

    def backward(self, deltaL):
        fc = self.cache
        m = fc.shape[0]

        #Compute gradient.
    
        self.W['grad'] = (1/m) * np.dot(deltaL.T, fc)
        self.b['grad'] = (1/m) * np.sum(deltaL, axis = 0)

        #Compute error.
        new_deltaL = np.dot(deltaL, self.W['val']) 
        #We still need to multiply new_deltaL by the derivative of the activation
        #function which is done in TanH.backward().

        return new_deltaL, self.W['grad'], self.b['grad']

class AdamGD():

    def __init__(self, lr, beta1, beta2, epsilon, params):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.params = params
        
        self.momentum = {}
        self.rmsprop = {}

        for key in self.params:
            self.momentum['vd' + key] = np.zeros(self.params[key].shape)
            self.rmsprop['sd' + key] = np.zeros(self.params[key].shape)

    def update_params(self, grads):

        for key in self.params:
            # Momentum update.
            self.momentum['vd' + key] = (self.beta1 * self.momentum['vd' + key]) + (1 - self.beta1) * grads['d' + key] 
            # RMSprop update.
            self.rmsprop['sd' + key] =  (self.beta2 * self.rmsprop['sd' + key]) + (1 - self.beta2) * grads['d' + key]**2 
            # Update parameters.
            self.params[key] += -self.lr * self.momentum['vd' + key] / (np.sqrt(self.rmsprop['sd' + key]) + self.epsilon)  

        return self.params

class TanH():
 
    def __init__(self, alpha = 1.7159):
        self.alpha = alpha
        self.cache = None

    def forward(self, X):
        self.cache = X
        return self.alpha * np.tanh(X)

    def backward(self, new_deltaL):
        X = self.cache
        return new_deltaL * (1 - np.tanh(X)**2)


class Softmax():

    def __init__(self):
        pass

    def forward(self, X):
        return np.exp(X) / np.sum(np.exp(X), axis=1)[:, np.newaxis]

class MSELoss:

    def __init__(self):
        pass

    def get(self, y_pred, y):
        batch_size = y_pred.shape[1]
        deltaL = y_pred - y
        loss = np.sum(np.square(y - y_pred)) / batch_size
        return loss, deltaL

    
class ReLU():
    def __init__(self):
        self.cache = None

    def forward(self, X):
        self.cache = X
        return np.maximum(0, X)

    def backward(self, new_deltaL):
        X = self.cache
        dX = np.where(X > 0, 1, 0)  # Derivative of ReLU
        return new_deltaL * dX

## Model
- Implement MLP Regression
- Hidden layers depth and number of neurons are flexible
- Activation select TanH

In [9]:
class Net5:
    def __init__(self, input=10, hidden1=120, hidden2=84, output=1):
        self.fc1 = Fc(row = hidden1, column = input)
        self.tanh3 = TanH()
        self.fc2 = Fc(row = hidden2, column = hidden1)
        self.tanh4 = TanH()
        self.fc3 = Fc(row = output, column = hidden2)
        # self.softmax = Softmax()
        self.layers = [self.fc1, self.fc2, self.fc3]

    def forward(self, X):
        fc1 = self.fc1.forward(X) 
        act3 = self.tanh3.forward(fc1)
        fc2 = self.fc2.forward(act3)
        act4 = self.tanh4.forward(fc2)
        fc3 = self.fc3.forward(act4)
        # y_pred = self.softmax.forward(fc3)
        return fc3
        
    def backward(self, deltaL):
        #Compute gradient for weight/bias between fc3 and fc2.
        deltaL, dW3, db3, = self.fc3.backward(deltaL)

        #Compute error at fc2 layer.
        deltaL = self.tanh4.backward(deltaL) 
        
        #Compute gradient for weight/bias between fc2 and fc1.
        deltaL, dW2, db2 = self.fc2.backward(deltaL)
        #Compute error at fc1 layer.
        deltaL = self.tanh3.backward(deltaL) 
        
        deltaL, dW1, db1 = self.fc1.backward(deltaL)

        grads = { 
                'dW1': dW1, 'db1': db1,
                'dW2': dW2, 'db2': db2,
                'dW3': dW3, 'db3': db3,
        }

        return grads


    def get_params(self):
        params = {}
        for i, layer in enumerate(self.layers):
            params['W' + str(i+1)] = layer.W['val']
            params['b' + str(i+1)] = layer.b['val']

        return params

    def set_params(self, params):
        for i, layer in enumerate(self.layers):
            layer.W['val'] = params['W'+ str(i+1)]
            layer.b['val'] = params['b' + str(i+1)]

## Load Data

In [None]:
data_path = "data/Boston_house_prices.xlsx"
df = pd.read_excel(data_path, sheet_name = 0)
dataset = np.array(df)

## Divide the data

In [None]:
labels = dataset[:,-1:]
features = dataset[:,:-1]

size = 0.8
train_size = int(0.8*len(features))

X = features[:train_size] # (404, 10)
X_test = features[train_size:] # (102, 10)
y = labels[:train_size]
y_test = labels[train_size:]

X_train = X
y_train = y

## Model Loss Optimizer

In [None]:
model = Net5()

# MSE for regression
cost = MSELoss()

params = model.get_params()
optimizer = AdamGD(lr = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8, params = model.get_params())   

## Train and Evauation
使用RMSE和MAPE来评估Train/Evaluate结果

In [10]:
train_costs, val_costs = [], []

print("----------------TRAINING-----------------\n")

NB_EPOCH = 2
BATCH_SIZE = 100

print("EPOCHS: {}".format(NB_EPOCH))
print("BATCH_SIZE: {}".format(BATCH_SIZE))
print("")

nb_train_examples = len(X_train)
# nb_val_examples = len(X_val)

best_val_loss = float('inf')


for epoch in range(NB_EPOCH):

    #-------------------------------------------------------------------------------
    #                                       
    #                               TRAINING PART
    #
    #-------------------------------------------------------------------------------

    train_loss = 0
    
    # RMSE and MAPE
    train_rmse = 0
    train_mape = 0

    pbar = trange(nb_train_examples // BATCH_SIZE)
    train_loader = dataloader(X_train, y_train, BATCH_SIZE)

    for i, (X_batch, y_batch) in zip(pbar, train_loader):
        y_pred = model.forward(X_batch)
        loss, deltaL = cost.get(y_pred, y_batch)

        grads = model.backward(deltaL)
        params = optimizer.update_params(grads)
        model.set_params(params)

        train_loss += loss * BATCH_SIZE
        
        # RMSE and MAPE
        train_rmse += np.sqrt(np.mean((y_batch - y_pred) ** 2))
        train_mape += np.mean(np.abs((y_batch - y_pred) / y_batch))
        
        pbar.set_description("[Train] Epoch {}".format(epoch+1))

    train_loss /= nb_train_examples
    train_costs.append(train_loss)
    
    # RMSE and MAPE
    train_rmse /= nb_train_examples
    train_mape /= nb_train_examples
    
    info_train = "train-loss: {:0.6f} | train-rmse: {:0.3f}"
    print(info_train.format(train_loss, train_rmse))
    info_train = "train-loss: {:0.6f} | train-mape: {:0.3f}"
    print(info_train.format(train_loss, train_mape))
    save_params_to_file(model)
    
    
print("--------------------EVALUATION-------------------\n")
BATCH_SIZE = 100
nb_test_examples = len(X_test)
test_loss = 0

# RMSE and MAPE
test_rmse = 0
test_mape = 0

pbar = trange(nb_test_examples // BATCH_SIZE)
test_loader = dataloader(X_test, y_test, BATCH_SIZE)

for i, (X_batch, y_batch) in zip(pbar, test_loader):
  
    y_pred = model.forward(X_batch)
    loss, deltaL = cost.get(y_pred, y_batch)

    test_loss += loss * BATCH_SIZE
    
    # RMSE and MAPE
    test_rmse += np.sqrt(np.mean((y_batch - y_pred) ** 2))
    test_mape += np.mean(np.abs((y_batch - y_pred) / y_batch))

    pbar.set_description("Evaluation")

test_loss /= nb_test_examples

# RMSE and MAPE
test_rmse /= nb_train_examples
test_mape /= nb_train_examples

info_test = "test-loss: {:0.6f} | test-rmse: {:0.3f}"
print(info_test.format(test_loss, test_rmse))
info_test = "test-loss: {:0.6f} | test-mape: {:0.3f}"
print(info_test.format(test_loss, test_mape))

----------------TRAINING-----------------

EPOCHS: 2
BATCH_SIZE: 100


[Train] Epoch 1: 100%|██████████| 4/4 [00:00<00:00, 24.06it/s]


train-loss: 47493.856179 | train-rmse: 0.207
train-loss: 47493.856179 | train-mape: 0.010


[Train] Epoch 2: 100%|██████████| 4/4 [00:00<00:00, 55.60it/s]


train-loss: 18469.112809 | train-rmse: 0.130
train-loss: 18469.112809 | train-mape: 0.006
--------------------EVALUATION-------------------


Evaluation: 100%|██████████| 1/1 [00:00<00:00, 102.77it/s]

test-loss: 8790.368155 | test-rmse: 0.023
test-loss: 8790.368155 | test-mape: 0.002



