### Import related package

In [1]:
## Import tensorflow package for modeling
import torch
from torch.autograd import Variable
import torch.optim as optim

## Data processing
import pandas as pd
import numpy as np

## Min-max normalization
from sklearn.preprocessing import MinMaxScaler

## Plot the graph
import matplotlib.pyplot as plt
%matplotlib inline

## Initializing module
from sklearn.linear_model import LinearRegression
np.set_printoptions(suppress=True)

## Copy module
import copy

## Used to calculate the training time
import time

## Set the GUP environment
import os

### Control memory usage space for GPU

In [2]:
# gpus = tf.config.list_physical_devices(device_type='GPU')

# tf.config.set_logical_device_configuration(
#     gpus[0],
#     [tf.config.LogicalDeviceConfiguration(memory_limit=2048),
#      tf.config.LogicalDeviceConfiguration(memory_limit=2048)]
# )

In [3]:
# os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [4]:
# config = tf.compat.v1.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction =0.1
# tf.compat.v1.Session(config=config)

In [5]:
# gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.5)
# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
# tf.compat.v1.keras.backend.set_session(sess)

## Print out some info

In [6]:
def accuracy_cacl(pred_value, actual_value):
    
#     yo, loss, tape = network.forward()
    accuracy = list()

    for i in range(pred_value.shape[1]):
        
        correct_times = np.sum(tf.math.abs(pred_value[:,i] - actual_value[:,i]) < 2000)
        accuracy.append(correct_times/pred_value.shape[0])   
        
    return accuracy

In [7]:
def plot_result(name, pred_value, actual_value):
    
    fig, ax = plt.subplots(2,2,figsize=(20,10), sharex=True, sharey=True)

    for i in range(yo.shape[1]):
        ax[i//2,i%2].plot(pred_value[:,i], label="LLAAT")
        ax[i//2,i%2].plot(actual_value[:,i], label="Actual")
        ax[i//2,i%2].set_title("Forecasted performance for l=%d" %(i+1))
        ax[i//2,i%2].legend()
    #fig.text(0.5, 0, "Stage of training", ha='center', fontsize=20)
    #fig.text(0, 0.5, "Copper price value", va='center', rotation='vertical')
    fig.suptitle("In the %s process"%(name))
    fig.tight_layout()

In [8]:
def plot_adopted_node(network):
    
    fig, ax = plt.subplots(figsize=(20,5))
    ax.set_title("Total amount of adopted hidden nodes in the training process")
    ax.plot(network.nb_node_acceptable,"-o")

    ax.set_xlabel("Stage of training")
    ax.set_ylabel("Hidden nodes")

In [9]:
# print("The training time(s):",end - start)

# print("-"*30)

def validation(network,nb_step4,nb_step6_1,nb_step6_2,x_test_scaled,y_test_data):

    ## Training_Step
    yo, loss, tape = network.forward()
    pred_value_train = sc.inverse_transform(yo)
    actual_value_train = sc.inverse_transform(network.y)
    accuracy_train = accuracy_cacl(pred_value_train, actual_value_train)



    ## Test_step
    pred_value_test = network.forecast(x_test_scaled)
    pred_value_test = sc.inverse_transform(pred_value_test)
    accuracy_test = accuracy_cacl(pred_value_test, y_test_data)



    total_time = nb_step4 + nb_step6_1 + nb_step6_2
    print("<<The percentage of each step>>")
    print("Step 4: %.2f%%"%(nb_step4/total_time))
    print("Step 6.1: %.2f%%"%(nb_step6_1/total_time))
    print("Step 6.2: %.2f%%"%(nb_step6_2/total_time))

    print("-"*60)
    print("Total amount of cramming occurrences:",nb_step6_2)

    print("-"*60)
    print("The amount of hidden node that be pruned:",network.nb_node_pruned)

    print("-"*60)
    print("The amount of adopted hidden nodes:",network.nb_node_acceptable[-1].numpy())

    print("-"*60)
    print("<<Accuracy in training step>>")
    print("The accuracy for l = 1: %.1f%%" %(accuracy_train[0]*100))
    print("The accuracy for l = 2: %.1f%%" %(accuracy_train[1]*100))
    print("The accuracy for l = 3: %.1f%%" %(accuracy_train[2]*100))
    print("The accuracy for l = 4: %.1f%%" %(accuracy_train[3]*100))


    print("-"*60)
    print("<<Accuracy in inferencing step>>")
    print("The accuracy for l = 1: %.1f%%" %(accuracy_test[0]*100))
    print("The accuracy for l = 2: %.1f%%" %(accuracy_test[1]*100))
    print("The accuracy for l = 3: %.1f%%" %(accuracy_test[2]*100))
    print("The accuracy for l = 4: %.1f%%" %(accuracy_test[3]*100))


    plot_result("training",pred_value_train, actual_value_train)
    plot_result("inferencing",pred_value_test, y_test_data)
    plot_adopted_node(network)

### Preprocessing the data

In [10]:
def read(path):
    return pd.read_csv(path)

In [11]:
def buildTrain(train, pastWeek=4, futureWeek=4, defaultWeek=1):
    X_train, Y_train = [], []
    for i in range(train.shape[0]-futureWeek-pastWeek):
        X = np.array(train.iloc[i:i+defaultWeek])
        X = np.append(X,train["CCSP"].iloc[i+defaultWeek:i+pastWeek])
        X_train.append(X.reshape(X.size))
        Y_train.append(np.array(train.iloc[i+pastWeek:i+pastWeek+futureWeek]["CCSP"]))
    return np.array(X_train), np.array(Y_train)

### Min-max normalization

In [12]:
## Use min-max normalization to scale the data to the range from 1 to 0
sc = MinMaxScaler(feature_range = (0, 1))

### Design get_data() to get data

In [13]:
def get_data(futureWeek):
    
    ## Read weekly copper price data
    path = "WeeklyFinalData.csv"
    data = read(path)
    
    date = data["Date"]
    data.drop("Date", axis=1, inplace=True)
    
    ## Add time lag (pastWeek=4, futureWeek=1)
    x_data, y_data = buildTrain(data, futureWeek=futureWeek)

    
    ## Split the data to training data and test data
    x_train_data = x_data[:int(x_data.shape[0]*0.8)]
    x_test_data = x_data[int(x_data.shape[0]*0.8):]
    y_train_data = y_data[:int(x_data.shape[0]*0.8)]
    y_test_data = y_data[int(x_data.shape[0]*0.8):]


    return (x_train_data, x_test_data, y_train_data, y_test_data)

#     return (x_data, y_data)

In [14]:
x_train_data, x_test_data, y_train_data, y_test_data = get_data(4)
print(x_train_data.shape)
print(x_test_data.shape)
print(y_train_data.shape)
print(y_test_data.shape)

(376, 18)
(95, 18)
(376, 4)
(95, 4)


### Network class

In [15]:
class Network(torch.nn.Module):
    
    def __init__(self, nb_neuro, x_train_scaled, y_train_scaled):
        
        super(Network, self).__init__()
        self.linear1 = torch.nn.Linear(x_train_scaled.shape[1], nb_neuro)
        self.linear2 = torch.nn.Linear(nb_neuro, y_train_scaled.shape[1])

        
        # Stop criteria - threshold
        self.threshold_for_error = 2000
        self.threshold_for_lr = 1e-4
        
        # Input data 
        self.x = torch.FloatTensor(x_train_scaled)
        self.y = torch.FloatTensor(y_train_scaled)
        
        # Learning rate
        self.learning_rate = 1e-2
        
        # Whether the network is acceptable, default as False
        self.acceptable = False
        
        # Some record for experiment
        self.nb_node_pruned = 0
        self.nb_node_acceptable=torch.IntTensor([nb_neuro])
        
    ## Forecast the test data
    def forecast(self, x_test_scaled):
    
        x_test_scaled = torch.FloatTensor(x_test_scaled)
        activation_value = self.linear1(self.x).clamp(min=0)
        forecast_value = self.linear2(h_relu)
       
        return forecast_value

    ## Reset the x and y data
    def setData(self, x_train_scaled, y_train_scaled):
        self.x = torch.FloatTensor(x_train_scaled)
        self.y = torch.FloatTensor(y_train_scaled)
    
    ## Add the new data to the x and y data
    def addData(self, new_x_train, new_y_train):

        self.x = torch.cat([self.x, new_x_train.reshape(1,-1)],0)
        self.y = torch.cat([self.y, new_y_train.reshape(1,-1)],0)
    
    ## forward operation
    def forward(self, reg_strength=0):
       
        y1 = self.linear1(self.x).clamp(min=0)
        yo = self.linear2(y1)

        # performance measure
        reg_term= reg_strength/((self.linear2.bias.data.shape[0]*(self.linear2.weight.data.shape[1]+1)) +   (self.linear2.weight.data.shape[1]*(self.linear1.weight.data.shape[1]+1)))*(torch.sum(torch.pow(self.linear2.bias.data,2))+torch.sum(torch.pow(self.linear2.weight.data,2))+torch.sum(torch.pow(self.linear1.bias.data,2))+torch.sum(torch.pow(self.linear1.weight.data,2)))
        loss = torch.nn.functional.mse_loss(yo,self.y)+reg_term
        return(yo, loss)

    # backward operation
    def backward_Adam(self,loss):    

        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

### Initializing module (Check)

In [16]:
def initializing(network, initial_x, initial_y):
    print("Initializing module")
    ## Find each minimum output value y
    min_y = torch.min(initial_y, axis=0)

    ## Subtract min_y from each y
    res_y = initial_y-min_y.values
    
    ## Use linear regression to find the initial W1,b1,Wo,bo
    reg = LinearRegression().fit(initial_x, res_y)
#     ## Set up the initial parameter of the network
    network.linear1.weight = torch.nn.Parameter(torch.FloatTensor(reg.coef_))
    network.linear1.bias = torch.nn.Parameter(torch.FloatTensor(reg.intercept_))
    network.linear2.weight=torch.nn.Parameter(torch.FloatTensor([[1,0,0,0], [0,1,0,0],[0,0,1,0],[0,0,0,1]]))
    network.linear2.bias = torch.nn.Parameter(min_y.values)

    ## Set up the acceptable of the initial network as True
    network.acceptable =True

### Selecting module (Check)

In [17]:
def selecting(network, x_train_scaled, y_train_scaled):
    
    print("<<Selecting module>>")
    loss = []
    temp_network = copy.deepcopy(network)
    
    ## Put each data into network to calculate the loss value
    for i in range(x_train_scaled.shape[0]):
        temp_network.setData(x_train_scaled[i].reshape(1,-1), y_train_scaled[i].reshape(1,-1))
        loss.append((temp_network.forward()[1].item(),i))

#     ## Sort the data according to the loss value from smallest to largest, and save the data index in sorted_index
    sorted_index = [sorted_data[1] for sorted_data in sorted(loss, key = lambda x:x[0])]

    ## Print out some info for debug
    print("The loss value of k:",loss[sorted_index[0]])
#     print("The second_loss value of k:",loss[sorted_index[1]])
    print("Selecting module finish!")
    
    return sorted_index

### Matching module (Check)

In [18]:
# def matching(network):

#     print("<<Matching module>>")
#     ## Set up the learning rate of the network
#     network.learning_rate = 1e-3
    
#     for i in range(100):
        
#         yo, loss = network.forward()
        
#         if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
            
#             ## If true, set the acceptable of the network as true and return it
#             network.acceptable = True
#             print("Matching finished - the network is acceptable")
#             return(network)
        
#         elif i==99:
            
#             network.acceptable = False
#             print("Matching finished - the network is Unacceptable")
#             return(network)
        
#         else:
            
#             network.backward_Adam(loss)
#             print("Update the parameters: %d times" %i)

In [19]:
def matching(network):

    network.learning_rate = 1e-3
    while True:
        yo, loss = network.forward()
        if torch.max(torch.abs(yo-network.y)) <= network.threshold_for_error:
            
            network.acceptable = True
            print("Matching finished - the network is acceptable")
            return(network)
        else:
            network_pre = copy.deepcopy(network)
            loss_pre = loss
            network.backward_Adam(loss)
            yo, loss = network.forward()
            if loss < loss_pre:
                network.learning_rate *= 1.2      
            else:
                network = network_pre
                if network.learning_rate <= network.threshold_for_lr:
                    network.acceptable = False
                    print("Matching finished - the network is Unacceptable")
                    return(network)
                else:
                    network.learning_rate *= 0.7

In [20]:
# def matching(network):

#     print("<<Matching module>>")
#     ## Set up the learning rate of the network
#     network.learning_rate = 1e-3
#     yo, loss = network.forward()
    
#     while True:
            
        
# #         print("loss1", loss)
        
#         # Save the current papameter
#         network_pre = copy.deepcopy(network)

#         # Stroe the last loss value
#         loss_pre = loss
# #         print("loss2", loss_pre)
# #         print("<<Before>>")
# #             print("lr",network.learning_rate)
# #         print("W1",network.linear1.weight)
            
#             # Backward and check the loss performance of the network with new learning rate
#         network.backward_Adam(loss)
#         yo, loss = network.forward()
            
# #         print("los3", loss)
           
# #         print("<<After>>")
# #             print("lr",network.learning_rate)
# #         print("W1",network.linear1.weight)
            

#             # Confirm whether the loss value of the adjusted network is smaller than the current one
#         if loss < loss_pre:

#             # If true, multiply the learning rate by 1.2
#             network.learning_rate *= 1.2
# #                 print("<<Enlarge>>")
#         # On the contrary, reduce the learning rate
#         else:         

#             # Identify whether the current learning rate is less than the threshold
#             if network.learning_rate <= network.threshold_for_lr:

#                 # If true, set the acceptable of the network as false and return it
#                 network.acceptable = False
#                 print("Matching finished - the network is Unacceptable")
#                 return(network)

#             # On the contrary, restore w and adjust the learning rate
#             else:


#                 # Restore the papameter of the network
#                 network = copy.deepcopy(network_pre)
#                 network.learning_rate *= 0.7

# #                     print("<<After>>")
# #                     print(network.learning_rate)
# #                     print("<<Shrink>>")


#         ## Identify that all forecast value has met the error term
#         if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
            
#             ## If true, set the acceptable of the network as true and return it
#             network.acceptable = True
#             print("Matching finished - the network is acceptable")
#             return(network)

In [21]:
# def matching(network):

#     print("<<Matching module>>")
#     ## Set up the learning rate of the network
#     network.learning_rate = 1e-3
    
    
#     while True:
    
#         ## Get the loss value of the current network architecture
#         yo, loss = network.forward()

#         ## Identify that all forecast value has met the error term
#         if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
            
#             ## If true, set the acceptable of the network as true and return it
#             network.acceptable = True
#             print("Matching finished - the network is acceptable")
#             return(network)


#         ## If the error is not satisfied, continue to tunning the learning rate of the network
#         else:
            
#             print("loss1", loss)
#             # Save the current papameter
#             network_pre = copy.deepcopy(network)
            
#             # Stroe the last loss value
#             loss_pre = loss
#             print("loss2", loss_pre)
#             print("<<Before>>")
# #             print("lr",network.learning_rate)
#             print("W1",network.linear1.weight)
            
#             # Backward and check the loss performance of the network with new learning rate
#             network.backward_Adam(loss)
#             yo, loss = network.forward()
            
#             print("loss3", loss)
            
#             print("<<After>>")
# #             print("lr",network.learning_rate)
#             print("W1",network.linear1.weight)
            

#             # Confirm whether the loss value of the adjusted network is smaller than the current one
#             if loss < loss_pre:

#                 # If true, multiply the learning rate by 1.2
#                 network.learning_rate *= 1.2
# #                 print("<<Enlarge>>")
#             # On the contrary, reduce the learning rate
#             else:         
                
#                 # Identify whether the current learning rate is less than the threshold
#                 if network.learning_rate <= network.threshold_for_lr:
                    
#                     # If true, set the acceptable of the network as false and return it
#                     network.acceptable = False
#                     print("Matching finished - the network is Unacceptable")
#                     return(network)

#                 # On the contrary, restore w and adjust the learning rate
#                 else:
                    
                    
#                     # Restore the papameter of the network
#                     network = network_pre
#                     network.learning_rate *= 0.7
                    
# #                     print("<<After>>")
# #                     print(network.learning_rate)
# #                     print("<<Shrink>>")

In [29]:
torch.set_printoptions(sci_mode=False)

In [30]:
x_train, x_test, y_train, y_test = get_data(4)

# x_train_scaled = torch.FloatTensor(sc.fit_transform(x_train_data))
# x_test_scaled = torch.FloatTensor(sc.transform(x_test_data))
# y_train_scaled = torch.FloatTensor(sc.fit_transform(y_train_data))


initial_x = torch.FloatTensor(np.round(x_train[:x_train.shape[1]+1],0))
initial_y = torch.FloatTensor(np.round(y_train[:x_train.shape[1]+1],0))

x_train = torch.FloatTensor(x_train[x_train.shape[1]+1:])
y_train = torch.FloatTensor(y_train[x_train.shape[1]+1:])

network = Network(4,initial_x,initial_y)
initializing(network, initial_x, initial_y)

yo,loss = network.forward()
print(torch.abs(network.y-yo))


# Training of all data
for i in range(0, 10):
    
    
    ## Print out some info for debug
    print("\nThe data index: %d"%(i+x_train.shape[1]+2))
    
    ## Get the data index by selecting module to sort the data by the loss value from smallest to largest
    sorted_index = selecting(network, x_train, y_train)
    
    ## Add new data for training
    network.addData(x_train[sorted_index[0]], y_train[sorted_index[0]])
    x_train_scaled = np.delete(x_train, sorted_index[0], 0)
    y_train_scaled = np.delete(y_train, sorted_index[0], 0)
    
    print("<<Before>>")
    yo,loss = network.forward()
#     print(network.learning_rate)
#     print(network.linear1.weight)
    print(torch.abs(yo-network.y))
#     print(network.state_dict())
    
    network = matching(network)
    
    print("<<After>>")
    yo,loss = network.forward()
#     print(network.learning_rate)
#     print(network.linear1.weight)
    print(torch.abs(yo-network.y))
#     print(network.state_dict())
#     print("Added data:",x_train_scaled[sorted_index[0]], y_train_scaled[sorted_index[0]])
#     yo,loss = network.forward()
#     print("<<Before>>")
#     print("lr",network.learning_rate)
#     print(torch.abs(network.y-yo))
    
   
    
#     network.acceptable = False
    
#     network = matching(network)
    
    
#     print("<<After>>")
#     print("lr",network.learning_rate)
#     print(torch.abs(network.y-yo))
    
#     cramming(network)

#     ## Print out some information for debug
    
    
#     ## Determine whether the forecast value can meet the error term
#     if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
        
#         print("\n<<Step4>>")
# #         ## If true, set up the acceptable of the network as true
#         network.acceptable = True
# #         ## Use reorganizing module to adjust the model
#         network = reorganizing(network)
        
#         ## Record the number of runs
#         nb_step4 += 1
 
    # Else (if the forecast value cannot meet the error term)
#     else:
        
# #         ## If false, set up the acceptable of the network as false
#         network.acceptable = False
#         network_pre = copy.deepcopy(network)
        
# #         ## Use matching module to adjust the model
#         network = matching(network)
        
# #         ## If the output of the matching module is an acceptable network, use the reorganization module to adjust the model
#         if network.acceptable:
            
#             print("\n<<Step6.1>>")
#             network = reorganizing(network)
            
#             ## Record the number of runs
# #             nb_step6_1 += 1
 
#         ## Else (if the output of the matching module is an unacceptable network)
#         else:
#             network = network_pre
            
#             print("\n<<Step6.2>>")
#             ## Use cramming module and reorganizing module to adjust the model
#             cramming(network)
#             network = reorganizing(network)
            
#             ## Record the number of runs
# #             nb_step6_2 += 1

    ## Record the evolution of adopted hidden nodes 
#     network.nb_node_acceptable = tf.concat([network.nb_node_acceptable, [network.b1.shape[0]]],0)
    
    ## Print out the model status
#     print("The network status:",network.acceptable)

Initializing module
tensor([[ 38.7968750000, 161.9687500000,  31.5078125000,  15.4335937500],
        [149.2968750000, 225.1875000000,  91.0312500000,  28.4414062500],
        [271.9218750000, 147.3437500000, 326.8046875000, 299.5898437500],
        [  0.0000000000, 199.0937500000,  43.7343750000,  79.3359375000],
        [255.9218750000, 305.9687500000,  85.8437500000, 127.3828125000],
        [388.1718750000, 174.7812500000,   1.3203125000, 127.6914062500],
        [375.7656250000, 373.0781250000, 238.7421875000, 197.6328125000],
        [351.1718750000, 298.5781250000, 260.4765625000, 121.7539062500],
        [635.8437500000,  77.9375000000,  19.8828125000, 388.8750000000],
        [ 46.7031250000, 406.1875000000, 562.6796875000, 459.5312500000],
        [182.8437500000,  53.5312500000, 443.7968750000,   0.8710937500],
        [585.0468750000, 235.4375000000,  38.2890625000,  10.6445312500],
        [237.5312500000, 114.3437500000,  86.8906250000, 296.6132812500],
        [131.26562

## Cramming module (Check)

In [49]:
def cramming(network):
    
    torch.random.manual_seed(0)
    print("<<Cramming module>>")

    
    ## Find unsatisfied data:K
    yo, loss = network.forward()
    undesired_index = torch.nonzero(torch.abs(yo-network.y) > network.threshold_for_error)

    ## Print out the undesired_index for debug
    print("The index of the undesired data:",undesired_index)


    # Unsatisfied situation
    ## Find the index of the unsatisfied data
    k_data_num = undesired_index[0][0]

    undesired_data = torch.reshape(network.x[k_data_num,:], [1,-1])

    ## Remove the data that does not meet the error term
    left_data = network.x[:k_data_num,:]
    right_data = network.x[k_data_num+1:,:]
    remain_tensor = torch.cat([left_data, right_data], 0)

        
    ## Use the random method to find out the gamma and zeta
    while True:

        ## Find m-vector gamma: r
        ## Use the random method to generate the gamma that can make the conditions met
        gamma = torch.rand(size=[1,network.x.shape[1]])
        subtract_undesired_data = torch.sub(remain_tensor, undesired_data)
        matmul_value = torch.mm(gamma,torch.t(subtract_undesired_data))

        if torch.all(matmul_value != 0):
            break

    while True:

        ## Find the tiny value: zeta
        ## Use the random method to generate the zeta that can make the conditions met
        zeta = torch.rand(size=[1])
        
        if torch.all(torch.mul(torch.add(zeta,matmul_value),torch.sub(zeta,matmul_value))<0):
            break

    for i in range(undesired_index.shape[0]):
        
        k_l = undesired_index[i][1]
        print("The output node:",k_l)
        ## The weight of input layer to hidden layer I
        w10 = gamma
        w11 = gamma
        w12 = gamma
        
        W1_new = torch.cat([w10,w11,w12],0)
        print("W1_new.shape:",W1_new.shape)
        
        ## The bias of input layer to hidden layer I
        matual_value = torch.mm(gamma,torch.t(undesired_data))

        b10 = torch.sub(zeta,matual_value)
        b11 = -1*matual_value
        b12 = torch.sub(-1*zeta,matual_value)
        
        b1_new = torch.reshape(torch.cat([b10,b11,b12],0),[3])
       
        print("b1_new.shape:",b1_new.shape)
    
    
        ## The weight of hidden layer I to output layer
        gap = network.y[k_data_num, k_l]-yo[k_data_num, k_l]
        print("gap:",gap)
        
        wo0_value = gap/zeta
        wo1_value = (-2*gap)/zeta
        wo2_value = gap/zeta

        index = torch.tensor([[k_l]])
    
        wo0 = torch.zeros(1, 4).scatter_(1, index, 1) * wo0_value
        wo1 = torch.zeros(1, 4).scatter_(1, index, 1) * wo1_value
        wo2 = torch.zeros(1, 4).scatter_(1, index, 1) * wo2_value
        
        print("Sum:",wo0_value+wo1_value+wo2_value)
        
        Wo_new = torch.t(torch.cat([wo0,wo1,wo2],0))
        
        print("Wo_new.shape",Wo_new.shape)
        
        ## Add new neuroes to the network
        network.linear1.weight = torch.nn.Parameter(torch.cat([network.linear1.weight.data, W1_new]))
        network.linear1.bias = torch.nn.Parameter(torch.cat([network.linear1.bias.data, b1_new]))
        network.linear2.weight = torch.nn.Parameter(torch.cat([network.linear2.weight.data, Wo_new],1))
        
        yo, loss = network.forward()

#         print(torch.abs(network.y-yo))
    
    yo, loss = network.forward()
    ## Determine if cramming is successful and print out the corresponding information
    if torch.all(torch.abs(yo[k_data_num,k_l]-network.y[k_data_num,k_l]) <= network.threshold_for_error):
        network.acceptable = True 
        print("Cramming success!")
        
    else:
        print("Cramming failed!")

### Regularizing module (Check)

In [58]:
def regularizing(network):

    print("<<Regularizing module>>")
    ## Record the number of executions
    times_enlarge = 0
    times_shrink = 0
    ## Set up the learning rate of the network
    network.learning_rate = 1e-3

    ## Set epoch to 100
    for i in range(100):

        ## Store the parameter of the network
        network_pre = copy.deepcopy(network)
        yo, loss = network.forward(1e-2)
        loss_pre = loss

        ## Backward operation to obtain w'
        network.backward_Adam(loss)
        yo, loss = network.forward(1e-2)

         # Confirm whether the adjusted loss value is smaller than the current one
        if loss <= loss_pre:
            
            ## Identify that all forecast value has met the error term
            if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
                
                ## If true, multiply the learning rate by 1.2
                network.learning_rate *= 1.2
                times_enlarge += 1
                print("Regularizing %d process - Enlarge"%i)

            else:

                ## Else, restore w and end the process
                network = network_pre
                print("Number of enlarge:",times_enlarge)
                print("Number of shrink:",times_shrink)
                print("Regularizing result: Unable to meet the error term")
                return(network)

        # If the adjusted loss value is not smaller than the current one
        else:
           

            ## If the learning rate is greater than the threshold for learning rate
            if network.learning_rate > network.threshold_for_lr:
                
                ## Restore the w and multiply the learning rate by 0.7
                network = network_pre
                network.learning_rate *= 0.7
                times_shrink += 1
                print("Regularizing %d process - Shrink"%i)
             ## If the learning rate is smaller than the threshold for learning rate
            else:
                
                ## Restore the w
                network = network_pre
                print("Number of enlarge:",times_enlarge)
                print("Number of shrink:",times_shrink)
                print("Regularizing result: Less than the epsilon for the learning rate")
                return(network)

        if i == 99:
            print("Number of enlarge:",times_enlarge)
            print("Number of shrink:",times_shrink)
            print("Regularizing result: The number of rounds has reached")
            return(network)

### Reorganizing module (Check)

In [59]:
def reorganizing(network):
    print("<<Reorganizing module>>")
    ## Set up the k = 1, and p = the number of hidden node
    k = 1
#     p = network.W1.shape[1]
    p = network.linear1.weight.data.shape[0]
    
    while True:

        ## If k > p, end of Process
        if k > p:

            print("Reorganizing result: The final number of neuro is ",p)
            return(network)

        ## Else, Process is ongoing
        else:

            ## Using the regularizing module to adjust the network
            network = regularizing(network)
            
            ## Store the network and w
            network_pre = copy.deepcopy(network)

            ## Set up the acceptable of the network as false
            network.acceptable = False

            ## Ignore the K hidden node
            network.linear1.weight = torch.nn.Parameter(torch.cat([network.linear1.weight[:k-1],network.linear1.weight[k:]],0))
            network.linear1.bias = torch.nn.Parameter(torch.cat([network.linear1.bias[:k-1],network.linear1.bias[k:]]))
            network.linear2.weight = torch.nn.Parameter(torch.cat([network.linear2.weight[:,:k-1],network.linear2.weight[:,k:]],1))

            ## Using the matching module to adjust the network
            network = matching(network)
            print(network.acceptable)
            ## If the resulting network is acceptable, this means that the k hidden node can be removed
            if network.acceptable and p!=1:

                print("Drop out the nero number: %d / %d" %(k, p))
#                 network.nb_node_pruned += 1
                ## p--
                p-=1

            ## Else, it means that the k hidden node cannot be removed
            else:
                
                ## Restore the network and w
                network = network_pre
                print("Cannot drop out the nero number: %d / %d" %(k, p))
                
                ## k++
                k+=1
                    

<generator object Module.parameters at 0x7f73a75c1d58>
Initializing module
tensor([[2.8610e-06, 9.8348e-06, 7.1526e-07, 0.0000e+00],
        [3.1590e-06, 4.5300e-06, 0.0000e+00, 1.1325e-06],
        [2.9802e-06, 0.0000e+00, 2.9802e-07, 0.0000e+00],
        [3.6955e-06, 6.1393e-06, 7.1526e-07, 1.7881e-07],
        [1.1325e-06, 3.0994e-06, 5.9605e-08, 2.3842e-07],
        [3.4571e-06, 8.5235e-06, 9.5367e-07, 1.3113e-06],
        [6.3777e-06, 4.7684e-06, 5.9605e-07, 2.3842e-07],
        [4.4107e-06, 3.9339e-06, 4.7684e-07, 2.9802e-07],
        [9.0599e-06, 1.0490e-05, 1.3709e-06, 8.3447e-07],
        [3.2187e-06, 4.3511e-06, 1.0729e-06, 6.5565e-07],
        [8.6427e-06, 2.5034e-06, 7.7486e-07, 4.7684e-07],
        [9.5367e-07, 6.1393e-06, 7.1526e-07, 1.1325e-06],
        [4.5896e-06, 2.1458e-06, 1.0133e-06, 2.9802e-07],
        [5.9605e-07, 1.0133e-06, 1.1325e-06, 5.9605e-07],
        [6.9737e-06, 5.3048e-06, 1.3113e-06, 4.7684e-07],
        [7.5698e-06, 4.6492e-06, 7.1526e-07, 5.3644e-07

KeyboardInterrupt: 

In [346]:
x_train_data, x_test_data, y_train_data, y_test_data = get_data(4)

x_train_scaled = torch.FloatTensor(sc.fit_transform(x_train_data))
x_test_scaled = torch.FloatTensor(sc.transform(x_test_data))
y_train_scaled = torch.FloatTensor(sc.fit_transform(y_train_data))


initial_x = x_train_scaled[:x_train_scaled.shape[1]+1]
initial_y = y_train_scaled[:x_train_scaled.shape[1]+1]

x_train_scaled = x_train_scaled[x_train_scaled.shape[1]+1:]
y_train_scaled = y_train_scaled[x_train_scaled.shape[1]+1:]

network = Network(4,initial_x,initial_y)
initializing(network, initial_x, initial_y)

    
## Training of all data
for i in range(0, 1):
    
   
    ## Print out some info for debug
    print("\nThe data index: %d"%(i+x_train_scaled.shape[1]+2))
    
    ## Get the data index by selecting module to sort the data by the loss value from smallest to largest
    sorted_index = selecting(network, x_train_scaled, y_train_scaled)
    
    ## Add new data for training
    network.addData(x_train_scaled[sorted_index[0]], y_train_scaled[sorted_index[0]])
    x_train_scaled = np.delete(x_train_scaled, sorted_index[0], 0)
    y_train_scaled = np.delete(y_train_scaled, sorted_index[0], 0)
    
    yo, loss = network.forward()
    
    ## Print out some information for debug
    print("The error term for each data")
    print(torch.abs(yo-network.y))
    
    ## Determine whether the forecast value can meet the error term
    if torch.all(torch.abs(yo-network.y) <= network.threshold_for_error):
        
        print("\n<<Step4>>")
        ## If true, set up the acceptable of the network as true
        network.acceptable = True
        ## Use reorganizing module to adjust the model
        network = reorganizing(network)
        
        ## Record the number of runs
#         nb_step4 += 1
 
    ## Else (if the forecast value cannot meet the error term)
    else:
        
        ## If false, set up the acceptable of the network as false
        network.acceptable = False
        network_pre = copy.deepcopy(network)
        
        ## Use matching module to adjust the model
        network = matching(network)
        
        ## If the output of the matching module is an acceptable network, use the reorganization module to adjust the model
        if network.acceptable:
            
            print("\n<<Step6.1>>")
            network = reorganizing(network)
            
            ## Record the number of runs
            nb_step6_1 += 1
 
        ## Else (if the output of the matching module is an unacceptable network)
        else:
            network = network_pre
            
            print("\n<<Step6.2>>")
            ## Use cramming module and reorganizing module to adjust the model
            cramming(network)
            network = reorganizing(network)
            
            ## Record the number of runs
            nb_step6_2 += 1

    ## Record the evolution of adopted hidden nodes 
#     network.nb_node_acceptable = tf.concat([network.nb_node_acceptable, [network.b1.shape[0]]],0)
    
    ## Print out the model status
    print("The network status:",network.acceptable)

Initializing module

The data index: 20
<<Selecting module>>
The loss value of k: (0.04106106981635094, 0)
The second_loss value of k: (0.07957352697849274, 2)
Selecting module finish!
The error term for each data
tensor([[9.5367e-07, 2.2054e-06, 1.4305e-06, 0.0000e+00],
        [3.1590e-06, 3.0994e-06, 1.6689e-06, 1.7881e-07],
        [8.4639e-06, 0.0000e+00, 1.7285e-06, 0.0000e+00],
        [3.6955e-06, 5.3048e-06, 1.4305e-06, 1.7881e-07],
        [7.7486e-07, 7.1526e-07, 2.2054e-06, 7.7486e-07],
        [3.5763e-07, 4.7088e-06, 1.8477e-06, 6.5565e-07],
        [6.3777e-06, 8.9407e-07, 1.6093e-06, 7.1526e-07],
        [2.4438e-06, 5.9605e-08, 7.1526e-07, 2.9802e-07],
        [5.3048e-06, 9.5367e-07, 1.3709e-06, 8.3447e-07],
        [4.4107e-06, 4.3511e-06, 1.5497e-06, 6.5565e-07],
        [4.8280e-06, 3.2187e-06, 1.0133e-06, 0.0000e+00],
        [6.6757e-06, 6.1393e-06, 1.6689e-06, 6.5565e-07],
        [3.0398e-06, 2.1458e-06, 1.7285e-06, 2.9802e-07],
        [3.2187e-06, 4.8280e-06,

### Construct a instance of network
- trained through the matching module, reorganizing module, and cramming module

In [26]:
start = time.time()
nb_step4 = 0
nb_step6_1 = 0
nb_step6_2 = 0

## Call the help method "get_data(...)" to get the training data and test data 
x_train_data, x_test_data, y_train_data, y_test_data = get_data(4)

## Use min-max normalization to normalize data in a range of 0 to 1
x_train_scaled = sc.fit_transform(x_train_data)
x_test_scaled = sc.transform(x_test_data)
y_train_scaled = sc.fit_transform(y_train_data)


## Pick up m+1 data that are linearly independent as the initial m+1 training data 
initial_x = x_train_scaled[:x_train_scaled.shape[1]+1]
initial_y = y_train_scaled[:x_train_scaled.shape[1]+1]

## The remaining data (that exclude initial data)
x_train_scaled = x_train_scaled[x_train_scaled.shape[1]+1:]
y_train_scaled = y_train_scaled[x_train_scaled.shape[1]+1:]

## Construct the network object with 4 neuros
network = Network(4, initial_x, initial_y)

## Use initilize module to set up the initial network
initializing(network, initial_x, initial_y)

yo, loss, tape = network.forward()
print("note: the error term after initializing module")
print(tf.math.abs(yo-network.y))


## Training of all data
for i in range(0, x_train_scaled.shape[0]):
    
   
    ## Print out some info for debug
    print("\nThe data index: %d"%(i+x_train_scaled.shape[1]+2))
    
    ## Get the data index by selecting module to sort the data by the loss value from smallest to largest
    sorted_index = selecting(network, x_train_scaled, y_train_scaled)
    
    ## Add new data for training
    network.addData(x_train_scaled[sorted_index[0]], y_train_scaled[sorted_index[0]])
    x_train_scaled = np.delete(x_train_scaled, sorted_index[0], 0)
    y_train_scaled = np.delete(y_train_scaled, sorted_index[0], 0)
    
    yo, loss, tape = network.forward()
    
    ## Print out some information for debug
    print("The error term for each data")
    print(tf.math.abs(yo-network.y))
    
    ## Determine whether the forecast value can meet the error term
    if tf.reduce_all(tf.math.abs(yo-network.y) <= network.threshold_for_error):
        
        print("\n<<Step4>>")
        ## If true, set up the acceptable of the network as true
        network.acceptable = True
        
        ## Use reorganizing module to adjust the model
        network = reorganizing(network)
        
        ## Record the number of runs
        nb_step4 += 1
 
    else:
        
        ## If true, set up the acceptable of the network as false
        network.acceptable = False
        network_pre = copy.deepcopy(network)
        
        ## Use matching module to adjust the model
        network = matching(network)
        
        ## If the output of the matching module is an acceptable network, use the reorganization module to adjust the model
        if network.acceptable:
            
            print("\n<<Step6.1>>")
            network = reorganizing(network)
            
            ## Record the number of runs
            nb_step6_1 += 1
 
        ## Else (if the output of the matching module is an unacceptable network)
        else:
            network = network_pre
            
            print("\n<<Step6.2>>")
            ## Use cramming module and reorganizing module to adjust the model
            cramming(network)
            network = reorganizing(network)
            
            ## Record the number of runs
            nb_step6_2 += 1

    ## Print out the model status
    network.nb_node_acceptable = tf.concat([network.nb_node_acceptable, [network.b1.shape[0]]],0)
    print("The network status:",network.acceptable)
    
    if i == 50:

        print("\nPrint out some info")
        validation(network,nb_step4,nb_step6_1,nb_step6_2,x_test_scaled,y_test_data)
        break
    print("-"*10,"next data","-"*10)
    
## Calculate the training time    
end = time.time()

Initializing module
note: the error term after initializing module
tf.Tensor(
[[0.0000037  0.0000034  0.0000006  0.        ]
 [0.00000042 0.00000083 0.         0.00000018]
 [0.00000191 0.00000203 0.0000003  0.        ]
 [0.00000095 0.00000328 0.00000012 0.00000018]
 [0.00000161 0.00000131 0.00000054 0.00000024]
 [0.00000048 0.00000089 0.00000024 0.00000036]
 [0.00000209 0.00000083 0.00000024 0.00000024]
 [0.00000024 0.00000024 0.00000036 0.0000003 ]
 [0.00000131 0.00000274 0.00000018 0.00000036]
 [0.00000024 0.00000066 0.00000048 0.00000018]
 [0.00000018 0.00000119 0.00000018 0.        ]
 [0.00000179 0.00000054 0.00000012 0.00000018]
 [0.00000352 0.00000346 0.00000042 0.0000003 ]
 [0.00000334 0.00000101 0.0000003  0.0000006 ]
 [0.00000173 0.00000221 0.         0.00000048]
 [0.00000113 0.00000095 0.         0.00000006]
 [0.00000179 0.00000203 0.00000006 0.00000012]
 [0.00000095 0.00000149 0.00000048 0.00000012]
 [0.00000149 0.         0.00000012 0.00000018]], shape=(19, 4), dtype=float3

ResourceExhaustedError: OOM when allocating tensor with shape[51,46] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Relu]

In [None]:
# def plot_result():
#     fig, ax = plt.subplots(2,2,figsize=(20,10), sharex=True, sharey=True)

#     for i in range(yo.shape[1]):
#         ax[i//2,i%2].plot(sc.inverse_transform(yo)[:,i], label="LLAAT")
#         ax[i//2,i%2].plot(sc.inverse_transform(network.y)[:,i], label="Actual")
#         ax[i//2,i%2].set_title("Forecasted performance for the %d week ahead" %(i+1))
#         ax[i//2,i%2].legend()
#     #fig.text(0.5, 0, "Stage of training", ha='center', fontsize=20)
#     #fig.text(0, 0.5, "Copper price value", va='center', rotation='vertical')
#     fig.tight_layout()

In [None]:
# forecast_value = network.forecast(x_test_scaled)
# plt.plot(sc.inverse_transform(forecast_value)[:,0], label="LLAAT")
# plt.plot(y_test_data[:,0], label="Actual")
# plt.legend()

In [None]:
# forecast_value = network.forecast(x_test_scaled)

# y_pred = sc.inverse_transform(forecast_value)
# accuracy = list()

# for i in range(y_pred.shape[1]):
# #     for _ in range(y_pred.shape[0]): 

#     correct_times = np.sum(tf.math.abs(y_test_data[:,i]-y_pred[:,i])<2000)
#     accuracy.append(correct_times/y_pred.shape[0])

In [None]:
# fig, ax = plt.subplots(2,2,figsize=(20,10), sharex=True, sharey=True)

# for i in range(yo.shape[1]):
#     ax[i//2,i%2].plot(sc.inverse_transform(forecast_value)[:,i], label="LLAAT")
#     ax[i//2,i%2].plot(y_test_data[:,i], label="Actual")
#     ax[i//2,i%2].set_title("Forecasted performance for the %d week ahead" %(i+1))
#     ax[i//2,i%2].legend()
# #fig.text(0.5, 0, "Stage of training", ha='center', fontsize=20)
# #fig.text(0, 0.5, "Copper price value", va='center', rotation='vertical')
# fig.tight_layout()