### Import related package

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

## Initializing module
from sklearn.linear_model import LinearRegression
np.set_printoptions(suppress=True)

## Copy module
import copy

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


### Control memory usage space for GPU

In [2]:
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.3)
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
tf.compat.v1.keras.backend.set_session(sess)

### Preprocessing the data

In [3]:
def read(path):
    return pd.read_csv(path)

In [4]:
def buildTrain(train, pastWeek=4, futureWeek=4, defaultWeek=1):
    X_train, Y_train = [], []
    for i in range(train.shape[0]-futureWeek-pastWeek):
        X = np.array(train.iloc[i:i+defaultWeek])
        X = np.append(X,train["CCSP"].iloc[i+defaultWeek:i+pastWeek])
        X_train.append(X.reshape(X.size))
        Y_train.append(np.array(train.iloc[i+pastWeek:i+pastWeek+futureWeek]["CCSP"]))
    return np.array(X_train), np.array(Y_train)

### Min-max normalization

In [5]:
sc = MinMaxScaler(feature_range = (0, 1))

In [6]:
path = "WeeklyFinalData.csv"
data = read(path)

date = data["Date"]
data.drop("Date", axis=1, inplace=True)
x_data, y_data = buildTrain(data, futureWeek=4)


a = sc.fit_transform(y_data[:,1].reshape(-1,1))
2000/(sc.data_max_-sc.data_min_)

array([0.07812195])

### Design get_data() to get data

In [7]:
def get_data(futureWeek):
    
    ## Read weekly copper price data
    path = "WeeklyFinalData.csv"
    data = read(path)
    
    date = data["Date"]
    data.drop("Date", axis=1, inplace=True)
    
    ## Add time lag (pastWeek=4, futureWeek=1)
    x_data, y_data = buildTrain(data, futureWeek=futureWeek)
    
    x_train_data = x_data[:int(x_data.shape[0]*0.8)]
    x_test_data = x_data[int(x_data.shape[0]*0.8):]
    y_train_data = y_data[:int(x_data.shape[0]*0.8)]
    y_test_data = y_data[int(x_data.shape[0]*0.8):]
#     ## Data split
#     x_train = x_data[0:int(x_data.shape[0]*0.8)]
#     x_test = x_data[int(x_data.shape[0]*0.8):]
    
#     y_train = y_data[0:int(y_data.shape[0]*0.8)]
#     y_test = y_data[int(y_data.shape[0]*0.8):]
    
    ## Normalize
#     x_train_scaled = sc.fit_transform(x_data)
#     y_train_scaled = sc.fit_transform(y_data)
    
#     x_train_scaled = x_data
#     y_train_scaled = y_data
    
#     return (x_train_scaled, y_train_scaled)

    return (x_train_data, x_test_data, y_train_data, y_test_data)

In [8]:
x_train_data, x_test_data, y_train_data, y_test_data = get_data(4)

print(x_train_data.shape)
print(x_test_data.shape)
print(y_train_data.shape)
print(y_test_data.shape)

(377, 18)
(95, 18)
(377, 4)
(95, 4)


### Network class

In [9]:
class Network():
    
    def __init__(self, nb_neuro, x_train_scaled, y_train_scaled):
        
#         x_train_scaled, y_train_scaled = get_data(nb_neuro)
        
        # Stop criteria - threshold
        self.threshold_for_error = 0.07
        self.threshold_for_lr = 1e-6
        
        # Input data
        self.x = tf.convert_to_tensor(x_train_scaled, np.float32)
        self.y = tf.convert_to_tensor(y_train_scaled, np.float32)
        
        # Learning rate
        self.learning_rate = 1e-2
        
        # Optimizer
#         self.optimizer = tf.optimizers.SGD(self.learning_rate)
        
         # Hidden layer I
        self.n_neurons_in_h1 = nb_neuro
        self.W1 = tf.Variable(tf.random.truncated_normal([self.x.shape[1], self.n_neurons_in_h1], mean=0, stddev=1))
        self.b1 = tf.Variable(tf.random.truncated_normal([self.n_neurons_in_h1], mean=0, stddev=1))

        # Output layer
        self.Wo = tf.Variable(tf.random.truncated_normal([self.n_neurons_in_h1, self.y.shape[1]], mean=0, stddev=1))
        self.bo = tf.Variable(tf.random.truncated_normal([self.y.shape[1]], mean=0, stddev=1))

        # Whether the network is acceptable
        self.acceptable = False
    
    def forecast(self, x_test_scaled):
    
        x_test_scaled = tf.cast(x_test_scaled, tf.float32)
        forecast_value = tf.nn.relu((tf.matmul(x_test_scaled, self.W1)+self.b1))
        return forecast_value

    def setData(self, x_train_scaled, y_train_scaled):
        self.x = tf.convert_to_tensor(x_train_scaled, np.float32)
        self.y = tf.convert_to_tensor(y_train_scaled, np.float32)
    
    def addData(self, new_x_train, new_y_train):
#         self.x = tf.convert_to_tensor(x_train_scaled, np.float32)
#         self.y = tf.convert_to_tensor(y_train_scaled, np.float32)
        self.x = tf.concat([self.x, new_x_train.reshape(1,-1)],0)
        self.y = tf.concat([self.y, new_y_train.reshape(1,-1)],0)
    
        # forward operation
    def forward(self,  reg_strength= 0):
        with tf.GradientTape() as tape:

            y1 = tf.nn.relu((tf.matmul(self.x, self.W1)+self.b1))
            yo = (tf.matmul(y1,self.Wo)+self.bo)

            # performance measure
            diff = yo-self.y
            loss = tf.reduce_mean(diff**2) + (reg_strength/(self.Wo.shape[1]*(self.Wo.shape[0]+1)+self.W1.shape[1]*(self.W1.shape[0]+1))) * ((tf.nn.l2_loss(self.W1) + tf.nn.l2_loss(self.Wo) + tf.nn.l2_loss(self.b1) + tf.nn.l2_loss(self.bo))*2)
#             loss = tf.reduce_mean(diff**2, axis=0) + reg_strength * (tf.nn.l2_loss(self.W1) + tf.nn.l2_loss(self.Wo) + tf.nn.l2_loss(self.b1) + tf.nn.l2_loss(self.bo))

        return(yo, loss, tape)

    # backward operation
    def backward_Adam(self,tape,loss):

#         tape.watch([self.W1, self.Wo, self.b1, self.bo])
        optimizer = tf.optimizers.Adam(self.learning_rate)
        gradients = tape.gradient(loss, [self.W1, self.Wo, self.b1, self.bo])
        optimizer.apply_gradients(zip(gradients, [self.W1, self.Wo, self.b1, self.bo]))
    
    def backward_RMS(self,tape,loss):

        optimizer = tf.keras.optimizers.RMSprop(self.learning_rate)
        gradients = tape.gradient(loss, [self.W1, self.Wo, self.b1, self.bo])
        optimizer.apply_gradients(zip(gradients, [self.W1, self.Wo, self.b1, self.bo]))

### Matching module (Check)

In [10]:
# tunning the parameter
def matching(network):

    network.learning_rate = 1e-3
    
    while True:
        
        yo, loss, tape = network.forward()

        if tf.reduce_all(tf.math.abs(yo-network.y) <= network.threshold_for_error):
            
            network.acceptable = True
            print("Matching finished - the network is acceptable")
            return(network)


        else:
            
            # Save the current papameter
            network_pre = copy.deepcopy(network)
            loss_pre = loss
            
            # tuning and check the loss performance of the next step
            network.backward_Adam(tape,loss)
            yo, loss, tape = network.forward()

            # Confirm whether the adjusted loss value is smaller than the current one
            if loss < loss_pre:

                # Multiply the learning rate by 1.2
                network.learning_rate *= 1.2

            # On the contrary, reduce the learning rate
            else:

                network = network_pre
                
                # Identify whether the current learning rate is less than the threshold
                if network.learning_rate <= network.threshold_for_lr:
                    network.acceptable = False
                    # If true, return the current model parameters
                    print("Matching finished - the network is Unacceptable")
                    return(network)

                # On the contrary, maintain the original parameter and adjust the learning rate
                else:
                    network.learning_rate *= 0.7
#                     print("B",network.learning_rate)

### Initializing module (Check)

In [11]:
def initializing(network, initial_x, initial_y):
    
#     x_train_scaled, y_train_scaled = get_data(4)

#     initial_x = x_train_scaled[:x_train_scaled.shape[1]+1]
#     initial_y = y_train_scaled[:x_train_scaled.shape[1]+1]
    min_y = tf.reduce_min(initial_y, axis=0)
    res_y = initial_y-min_y
    reg = LinearRegression().fit(initial_x, res_y)

    network.W1 = tf.Variable(tf.cast(tf.transpose(reg.coef_), tf.float32))
    network.b1 = tf.Variable(tf.convert_to_tensor(reg.intercept_, tf.float32))
    network.Wo = tf.Variable(tf.convert_to_tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], tf.float32))
    network.bo = tf.Variable(tf.cast(min_y, tf.float32))

    network.acceptable =True
#     network.W1 = tf.cast(tf.transpose(reg.coef_), tf.float32)
#     network.b1 = tf.convert_to_tensor(reg.intercept_, tf.float32)
#     network.Wo = tf.convert_to_tensor([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]], tf.float32)
#     network.bo = tf.cast(min_y, tf.float32)

### Selecting module (Check)

In [12]:
def selecting(network, x_train_scaled, y_train_scaled):
    
    loss = []
    temp_network = copy.deepcopy(network)
    
    for i in range(x_train_scaled.shape[0]):
        temp_network.setData(x_train_scaled[i].reshape(1,-1), y_train_scaled[i].reshape(1,-1))
        loss.append((temp_network.forward()[1].numpy(),i))

    sorted_index = [sorted_data[1] for sorted_data in sorted(loss, key = lambda x:x[0])]
#     sorted_index = [x[1] for x in sorted(loss, key = lambda x:sum(x[0]))]
#     sorted_index = [x for x in sorted(loss, key = lambda x:x[0])]

    
    print("First:",loss[sorted_index[0]])
    print("Second:",loss[sorted_index[1]])
    print("Selecting module finish!")
    
    return sorted_index

### Regularizing module (Check)

In [13]:
def regularizing(network):

    
    network.learning_rate = 1e-3

    for i in range(100):

            
#         W1_pre, b1_pre, Wo_pre, bo_pre = network.W1, network.b1, network.Wo, network.bo
        network_pre = copy.deepcopy(network)
        yo, loss, tape = network.forward(1e-2)

        loss_pre = loss

        network.backward_Adam(tape, loss)
        yo, loss, tape = network.forward(1e-2)

        if loss <= loss_pre:
            if tf.reduce_all(tf.math.abs(yo-network.y) <= network.threshold_for_error):
                network.learning_rate *= 1.2
                print("Regularizing process")

            else:
#                 network.W1, network.b1, network.Wo, network.bo = W1_pre, b1_pre, Wo_pre, bo_pre
                network = network_pre

                print("Regularizing finished(A)")
                return(network)
#                     break

        else:


            network = network_pre

            if network.learning_rate > network.threshold_for_lr:
                network.learning_rate *= 0.7

            else:

                print("Regularizing finished(B)")
                return(network)
#                     break

        if i == 99:
            
            return(network)

### Reorganizing module (Check)

In [14]:
def reorganizing(network):
    
#     if network.acceptable:
        
    k = 1
    p = network.W1.shape[1]

    while True:



        if k > p:

            print("The number of neuro: ",p)
            return(network)

        else:


            network = regularizing(network)
            network_pre = copy.deepcopy(network)

            network.acceptable = False

            network.W1 = tf.Variable(tf.concat([network.W1[:,:k-1],network.W1[:,k:]],1))
            network.b1 = tf.Variable(tf.concat([network.b1[:k-1],network.b1[k:]],0))
            network.Wo = tf.Variable(tf.concat([network.Wo[:k-1,:],network.Wo[k:,:]],0))

#             print(network.W1.shape, network.Wo.shape, network.b1.shape)
            network = matching(network)

            if network.acceptable:

                print("Drop out the nero number: %d / %d" %(k, p))
                p-=1


            else:
                network = network_pre
                print("Cannot drop out the nero number: %d / %d" %(k, p))
                k+=1
                    
#     else:
#         return None

### Cramming module (Check)

In [15]:
def cramming(network):
    
    ## Test network
# x_train_scaled, y_train_scaled = get_data(4)
# initial_x = x_train_scaled[:x_train_scaled.shape[1]+1]
# initial_y = y_train_scaled[:x_train_scaled.shape[1]+1]

# network = Network(4, initial_x, initial_y)
# initializing(network, initial_x, initial_y)

# sorted_index = selecting(network, x_train_scaled, y_train_scaled)
# network.setData(x_train_scaled[sorted_index[:20]],y_train_scaled[sorted_index[:20]])


    ## Set the random seed
    tf.random.set_seed(5)

    ## Find unsatisfied situation
    yo, loss, tape = network.forward()
    forward_info = [yo, loss, tape]
    
    undesired_index = tf.where(tf.math.abs(yo-network.y) > network.threshold_for_error)

    
#     print(tf.math.abs(yo-network.y))
#     print(network.threshold_for_error)
    print(undesired_index)
#     print(undesired_index.shape[0])

    ## Unsatisfied situation
    for i in range(undesired_index.shape[0]):

        k_data_num = undesired_index[i][0]
        k_l = undesired_index[i][1]

        undesired_data = tf.reshape(network.x[k_data_num,:], [1,-1])

        ## Remove the only data that does not meet the error term
        left_data = network.x[:k_data_num,:]
        right_data = network.x[k_data_num+1:,:]
        remain_tensor = tf.concat([left_data, right_data], 0)

    #     print(tf.subtract(remain_tensor, undesired_data).shape)

        while True:

            ## Find m-vector gamma: r
            gamma = tf.random.uniform(shape=[1,network.x.shape[1]])

            subtract_undesired_data = tf.subtract(remain_tensor, undesired_data)
            matmul_value = tf.matmul(gamma,tf.transpose(subtract_undesired_data))


            if tf.reduce_all(matmul_value != 0):

                while True:

                    ## Find the tiny value: zeta
                    zeta = tf.random.uniform(shape=[1])

                    if tf.reduce_all(tf.multiply(tf.add(zeta,matmul_value),tf.subtract(zeta,matmul_value))<0):
                        break

                break


        ## The weight of input layer to hidden layer I
        w10 = gamma
        w11 = gamma
        w12 = gamma

        W1_new = tf.transpose(tf.concat([w10,w11,w12],0))

    #     ## The bias of input layer to hidden layer I
        matual_value = tf.matmul(gamma,tf.transpose(undesired_data))

        b10 = tf.subtract(zeta,matual_value)
        b11 = -1*matual_value
        b12 = tf.subtract(-1*zeta,matual_value)
        b1_new = tf.reshape(tf.concat([b10,b11,b12],0),[3])

        ## The weight of hidden layer I to output layer
        gap = network.y[k_data_num, k_l]-yo[k_data_num, k_l]

        wo0_value = gap/zeta
        wo1_value = (-2*gap)/zeta
        wo2_value = gap/zeta

        wo0 = tf.reshape(tf.one_hot(k_l,4,dtype=tf.float32) * wo0_value, shape=(1,-1))
        wo1 = tf.reshape(tf.one_hot(k_l,4,dtype=tf.float32) * wo1_value, shape=(1,-1))
        wo2 = tf.reshape(tf.one_hot(k_l,4,dtype=tf.float32) * wo2_value, shape=(1,-1))

        Wo_new = tf.concat([wo0,wo1,wo2],0)


    #     ## Add new neuroes to the network
        network.W1 = tf.Variable(tf.concat([network.W1, W1_new],1), tf.float32)
        network.b1 = tf.Variable(tf.concat([network.b1, b1_new],0), tf.float32)
        network.Wo = tf.Variable(tf.concat([network.Wo, Wo_new],0), tf.float32)

        yo, loss, tape = network.forward()
    #     print(tf.math.abs(yo[k_data_num,k_l]-network.y[k_data_num,k_l]) <= network.threshold_for_error)
        if tf.reduce_all(tf.math.abs(yo[k_data_num,k_l]-network.y[k_data_num,k_l]) <= network.threshold_for_error):
            
            if i==(undesired_index.shape[0]-1):
                network.acceptable = True
            
            print("Cramming success!")
            

        else:
            print("Cramming failed!")
        
# print(network.W1.shape[1])

### Construct a instance of network
- trained through the matching module, reorganizing module, and cramming module

In [16]:
# def main():
    
# x_train_scaled, y_train_scaled = get_data(4)

x_train_data, x_test_data, y_train_data, y_test_data = get_data(4)

# x_train_scaled = x_train_scaled[:20]
# y_train_scaled = y_train_scaled[:20]

# x_train_scaled = sc.fit_transform(x_data[:100])
# x_test_scaled = sc.transform(x_data[100:])
# y_train_scaled = sc.fit_transform(y_data[:100])


x_train_scaled = sc.fit_transform(x_train_data)
x_test_scaled = sc.transform(x_test_data)
y_train_scaled = sc.fit_transform(y_train_data)

initial_n = x_train_scaled.shape[1]+2
number_data = x_train_scaled.shape[0]

print("initial_n:",initial_n, "number_data:", number_data)

x_train_scaled = x_train_scaled[:30]
y_train_scaled = y_train_scaled[:30]


initial_x = x_train_scaled[:x_train_scaled.shape[1]+1]
initial_y = y_train_scaled[:x_train_scaled.shape[1]+1]

x_train_scaled = x_train_scaled[x_train_scaled.shape[1]+1:]
y_train_scaled = y_train_scaled[x_train_scaled.shape[1]+1:]

network = Network(4, initial_x, initial_y)
initializing(network, initial_x, initial_y)


# network.setData(x_train_scaled[:18], y_train_scaled[:18])
# print(network.forward()[1])

for i in range(initial_n, number_data): 
    
    print(x_train_scaled.shape[0])
    print("The data number: %d"%i)
    
    sorted_index = selecting(network, x_train_scaled, y_train_scaled)
#     print("Selecting module finish!")
    
    network.addData(x_train_scaled[sorted_index[0]], y_train_scaled[sorted_index[0]])
    x_train_scaled = np.delete(x_train_scaled, sorted_index[0], 0)
    y_train_scaled = np.delete(y_train_scaled, sorted_index[0], 0)
    
    yo, loss, tape = network.forward()
    
    print(tf.math.abs(yo-network.y))
    
    if tf.reduce_all(tf.math.abs(yo-network.y) <= network.threshold_for_error):
        network.acceptable = True
        network = reorganizing(network)
 
    else:
        
        network.acceptable = False
        network_pre = copy.deepcopy(network)
        network = matching(network)
        
        if network.acceptable:
            network = reorganizing(network)
 
        else:
            network = network_pre
            cramming(network)
            network = reorganizing(network)

#     print(network.b1)
    print(network.acceptable)

initial_n: 20 number_data: 377
11
The data number: 20
First: (0.04087187, 0)
Second: (0.07909024, 2)
Selecting module finish!
tf.Tensor(
[[0.00000376 0.00000101 0.00000083 0.        ]
 [0.00000203 0.00000203 0.00000036 0.00000018]
 [0.00000662 0.00000179 0.00000006 0.00000006]
 [0.00000101 0.00000089 0.00000006 0.00000024]
 [0.00000167 0.00000328 0.00000036 0.00000018]
 [0.00000322 0.00000274 0.00000089 0.00000077]
 [0.00000542 0.00000399 0.0000003  0.00000012]
 [0.00000131 0.0000034  0.         0.00000054]
 [0.00000119 0.00000644 0.0000003  0.00000042]
 [0.00000149 0.00000054 0.0000003  0.00000066]
 [0.00000703 0.00000638 0.0000003  0.00000048]
 [0.00000715 0.00000376 0.00000012 0.00000006]
 [0.00000644 0.00000536 0.00000006 0.00000024]
 [0.00000423 0.0000059  0.00000012 0.        ]
 [0.00000286 0.00000012 0.00000012 0.        ]
 [0.00000066 0.00000417 0.00000012 0.        ]
 [0.00000149 0.00000942 0.00000036 0.        ]
 [0.00000471 0.00000727 0.00000012 0.00000024]
 [0.00000232 0.00

AttributeError: 'NoneType' object has no attribute 'acceptable'

In [None]:
yo, loss, tape = network.forward()
plt.plot(sc.inverse_transform(yo)[:,0], label="LLAAT")
plt.plot(sc.inverse_transform(network.y)[:,0], label="Actual")
plt.legend()

In [None]:
network.forecast(x_test_scaled)

In [None]:
# def forecast(self, x_test_scaled):
    
#     x_test_scaled = tf.cast(x_test_scaled, tf.float32)
#     forecast_value = tf.nn.relu((tf.matmul(x_test_scaled, self.W1)+self.b1))
#     return forecast_value