## Import related API

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential, layers, backend, optimizers
from tensorflow.keras.layers import Dense, LSTM, Dropout, TimeDistributed, LeakyReLU, GRU, BatchNormalization
from keras.layers.core import RepeatVector
from keras.callbacks import EarlyStopping

from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

%matplotlib inline

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [5]:
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.3)
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
tf.compat.v1.keras.backend.set_session(sess)

In [6]:
#### 取消科學記號

In [7]:
np.set_printoptions(suppress=True)

## Read file

In [8]:
def read(path):
    return pd.read_csv(path)

In [9]:
path = "WeeklyFinalData.csv"
finalData = read(path)

In [10]:
train = finalData

In [11]:
date = train["Date"]
train.drop("Date",axis=1,inplace=True)

In [12]:
# plt.figure(figsize=(15,8))
# plt.plot(train["CCSP"])
# plt.axvline(x=381, color='r', linestyle='--')

# plt.savefig('CCSP.png')

## Visualization for raw data

In [13]:
# def show_raw_visualization(data):

#     fig, axes = plt.subplots(
#         nrows=int(round(data.shape[1]/2,0)), ncols=2, figsize=(15, 20), dpi=80, facecolor="w", edgecolor="k"
#     )

#     for i in range(data.shape[1]):
#         t_data = data.iloc[:,i]
#         ax = t_data.plot(
#             ax=axes[i // 2, i % 2],
#             color="black",
#             title=data.columns[i]
#         )
        
        
#     plt.tight_layout()
#     plt.savefig('raw_data.png')


# show_raw_visualization(train)

## Data preprocessing

### Add lag time as a predicted factor 
1. Add lag time from 1 to 4 for CCSP (Yangtze River nonferrous metals, China)
2. Split the data to Training set & Test set

In [14]:
def buildTrain(train, pastWeek=1, futureWeek=4, defaultWeek=1):
    X_train, Y_train = [], []
    for i in range(train.shape[0]-futureWeek-pastWeek):
        X = np.array(train.iloc[i:i+defaultWeek])
        X = np.append(X,train["CCSP"].iloc[i+defaultWeek:i+pastWeek])
        X_train.append(X.reshape(X.size))
        Y_train.append(np.array(train.iloc[i+pastWeek:i+pastWeek+futureWeek]["CCSP"]))
    return np.array(X_train), np.array(Y_train)

### Min-max scaling 
the data is scaled to a fixed range [0,1]

In [15]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))

### Setting the appearance of the learning graph

In [16]:
def show_raw_visualization(data, lag_time):

    fig, axes = plt.subplots(
        nrows=int(round(data.shape[1]/2,0)), ncols=2, figsize=(15, 5), dpi=80, facecolor="w", edgecolor="k"
    )

    for i in range(data.shape[1]):
        t_data = data.iloc[:,i]
        ax = t_data.plot(
            ax=axes[i % 2],
            color="black",
            title="Lag:{0}, {1} curve".format(lag_time+1, data.columns[i])
        )
    
#     fig.set_size_inches(10,15)
        
    plt.tight_layout()

## Building Model

### 2-layer Neural Network

In [17]:
from tensorflow.keras.metrics import Metric
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import array_ops


class DINKLE_Accuracy(Metric):

    def __init__(self, name="DINKLE_Accuracy", **kwargs):
        super(Metric, self).__init__(name=name, **kwargs)
#         self.total_count = self.add_weight(name = "total_count", initializer=init_ops.zeros_initializer)
#         self.match_count = self.add_weight(name = "match_count", initializer=init_ops.zeros_initializer)
        self.matches_rate = self.add_weight(name = "matches_rate", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        
#         y_true = tf.convert_to_tensor(sc.inverse_transform(y_true))
#         y_pred = tf.convert_to_tensor(sc.inverse_transform(y_pred))
            
        match_count = tf.reduce_sum(tf.cast(tf.less_equal(tf.abs(y_true- y_pred), 0.02), dtype = tf.float32))
        total_count = y_true.shape[0]
        self.matches_rate = math_ops.div_no_nan(match_count, total_count)

         
    def result(self):
        return  self.matches_rate
    
    def reset_state(self):
        self.matches_rate = tf.zeros(shape=(1, 1))

In [18]:
def buildTwoLayerNN(training_data_shape, designated_units):

    keras.backend.clear_session()
    regressor = Sequential()
    regressor.add(Dense(units=designated_units, activation ="relu", input_dim=training_data_shape, kernel_initializer = "uniform"))
    regressor.add(Dense(units=1)) 

    adam = optimizers.Adam(learning_rate=0.0001,beta_1=0.9,beta_2=0.999, decay=1e-6)
    regressor.compile(optimizer=adam, loss="mean_squared_error", metrics=[tf.keras.metrics.RootMeanSquaredError(), DINKLE_Accuracy()])
#     regressor.summary()

    return regressor

In [19]:
from sklearn.metrics import mean_squared_error

In [26]:
## Using different time lag


## Time lag
X1_train, Y1_train= buildTrain(train, pastWeek=4, futureWeek=1)

## Split date to training & test data

X1_training = pd.DataFrame(X1_train[0:int(X1_train.shape[0]*0.8)])
X1_test = pd.DataFrame(X1_train[int(X1_train.shape[0]*0.8):])

Y1_training = pd.DataFrame(Y1_train[0:int(Y1_train.shape[0]*0.8)])
Y1_test = pd.DataFrame(Y1_train[int(Y1_train.shape[0]*0.8):])

## Normalize
print("---Normalize---")
X1_training_scaled = sc.fit_transform(X1_training)
X1_test_scaled = sc.transform(X1_test)

Y1_training_scaled = sc.fit_transform(Y1_training)
Y1_test_scaled = sc.transform(Y1_test)

#     -----------------<< Covert to tensor >>-----------------------
X1_training_scaled = tf.convert_to_tensor(X1_training_scaled)
X1_test_scaled = tf.convert_to_tensor(X1_test_scaled)

Y1_training_scaled = tf.convert_to_tensor(Y1_training_scaled)
Y1_test_scaled = tf.convert_to_tensor(Y1_test_scaled)


## Training model
input_sahpe = X1_training_scaled.shape[1]
regressor = buildTwoLayerNN(input_sahpe, 30)
history = regressor.fit(X1_training_scaled, Y1_training_scaled, epochs=3, batch_size=10, verbose=0)

#     score = regressor.evaluate(X1_test_scaled, Y1_test_scaled, batch_size=X1_test_scaled.shape[0],verbose=0)

print(len(history.history["loss"]))

---Normalize---
3


In [68]:
for i in range(500, 3001, 500):
    print(round((i/25601),3))

0.02
0.039
0.059
0.078
0.098
0.117


In [29]:
tf.reduce_mean(X1_test_scaled[:10])

<tf.Tensor: shape=(), dtype=float64, numpy=0.472884251997269>

In [30]:
print(precision(a,b))

NameError: name 'precision' is not defined

### SVR using linear/ polynominal/ RBF kernel

In [None]:
def predict_prices(X_training, Y_training, X_testing, Y_testing):
    
    svr_lin = SVR(kernel='linear')
    svr_poly = SVR(kernel = 'poly', C=1e3 , degree = 2 )
    svr_rbf = SVR(kernel = 'rbf', C=1e3 , gamma = 0.1)
    svr_lin.fit(X_training, Y_training)    
    svr_poly.fit(X_training, Y_training)
    svr_rbf.fit(X_training, Y_training)
    

    svrs = [svr_lin, svr_poly, svr_rbf]
    kernel_label = ['Linear', 'Polynomial', 'RBF']
    model_color = ['g', 'b', 'r']

    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 15), sharey=True)
    
    for ix, svr in enumerate(svrs):

#         train_obs = sc.inverse_transform(pd.DataFrame(Y_training))
#         train_pre = sc.inverse_transform(pd.DataFrame(svr.predict(X_training)))

#         test_obs = sc.inverse_transform(pd.DataFrame(Y_testing))
#         test_pre = sc.inverse_transform(pd.DataFrame(svr.predict(X_testing)))

        train_obs = Y_training
        train_pre = svr.predict(X_training)

        test_obs = Y_testing
        test_pre = svr.predict(X_testing)

        axes[0, ix].scatter(range(Y_training.shape[0]), train_obs, color='black', label = 'data')
        axes[0, ix].plot(train_pre, color= model_color[ix], label =kernel_label[ix])
        axes[0, ix].legend()
        axes[0, ix].set_title("Training Step: "+kernel_label[ix]+" kenel")
        axes[0, ix].set_xlabel("Sample index(weekly)")
        axes[0, ix].set_ylabel("Copper price ($/ton)")
        
        axes[1, ix].scatter(range(Y_testing.shape[0]), test_obs, color='black', label = 'data')
        axes[1, ix].plot(test_pre, color= model_color[ix], label =kernel_label[ix])
        axes[1, ix].legend()
        axes[1, ix].set_title("Testing Step: "+kernel_label[ix]+" kenel")
        axes[1, ix].set_xlabel("Sample index(weekly)")
        axes[1, ix].set_ylabel("Copper price ($/ton)")
        
        training_RMSE = np.sqrt(mean_squared_error(train_obs, train_pre))
        testing_RMSE = np.sqrt(mean_squared_error(test_obs, test_pre))

        print("The RMSE of SVR using %s kenel\nTraining step:%.3f\tTest step:%.3f" %(kernel_label[ix], training_RMSE, testing_RMSE))

    fig.suptitle("Support Vector Regression", fontsize=14)
    plt.tight_layout()
    plt.show()

In [None]:
## Using different time lag
for i in range(4):

    print("Lag time = {}".format(i+1))
    ## Time lag
    X1_train, Y1_train= buildTrain(train, pastWeek=i+1, futureWeek=1)

    ## Split date to training & test data
    X1_training = pd.DataFrame(X1_train[0:int(X1_train.shape[0]*0.8)])
    X1_test = pd.DataFrame(X1_train[int(X1_train.shape[0]*0.8):])

    Y1_training = pd.DataFrame(Y1_train[0:int(Y1_train.shape[0]*0.8)])
    Y1_test = pd.DataFrame(Y1_train[int(Y1_train.shape[0]*0.8):])

    ## Normalize
    print("---Normalize---")
    X1_training_scaled = sc.fit_transform(X1_training)
    X1_test_scaled = sc.transform(X1_test)

    Y1_training_scaled = sc.fit_transform(Y1_training)
    Y1_test_scaled = sc.transform(Y1_test)

    ## Training model
    predict_prices(X1_training_scaled, Y1_training_scaled, X1_test_scaled, Y1_test_scaled)