In [1]:
## Import PyTorch package for modeling
import torch
from torch.autograd import Variable
import torch.optim as optim

## Data processing
import pandas as pd
import numpy as np

## Min-max normalization
from sklearn.preprocessing import MinMaxScaler

## Plot the graph
import matplotlib.pyplot as plt
%matplotlib inline

## Initializing module
from sklearn.linear_model import LinearRegression
np.set_printoptions(suppress=True)

## Copy module
import copy

## Used to calculate the training time
import time

## Set the GPU environment
import os

from matplotlib.ticker import MaxNLocator

In [2]:
def read(path):
    """Load a CSV source into a pandas DataFrame.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using the default ``pd.read_csv`` options.
    """
    frame = pd.read_csv(path)
    return frame

In [3]:
def buildTrain(train, pastWeek=4, futureWeek=4, defaultWeek=1, target="CCSP"):
    """Turn a time-ordered table into lagged (input, target) samples.

    For every start row ``i`` one sample is produced:

    * input  = all columns of rows ``i .. i+defaultWeek-1`` (flattened row by
      row), followed by the ``target`` column of rows
      ``i+defaultWeek .. i+pastWeek-1``;
    * output = the ``target`` column of rows
      ``i+pastWeek .. i+pastWeek+futureWeek-1``.

    Parameters
    ----------
    train : pandas.DataFrame
        Source table; must contain the ``target`` column.
    pastWeek : int, optional
        Number of rows that make up the input window.
    futureWeek : int, optional
        Number of rows to predict.
    defaultWeek : int, optional
        Number of leading rows of the window for which every column is kept.
    target : str, optional
        Name of the column used for the lagged inputs and for the outputs.
        Defaults to "CCSP", which was previously hard-coded.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, Y_train)``, one row per sample. Both arrays are empty when
        the table has too few rows to form a single sample.
    """
    X_train, Y_train = [], []
    target_values = train[target]

    ## The bound is kept exactly as before so existing results do not change;
    ## note that with it the last row of `train` never appears in any target.
    n_samples = train.shape[0] - futureWeek - pastWeek

    for start in range(n_samples):
        split = start + pastWeek
        ## Full feature rows at the head of the window, flattened to 1-D
        full_rows = np.array(train.iloc[start:start + defaultWeek]).reshape(-1)
        ## Only the target column for the remaining rows of the window
        target_lags = target_values.iloc[start + defaultWeek:split]
        X_train.append(np.append(full_rows, target_lags))
        Y_train.append(np.array(target_values.iloc[split:split + futureWeek]))

    return np.array(X_train), np.array(Y_train)

### Min-max normalization

In [4]:
## Use min-max normalization to scale the data to the range from 0 to 1
sc = MinMaxScaler(feature_range = (0, 1))

In [5]:
def get_data(futureWeek, path="WeeklyFinalData.csv"):
    """Load the weekly price table and convert it into lagged samples.

    Parameters
    ----------
    futureWeek : int
        Number of future weeks per target row; forwarded to ``buildTrain``.
    path : str, optional
        CSV file to load. Defaults to "WeeklyFinalData.csv", the file that was
        previously hard-coded.

    Returns
    -------
    tuple
        ``(x_data, y_data)`` exactly as returned by ``buildTrain``.

    Raises
    ------
    KeyError
        If the loaded table has no "Date" column.
    """
    ## Read weekly copper price data
    raw = read(path)

    ## "Date" is not passed on as a feature; drop it without mutating `raw`
    features = raw.drop(columns="Date")

    ## Add time lag (buildTrain's default pastWeek, caller-supplied futureWeek)
    x_data, y_data = buildTrain(features, futureWeek=futureWeek)

    return (x_data, y_data)

In [6]:
## Build the lagged samples with a forecast horizon of 4 weeks
x_data, y_data = get_data(4)
## Scale inputs and targets with the shared scaler `sc`.
## NOTE(review): `sc` is re-fitted on y_data right after x_data, so from here on
## it only holds the target statistics; the x_data scaling cannot be inverted
## or re-applied through `sc` any more — confirm this is acceptable.
x_data = sc.fit_transform(x_data)
y_data = sc.fit_transform(y_data)
## Error threshold: 3000 divided by the fitted (max - min) of the targets,
## presumably converting a tolerance in original units to the scaled axis
threshold_for_error = 3000/(sc.data_max_-sc.data_min_)


## Row indices of the samples; the windows below are slices of these indices
data = range(x_data.shape[0])
# window_size => the length of training block
window_size = 106
# step_window => step size of each window
step_window = 4
# the split data
splits = []

## Moving window mechanism
for i in range(window_size, len(data), step_window):
    # training indices: the window_size rows that end just before row i
    train = np.array(data[i-window_size:i])
    # NOTE(review): the test indices start at i-window_size as well, so they
    # contain the whole training window plus the next step_window rows (fewer
    # at the end of the data). The commented-out alternative below would keep
    # only the new rows — confirm which one is intended.
    test = np.array(data[i-window_size:i+step_window])
#     test = np.array(data[i:i+step_window])
    # each entry is a 4-tuple; index 1 holds the train indices, index 3 the test indices
    splits.append(('TRAIN:', train, 'TEST:', test))

In [9]:
## Preview the scaled targets of the first three moving-window blocks
for i_block in range(3):
    block = splits[i_block]
    # positions 1 and 3 of a split entry hold the train / test row indices
    train_rows, test_rows = block[1], block[3]

    x_train, y_train = x_data[train_rows], y_data[train_rows]
    x_test, y_test = x_data[test_rows], y_data[test_rows]

    print("y_train",y_train[:10])
    print("y_test",y_test[:10])

y_train [[0.89815903 0.92831962 0.84351743 0.80395613]
 [0.92831962 0.84351743 0.80395613 0.81355268]
 [0.84351743 0.80395613 0.81355268 0.84214649]
 [0.80395613 0.81355268 0.84214649 0.84469252]
 [0.81355268 0.84214649 0.84469252 0.92897258]
 [0.84214649 0.84469252 0.92897258 0.98211242]
 [0.84469252 0.92897258 0.98211242 0.97473561]
 [0.92897258 0.98211242 0.97473561 0.99941246]
 [0.98211242 0.97473561 0.99941246 0.97669409]
 [0.97473561 0.99941246 0.97669409 0.975519  ]]
y_test [[0.89815903 0.92831962 0.84351743 0.80395613]
 [0.92831962 0.84351743 0.80395613 0.81355268]
 [0.84351743 0.80395613 0.81355268 0.84214649]
 [0.80395613 0.81355268 0.84214649 0.84469252]
 [0.81355268 0.84214649 0.84469252 0.92897258]
 [0.84214649 0.84469252 0.92897258 0.98211242]
 [0.84469252 0.92897258 0.98211242 0.97473561]
 [0.92897258 0.98211242 0.97473561 0.99941246]
 [0.98211242 0.97473561 0.99941246 0.97669409]
 [0.97473561 0.99941246 0.97669409 0.975519  ]]
y_train [[0.81355268 0.84214649 0.84469252 