# Multivariate Time-Series Forecasting: Predict Iron Prices

### Import libs

In [None]:
import numpy as np
import pandas as pd
import datetime as dt

import matplotlib.pyplot as plt
from pylab import rcParams


from sklearn.preprocessing import StandardScaler

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam



## Part 1: Data preprocessing

###  Read and load the data; we want to select the features for the training and prediction and extract the date column then make it the index

Index & Date columns...

check for null values:

In [51]:
dataset_train= pd.read_excel(r'C:\Users\fnafisa\WORKSPACE\professional\Project PoC\Input\data\Construction materials\segregated\حديد تسليح 12مم وطني.xlsx')
dataset_train.index= dataset_train.pop('date')
dataset_train

Unnamed: 0_level_0,price,oil_brent,tasi,gold,interest_rate,historical
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-01,2964.917,114.54,7043.55,1662.00,0.14,2615.09244
2013-02-01,2964.750,110.42,6998.33,1578.90,0.15,2964.91700
2013-03-01,2964.942,109.79,7125.73,1595.70,0.14,2964.75000
2013-04-01,2964.917,102.04,7179.80,1472.70,0.15,2964.94200
2013-05-01,2964.917,100.20,7404.12,1393.00,0.11,2964.91700
...,...,...,...,...,...,...
2022-05-01,4000.960,115.60,12921.74,1848.40,0.77,4032.60000
2022-06-01,3704.880,109.03,11523.25,1812.00,1.21,4000.96000
2022-07-01,3595.940,103.97,12199.08,1771.50,1.68,3704.88000
2022-08-01,3288.840,95.64,12283.33,1721.40,2.33,3595.94000


In [46]:
# # select features (columns) to be involved into training and predictions
cols= list(dataset_train)[:]


# EXTRACT dates:
# store the date index in a list 
datelist_train= dataset_train.index.strftime('%Y-%m-%d')
datelist_train= [dt.datetime.strptime(date, '%Y-%m-%d').date() for date in datelist_train]


print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))
print('Sample of date list: {}...{}'.format(datelist_train[0:2], datelist_train[-3:-1]))

Training set shape == (117, 6)
All timestamps == 117
Featured selected: ['price', 'oil_brent', 'tasi', 'gold', 'interest_rate', 'historical']
Sample of date list: [datetime.date(2013, 1, 1), datetime.date(2013, 2, 1)]...[datetime.date(2022, 7, 1), datetime.date(2022, 8, 1)]


###  Shaping and transformation

In [4]:
# make sure there are no commas in any number
dataset_train= dataset_train[cols].astype(str)
for i in cols: #['iron_12mm', 'oil_brent', 'tasi', 'gold', 'interest_rate', 'historical']
    for j in range(0, len(dataset_train)):
        dataset_train[i][j]= dataset_train[i][j].replace(',','')
dataset_train= dataset_train[cols].astype(float)

# Using multiple predictors (features)
# we need convert (reshape) to a matrix(array) using .to_numpy
training_set= dataset_train.to_numpy()
print('Shape of training set == {}.'.format(training_set.shape))

Shape of training set == (117, 6).


In [6]:
# Feature scaling hence features vary in range
sc= StandardScaler()
training_set_scaled= sc.fit_transform(training_set)

sc_predict= StandardScaler()
A= sc_predict.fit_transform(training_set[:,0:1]) # our target (iron_12mm) / the dependant variable

print('')





## Part 2: Modeling

###  Build the model architecture and Set up the WINDOWED DATAFRAME 

In [7]:
range(dataset_train.shape[1]-1)

range(0, 5)

In [8]:
training_set_scaled[0:1,1:dataset_train.shape[1]]

array([[ 1.87505343e+00, -8.56059301e-01,  9.06211793e-01,
        -7.07336230e-01,  1.88915200e-15]])

In [9]:
# Create a data structure with timestamps
X_train= []
y_train= []
n_future= 6 # number of moths to be predicted
n_past= 12 # the size of the windowed dataframe

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 1:dataset_train.shape[1]])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)
print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))



# Model structure
model= Sequential()
model.add(LSTM(units=32, return_sequences=True, input_shape=(n_past, X_train.shape[-1])))
model.add(Dropout(.25))
model.add(LSTM(units=10, return_sequences=False))
model.add(Dropout(.20))
model.add(Dense(units=1, activation='linear'))
model.compile(optimizer= Adam(learning_rate=.015), loss= 'mean_squared_error')

X_train shape == (100, 12, 5).
y_train shape == (100, 1).


In [10]:
%%time
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=20, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=50, callbacks=[es, rlr, mcp, tb], validation_split=0.3, verbose=1, batch_size=32)

Epoch 1/50
Epoch 1: val_loss improved from inf to 1.01859, saving model to weights.h5
Epoch 2/50
Epoch 2: val_loss improved from 1.01859 to 0.97984, saving model to weights.h5
Epoch 3/50
Epoch 3: val_loss did not improve from 0.97984
Epoch 4/50
Epoch 4: val_loss did not improve from 0.97984
Epoch 5/50
Epoch 5: val_loss did not improve from 0.97984
Epoch 6/50
Epoch 6: val_loss did not improve from 0.97984
Epoch 7/50
Epoch 7: val_loss did not improve from 0.97984
Epoch 8/50
Epoch 8: val_loss did not improve from 0.97984
Epoch 9/50
Epoch 9: val_loss did not improve from 0.97984
Epoch 10/50
Epoch 10: val_loss did not improve from 0.97984
Epoch 11/50
Epoch 11: val_loss did not improve from 0.97984
Epoch 12/50
Epoch 12: ReduceLROnPlateau reducing learning rate to 0.007499999832361937.

Epoch 12: val_loss did not improve from 0.97984
Epoch 13/50
Epoch 13: val_loss did not improve from 0.97984
Epoch 14/50
Epoch 14: val_loss did not improve from 0.97984
Epoch 15/50
Epoch 15: val_loss did not im

###  training and testing
## Part 3: Forcasting
### Step 5: Predict the future

In [48]:
# Generate list of sequence of months for predictions w datetime type
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1m').tolist()
datelist_future_= []
for d in datelist_future:
    d= d + dt.timedelta(days=1)
    d= d.date()
    datelist_future_.append(d)
# (datelist_future_)


# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])

def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return dt.strptime(x.strftime('%Y%m%d'), '%Y%m%d')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['predicted']).set_index(pd.Series(datelist_future_))
# PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['predicted']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['predicted']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert  to  for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index#.to_series()#.apply(datetime_to_timestamp)





In [12]:
PREDICTION_TRAIN
res= PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)



  res= PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)


  PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)


Unnamed: 0,predicted
2015-06-01,2385.196045
2015-07-01,2349.726807
2015-08-01,2308.717529
2015-09-01,2258.454834
2015-10-01,2203.059082
...,...
2022-11-01,3077.264893
2022-12-01,3057.854248
2023-01-01,3032.611572
2023-02-01,3018.118652


In [21]:
res= pd.DataFrame()
res['og_price']= dataset_train['price']
res['prediction']=  PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)

  res['prediction']=  PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)


In [23]:
res['prediction']=  PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)

  res['prediction']=  PREDICTION_TRAIN.append(PREDICTIONS_FUTURE)


date
2013-01   NaN
2013-02   NaN
2013-03   NaN
2013-04   NaN
2013-05   NaN
           ..
2022-05   NaN
2022-06   NaN
2022-07   NaN
2022-08   NaN
2022-09   NaN
Freq: M, Name: prediction, Length: 117, dtype: float32

In [19]:
res['iron_prices']= df_iron_12mm

res['date']= res.index
res.columns[-1]
df= res

ax = plt.gca()
df.plot(kind='line',x=res.columns[-1],y='iron_prices',ax=ax)
df.plot(kind='line',x=res.columns[-1],y='predicted', color='red', ax=ax)

plt.show()

In [328]:
res.to_csv(r'C:\Users\fnafisa\WORKSPACE\professional\Project PoC\output\predictions.csv', encoding='utf-8', index=False)
