In [None]:
import pandas as pd
import datetime as dt
import requests
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error
%matplotlib inline

In [None]:
path = "Brent_crude_oil.csv"

In [None]:
brent_df=pd.read_csv(path)

In [None]:
brent_df.head(1)

In [None]:
brent_df.shape

In [None]:
brent_df.isna().sum()

In [None]:
brent_df.dropna(inplace = True)

In [None]:
brent_df.isna().sum()

In [None]:
brent_df.shape

In [None]:
df1=brent_df.copy()

In [None]:
df1["Date"]=pd.to_datetime(brent_df["Date"],format='%d/%m/%Y')
df1.head(1)

In [None]:
df1.info()

In [None]:
hist=df1.set_index('Date')
hist.head(5)

In [None]:
hist.index = pd.to_datetime(hist.index, unit='s')
target_col = 'Close'

In [None]:
hist.head(5)

In [None]:
hist.isna().sum()

In [None]:
hist.shape

In [None]:
def train_test_split(df, test_size=0.2):
    """Split ``df`` by position into a leading train part and a trailing test part.

    No shuffling is done. The test part receives ``int(test_size * len(df))``
    rows (i.e. rounded down) taken from the end; everything before that goes
    to the train part.

    Returns:
        ``(train_data, test_data)`` as ``df.iloc`` slices.
    """
    n_rows = len(df)
    n_test = int(test_size * n_rows)
    cut = n_rows - n_test
    return df.iloc[:cut], df.iloc[cut:]

In [None]:
# Preliminary positional 80/20 split, used by the overview plot below.
# `train` and `test` are rebound later when prepare_data() is called.
train, test = train_test_split(hist, test_size=0.2)

In [None]:
def line_plot(line1, line2, label1=None, label2=None, title='Brent_oil', lw=1):
    """Draw two series on one 13x7 matplotlib axes.

    Args:
        line1, line2: data passed straight to ``ax.plot``.
        label1, label2: legend labels for the respective series.
        title: axes title.
        lw: line width applied to both series.
    """
    fig, ax = plt.subplots(1, figsize=(13, 7))
    for series, label in ((line1, label1), (line2, label2)):
        ax.plot(series, label=label, linewidth=lw)
    ax.set_ylabel('price [USD]', fontsize=14)
    ax.set_title(title, fontsize=16)
    ax.legend(loc='best', fontsize=16)

In [None]:
# Overview of the target column: training span vs. held-out span.
line_plot(train[target_col], test[target_col], 'training', 'test', title='Brent_oil')

In [None]:
# def normalise_zero_base(df):
#     return df / df.iloc[0] - 1

# def normalise_min_max(df):
#     return (df - df.min()) / (data.max() - df.min())

def normalise_zero_base(df):
    """Express every row of ``df`` as a relative change from its first row.

    Computes ``df / df.iloc[0] - 1``, so the first row becomes 0 and later
    rows are fractional changes against it. The base is the first row because
    ``prepare_data`` scales each target by the first close of its window and
    predictions are mapped back to prices with that same value; any other base
    row would put inputs and targets on different scales. Using row 0 also
    works for windows of any length.

    Cells that cannot be computed because the base value is 0 or missing
    (NaN, +inf, -inf) are set to 0.

    Returns:
        A new object of the same shape as ``df``; ``df`` is not modified.
    """
    new_df = df / df.iloc[0] - 1
    # Division by a zero base yields NaN (0/0) or +/-inf (x/0); neutralise all three.
    new_df = new_df.replace([np.inf, -np.inf], np.nan).fillna(0)
    return new_df

def normalise_min_max(df):
    """Rescale ``df`` linearly so its minimum maps to 0 and its maximum to 1.

    For a DataFrame, ``min``/``max`` are taken per column; for a Series, over
    all of its values. A constant column (max == min) yields NaN because the
    range is zero.
    """
    lo = df.min()
    # The range must come from the same object that is being scaled.
    return (df - lo) / (df.max() - lo)

In [None]:
def extract_window_data(df, window_len=5, zero_base=True):
    """Stack overlapping windows of ``window_len`` consecutive rows of ``df``.

    One window is produced for every start position in
    ``range(len(df) - window_len)``, so the final ``window_len`` rows never
    start a window. When ``zero_base`` is true each window is passed through
    ``normalise_zero_base`` before its values are taken.

    Returns:
        ``np.array`` built from the list of per-window ``.values`` arrays.
    """
    def _window(start):
        chunk = df[start:start + window_len].copy()
        if zero_base:
            chunk = normalise_zero_base(chunk)
        return chunk.values

    n_windows = len(df) - window_len
    return np.array([_window(start) for start in range(n_windows)])

In [None]:
def prepare_data(df, target_col, window_len=10, zero_base=True, test_size=0.2):
    """Split ``df`` and turn each part into windowed inputs plus aligned targets.

    Args:
        df: frame to split with ``train_test_split``.
        target_col: column whose value ``window_len`` rows after a window's
            first row is that window's target.
        window_len: rows per input window.
        zero_base: when true, windows are normalised by
            ``extract_window_data`` and each target is divided by the target
            column's value ``window_len`` rows earlier, minus 1.
        test_size: fraction forwarded to ``train_test_split``.

    Returns:
        ``(train_data, test_data, X_train, X_test, y_train, y_test)``.
    """
    train_data, test_data = train_test_split(df, test_size=test_size)

    X_train, X_test = (
        extract_window_data(part, window_len, zero_base)
        for part in (train_data, test_data)
    )

    train_target = train_data[target_col]
    test_target = test_data[target_col]
    y_train = train_target[window_len:].values
    y_test = test_target[window_len:].values

    if zero_base:
        # Same relative-change scale as the inputs: divide by the value
        # window_len rows before each target.
        y_train = y_train / train_target[:-window_len].values - 1
        y_test = y_test / test_target[:-window_len].values - 1

    return train_data, test_data, X_train, X_test, y_train, y_test

In [None]:
def build_lstm_model(input_data, output_size, neurons=100, activ_func='linear',
                     dropout=0.2, loss='mse', optimizer='adam'):
    """Assemble and compile an LSTM -> Dropout -> Dense -> Activation model.

    Args:
        input_data: array whose ``shape[1]`` and ``shape[2]`` give the LSTM
            ``input_shape``; its contents are not used.
        output_size: number of units in the Dense layer.
        neurons: number of LSTM units.
        activ_func: activation applied after the Dense layer.
        dropout: rate of the Dropout layer.
        loss, optimizer: forwarded to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (not yet fitted).
    """
    timesteps = input_data.shape[1]
    n_features = input_data.shape[2]

    layers = [
        LSTM(neurons, input_shape=(timesteps, n_features)),
        Dropout(dropout),
        Dense(units=output_size),
        Activation(activ_func),
    ]
    model = Sequential(layers)
    model.compile(loss=loss, optimizer=optimizer)
    return model

In [None]:
# keras is already a dependency of this notebook; set_random_seed needs Keras >= 2.7.
from keras.utils import set_random_seed

# Seeding NumPy alone does not make training repeatable: weight initialisation,
# dropout masks and batch shuffling draw from the Keras backend's generator.
# set_random_seed seeds Python's `random`, NumPy and the backend together.
SEED = 42
np.random.seed(SEED)
set_random_seed(SEED)

# Windowing / split configuration.
window_len = 5
test_size = 0.2
zero_base = True

# Model and training configuration.
lstm_neurons = 100
epochs = 30
batch_size = 32
loss = 'mse'
dropout = 0.2
optimizer = 'adam'

In [None]:
# Build the windowed inputs and targets from `hist` using the settings above.
# This rebinds `train` and `test`, which were first assigned by the preliminary split.
train, test, X_train, X_test, y_train, y_test = prepare_data(
    hist, target_col, window_len=window_len, zero_base=zero_base, test_size=test_size)

In [None]:
# Build a single-output network sized from X_train's shape, then fit it on the
# training windows. No validation data is passed to fit().
model = build_lstm_model(
    X_train, output_size=1, neurons=lstm_neurons, dropout=dropout, loss=loss,
    optimizer=optimizer)
history = model.fit(
    X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1, shuffle=True)

In [None]:
targets = test[target_col][window_len:]
preds = model.predict(X_test).squeeze()
mean_absolute_error(preds, y_test)

In [None]:
preds = test[target_col].values[:-window_len] * (preds + 1)
preds = pd.Series(index=targets.index, data=preds)
#line_plot(targets, preds, 'actual', 'prediction', lw=1)

In [None]:
# `preds` shares `targets`' index, so each row already pairs the actual value
# for a date with the forecast made for that same date. The series are compared
# as-is: shifting the predictions one step earlier would set the forecast for
# day d+1 against the actual of day d and make a lagging model look accurate.
t = targets.to_frame("Close")
p = preds.to_frame("Close")

# Columns become Close_actual / Close_prediction; Date remains the index.
graphdata = t.merge(p, how='inner', on='Date', suffixes=('_actual', '_prediction'))

In [None]:
import plotly.express as px
import plotly.graph_objects as go

# Interactive comparison of actual vs. predicted values over graphdata's index.
fig = go.Figure()

fig.add_trace(go.Scatter(
    name='Actual',
    x=graphdata.index,
    y=graphdata['Close_actual'],
    mode='lines',
    marker=dict(color="#008080"),
    line=dict(width=1),
    showlegend=True,
))

fig.add_trace(go.Scatter(
    name='Prediction',
    x=graphdata.index,
    y=graphdata['Close_prediction'],
    marker=dict(color="#FF8C00"),
    line=dict(width=1),
    mode='lines',
    fillcolor='rgba(68, 68, 68, 0.3)',
    showlegend=True,
))

fig.update_layout(yaxis_title='', title='', hovermode="x")

fig.show()