In [1]:
### 1. Install and import required libraries

In [2]:
#!pip install yfinance
#!pip install keras
#!pip install tensorflow

In [3]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler

import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM
import keras.backend as K

ModuleNotFoundError: ignored

### 2. Visualization

In [None]:
# Download data
# Fetch auto-adjusted GLD prices up to the fixed cut-off date (no start date is given)
Df = yf.download(
    'GLD',
    end="2021-04-20",
    auto_adjust=True,
)

# Keep a local spreadsheet copy of the raw download, then preview the first rows
Df.to_excel("data.xlsx")
Df.head()

In [None]:
import os

# Create the figure directory idempotently: a plain `mkdir` errors out when the
# cell is re-run and the directory already exists.
os.makedirs('output', exist_ok=True)

# Only keep the close column and drop rows with missing values
Df = Df[['Close']].dropna()

# Plot the closing price of GLD; save before show() so the saved file is not blank
Df['Close'].plot(figsize=(15, 7), color='r')
plt.ylabel("Gold ETF Prices")
plt.title("Gold ETF Price Series")
plt.savefig('output/draw01.png')
plt.show()

### 3. Define explanatory variables

In [None]:
# Explanatory variables: 3- and 9-day moving averages of the close.
# Target: the close of the following row (shift(-1)).
# Rows made incomplete by the rolling windows or the shift are dropped.
Df = Df.assign(
    S_3=Df['Close'].rolling(window=3).mean(),
    S_9=Df['Close'].rolling(window=9).mean(),
    next_day_price=Df['Close'].shift(-1),
).dropna()

X = Df[['S_3', 'S_9']]
X.head(10)


In [None]:
# Define dependent variable
# Dependent variable: the next row's closing price
y = Df.loc[:, 'next_day_price']
y.head(10)

### 4. Split the data into train and test dataset

In [None]:
# Position of the first test row: the leading 80% of rows are used for training
t = int(.8 * len(Df))

# Chronological split by position (train first, test after)
X_train, X_test = X.iloc[:t], X.iloc[t:]
y_train, y_test = y.iloc[:t], y.iloc[t:]

### 5. Linear Regression

#### 5.1 Create a linear regression model

In [None]:
# Fit ordinary least squares on the two moving-average features
linear = LinearRegression()
linear.fit(X_train, y_train)

# Report the fitted equation (coefficients in feature order S_3, S_9)
equation = (
    "Gold ETF Price (y) = %.2f * 3 Days Moving Average (x1) "
    "+ %.2f * 9 Days Moving Average (x2) "
    "+ %.2f (constant)"
)
print("Linear Regression model")
print(equation % (linear.coef_[0], linear.coef_[1], linear.intercept_))

#### 5.2 Predicting the Gold ETF prices

In [None]:

# Predict on the held-out rows and align the result with the test dates
predicted_price = pd.DataFrame(
    linear.predict(X_test), index=y_test.index, columns=['price']
)

# Draw predicted and actual prices on the same axes
ax = predicted_price.plot(figsize=(15, 10))
y_test.plot(ax=ax)
ax.legend(['predicted_price', 'actual_price'])
ax.set_ylabel("Gold ETF Price")
plt.savefig('output/draw02.png')
plt.show()

#### 5.3 Goodness of fit

In [None]:
# R^2 on the test rows, as a percentage.
# Deliberately not named `r2_score`: that name is also imported from
# sklearn.metrics further down, and re-running this cell afterwards would
# replace the function with a float.
linear_r2_pct = linear.score(X[t:], y[t:]) * 100
float("{0:.2f}".format(linear_r2_pct))

In [None]:
# Total absolute error of the linear model over the test rows (price units)
y_pred = linear.predict(X_test)
abs_errors = np.absolute(y_pred - y_test)
delta = sum(abs_errors)
print(delta)

In [None]:
import math
from sklearn.metrics import mean_squared_error
# Fit a scaler on the training closes only; its scale_ factor converts price
# differences into fractions of the training price range.
sc = MinMaxScaler()
sc.fit(Df[['Close']][:t])

# Scale into NEW names instead of overwriting y_train / y_test / y_pred:
# overwriting made the cell non-idempotent (a second run rescaled already
# scaled values). Only the multiplicative factor is applied; the constant
# offset of the scaler cancels in the error.
y_test_scaled = y_test * sc.scale_
y_pred_scaled = y_pred * sc.scale_

# "Accuracy" as 100 minus the scaled RMSE in percent. Not stored under the
# name `rmse`, which a later cell defines as a function.
linear_accuracy_pct = 100 - 100 * math.sqrt(mean_squared_error(y_test_scaled, y_pred_scaled))
print(linear_accuracy_pct)

### 6. LSTM

#### 6.1 Get dataset

In [None]:
# Download data
# Download data again: the earlier frame has been extended with feature
# columns and trimmed by dropna(), so start from a clean close-only series.
Df = yf.download('GLD', end="2021-04-20", auto_adjust=True)

# Only keep the close column and drop rows with missing values
Df = Df[['Close']].dropna()

# Chronological 80/20 split by position. Positional slicing replaces
# np.split(DataFrame, ...), which goes through the deprecated
# DataFrame.swapaxes in recent pandas.
split_at = int(.8 * len(Df))
train, test = Df.iloc[:split_at], Df.iloc[split_at:]
train.shape

In [None]:
# Learn the scaling from the training rows only, then apply the same
# transformation to both splits
sc = MinMaxScaler().fit(train)

train_sc = sc.transform(train)
test_sc = sc.transform(test)

In [None]:
# Factor the fitted scaler multiplies prices by; later cells divide errors by
# it to express them in price units again
print(sc.scale_)

In [None]:
# One-step-ahead pairs: each input is a scaled close, its target is the
# scaled close of the following row
X_train, y_train = train_sc[:-1], train_sc[1:]
X_test, y_test = test_sc[:-1], test_sc[1:]

In [None]:
# Insert a length-1 axis at position 1 so the inputs match the LSTM's
# input_shape=(1, 1)
X_train_t = np.expand_dims(X_train, axis=1)
X_test_t = np.expand_dims(X_test, axis=1)

In [None]:
# Sanity check: (rows, columns) of the unscaled train and test frames
print(train.shape, test.shape)

#### 6.2 Create Model

In [None]:
def acr(y_train, y_pred):
    """Keras metric: square root of the mean squared error over the last axis.

    `y_train` holds the target values and `y_pred` the model outputs.
    NOTE(review): the model ends in Dense(1), so the last axis appears to have
    a single element and this would reduce to the absolute error per sample —
    confirm whether a batch-level RMSE was intended.
    """
    squared_error = K.square(y_pred - y_train)
    mean_squared = K.mean(squared_error, axis=-1)
    return K.sqrt(mean_squared)

In [None]:
# Start from a clean Keras state so re-running the cell rebuilds the model
K.clear_session()

# Single LSTM layer (50 units) over one timestep of one feature, followed by
# a one-unit regression head
model = Sequential([
    LSTM(50, input_shape=(1, 1)),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=[acr])
model.summary()

#### 6.3 Train model

In [None]:
%%time
# Train for 50 epochs with per-epoch logging silenced; the returned History
# object is kept because a later cell reads history.history['loss']
history = model.fit(X_train_t, y_train, epochs=50, verbose=0)

In [None]:
%%time
y_pred = model.predict(X_test_t)

y_test_plot = pd.DataFrame(index=test.index)
y_test_plot = y_test_plot.iloc[1:]
y_test_plot['value'] = y_test / sc.scale_

y_pred_plot = pd.DataFrame(index=test.index)
y_pred_plot = y_pred_plot.iloc[1:]
y_pred_plot['value'] = y_pred / sc.scale_

plt.figure(figsize=(15, 10))
plt.plot(y_test_plot)
plt.plot(y_pred_plot)
plt.legend(['real', 'predict'])
plt.ylabel("Gold ETF Price")
plt.savefig('output/draw03.png')
plt.show()

#### 6.4 Plot training loss per epoch

In [None]:
# Training loss recorded for each epoch by model.fit
loss = history.history['loss']

epochs = range(len(loss))  # zero-based epoch indices, one per recorded loss value

In [None]:
# Create and size the figure BEFORE plotting. Calling plt.figure() after
# savefig() opened a second, empty figure and left the loss plot at the
# default size.
plt.figure(figsize=(15, 10))
plt.plot(epochs, loss, 'r')
# Only the training loss is drawn, so the title and legend name just that
# series (the previous "Accuracy" legend entry had no matching line).
plt.title('Training loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend(["Loss"])
plt.savefig('output/draw04.png')
plt.show()

In [None]:
# Last row of the current predictions, mapped from scaled units back to a price
print("result predict: ", sc.inverse_transform(y_pred)[-1])

#### 6.5 Goodness of fit

In [None]:
# Evaluate on the scaled test pairs; the model was compiled with an MSE loss
# and the custom `acr` metric, so both values are reported
score = model.evaluate(X_test_t, y_test, verbose=1)
print(score)

In [None]:
# Total absolute test error, converted from scaled units to price units
# (for differences the scaler's offset cancels, so dividing by scale_ suffices)
abs_err_scaled = np.absolute(y_pred - y_test)
score = sum(abs_err_scaled) / sc.scale_
print(score)

In [None]:
# Per-day absolute error in price units, aligned with the test dates from the
# second row onwards (targets are shifted by one step)
abs_err_price = np.absolute(y_pred - y_test) / sc.scale_
delta = pd.DataFrame({'value': abs_err_price.ravel()}, index=test.index[1:])

plt.figure(figsize=(15, 10))
plt.plot(delta)
plt.legend(['delta'])
plt.ylabel("Delta Price")
plt.savefig('output/draw05.png')
plt.show()

Accuracy

In [None]:
import sklearn
def rmse(y_test, y_pred):
    """Return the root mean squared error between targets and predictions."""
    squared_error = mean_squared_error(y_test, y_pred)
    return math.sqrt(squared_error)
from sklearn.metrics import r2_score
# Compute each error once on the scaled test data, then report it together
# with "100 - 100 * error" as a percentage-style score
mae = sklearn.metrics.mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
root_mse = rmse(y_test, y_pred)

print("mean absolute errors: ", mae, ", accuracy: ", 100 - 100 * mae)
print("mean squared errors: ", mse, ", accuracy: ", 100 - 100 * mse)
print("root mean squared error: ", root_mse, ", accuracy: ", 100 - 100 * root_mse)
print("r2 score: ", r2_score(y_test, y_pred) * 100)

r2 score

#### 6.6 Save model

In [None]:
# Persist only the layer weights.
# NOTE(review): newer Keras releases may require the file name to end in
# `.weights.h5` — confirm against the installed version.
model.save_weights('my_model_weights.h5')

In [None]:
# Persist the whole model (architecture, weights, compile settings).
# NOTE(review): newer Keras releases may require a `.keras` or `.h5`
# extension on the path — confirm against the installed version.
model.save('my_model')

#### 6.7 Optimization

In [None]:
%%time
results = pd.DataFrame()
epochs = [10]
for i in epochs:
    error_scores = list()
    for j in range(30):
        history = model.fit(X_train_t, y_train, epochs=i, verbose=0)
        y_pred = model.predict(X_test_t)
        error_scores.append(rmse(y_test, y_pred))
    
    
    results[str(i)] = error_scores

print(results.describe())
# save boxplot
results.boxplot()
plt.savefig('output/draw06.png')
plt.show()

In [None]:
#@title
# import datetime and get today's date

import datetime as dt
current_date = dt.datetime.now()
print(current_date)

# Get the data from mid-2008 up to now and add the 3- and 9-day moving averages
data = yf.download('GLD', '2008-06-01', current_date, auto_adjust=True)
for window in (3, 9):
    data['S_%d' % window] = data['Close'].rolling(window=window).mean()

In [None]:
import datetime as dt
current_date = dt.datetime.now()

# Window length read by predict() below
look_back = 15

# Latest close series as a flat 1-D array
df = yf.download('GLD', end=current_date, auto_adjust=True)
close_data = df['Close'].to_numpy().reshape(-1)

def predict(num_prediction, model, series=None, scaler=None):
    """Roll the model forward to forecast `num_prediction` future closes.

    Each step scales the most recent window of prices the same way the
    training data was scaled, asks the model for the next scaled value,
    converts it back to a price and appends it to the window source.

    The previous version fed raw (unscaled) prices in a `look_back`-long
    window to a model trained on scaled one-step inputs, and turned the
    output into a "price" with the arbitrary formula ``100 + out * 10``.

    Parameters
    ----------
    num_prediction : int
        Number of future steps to forecast.
    model : object
        Trained model exposing ``predict`` and, ideally, ``input_shape``.
    series : array-like, optional
        Observed prices to start from; defaults to the notebook-level
        ``close_data``.
    scaler : object, optional
        Fitted min-max scaler exposing ``scale_`` and ``min_``; defaults to
        the notebook-level ``sc``.

    Returns
    -------
    numpy.ndarray
        ``num_prediction + 1`` prices: the last observed price followed by
        the forecasts.

    Raises
    ------
    ValueError
        If fewer observations are available than the model's window length.
    """
    series = close_data if series is None else series
    scaler = sc if scaler is None else scaler

    # Use the window length the model was built with; fall back to the
    # notebook-level look_back only when the model does not fix one.
    input_shape = getattr(model, "input_shape", None)
    timesteps = input_shape[1] if input_shape and input_shape[1] else look_back

    prediction_list = np.asarray(series, dtype=float)[-timesteps:]
    if len(prediction_list) < timesteps:
        raise ValueError("need at least %d observations to start forecasting" % timesteps)

    scale, offset = scaler.scale_[0], scaler.min_[0]
    for _ in range(num_prediction):
        window = prediction_list[-timesteps:]
        # Min-max transform: X_scaled = X * scale_ + min_
        x = (window * scale + offset).reshape((1, timesteps, 1))
        out = model.predict(x, verbose=0)[0][0]
        # Inverse transform back to a price: X = (X_scaled - min_) / scale_
        prediction_list = np.append(prediction_list, (out - offset) / scale)

    # Keep the last observed value so the forecast line joins the history
    return prediction_list[timesteps - 1:]
    
def predict_dates(num_prediction, last_date=None, freq="B"):
    """Return the dates matching the output of ``predict``.

    The first date is the last observed one, followed by `num_prediction`
    future dates. Business-day frequency is the default because each
    forecast step is one trading day ahead; plain calendar days would place
    forecasts on weekends. Exchange holidays are not excluded.

    Parameters
    ----------
    num_prediction : int
        Number of forecast steps.
    last_date : datetime-like, optional
        Date of the last observation; defaults to the last index value of
        the notebook-level ``df``.
    freq : str, optional
        pandas frequency alias for the spacing of the dates (``"D"``
        restores calendar-day spacing).

    Returns
    -------
    list
        ``num_prediction + 1`` timestamps.
    """
    if last_date is None:
        last_date = df.index.values[-1]
    return pd.date_range(last_date, periods=num_prediction + 1, freq=freq).tolist()

# Forecast 30 steps ahead; both results start at the last observed point, so
# each holds num_prediction + 1 entries
num_prediction = 30
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)

In [None]:
import plotly.graph_objects as go
# Observed closing prices
trace1 = go.Scatter(x=df.index.tolist(), y=close_data, mode='lines', name='Data')

# Forecast, starting at the last observed point
trace2 = go.Scatter(x=forecast_dates, y=forecast, mode='lines', name='Prediction')

layout = go.Layout(
    title="Gold Rates",
    xaxis=dict(title="Date"),
    yaxis=dict(title="Close"),
)

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()