In [4]:
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from statistics import mean
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU

cvx_data = pd.read_csv("C:/Users/coryg/OneDrive/Desktop/STAT_574_Data_Mining/CVX_historical_data_shock.csv",
                       index_col="Date", parse_dates=["Date"])
cvx_data = cvx_data.drop(["Close"], axis=1)

# Splitting the data into training and testing sets. 

train = cvx_data[cvx_data.index < pd.to_datetime("2022-01-02", format='%Y-%m-%d')]
test = cvx_data[cvx_data.index >= pd.to_datetime("2022-01-02", format='%Y-%m-%d')]

train_set = train.loc[:, "Shock"].values
test_set = test.loc[:, "Shock"].values

# Splitting training data into samples. 

nsteps=60

def split_sequence(sequence):
    x, y = list(), list()
    for i in range(len(sequence)):
        end_i = i + nsteps
        if end_i > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_i], sequence[end_i]
        x.append(seq_x)
        y.append(seq_y)
    return np.array(x), np.array(y)

train_x, train_y = split_sequence(train_set)

In [6]:
# Fitting LSTM Model

features=1
train_x = train_x.reshape(train_x.shape[0], train_x.shape[1], features)

model_lstm = Sequential()
model_lstm.add(LSTM(units=6, activation="sigmoid", input_shape=(nsteps, features)))
model_lstm.add(Dense(units=1))

model_lstm.compile(loss="binary_crossentropy")
model_lstm.fit(train_x, train_y, epochs=5, batch_size=32)
inputs = cvx_data.loc[:,"Shock"][len(cvx_data.loc[:,"Shock"])-len(test_set)-nsteps :].values

test_x, test_y = split_sequence(inputs)
test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], features)

pred_prob = model_lstm.predict(test_x)

cutoff = []
accuracy = []
for i in range(99):
    tp=0
    tn=0
    cutoff.append(0.01*(i+1))
    for sub1, sub2 in zip(pred_prob, test_y):
        tp_ind = 1 if (sub1>0.01*(i+1) and sub2==1) else 0
        tn_ind = 1 if (sub1<0.01*(i+1) and sub2==0) else 0
        tp+=tp_ind
        tn+=tn_ind
    accuracy_i = (tp+tn)/len(pred_prob)
    accuracy.append(accuracy_i)

df = pd.DataFrame({'accuracy': accuracy, 'cut-off':cutoff})
max_accuracy = max(accuracy)
optimal=df[df['accuracy']==max_accuracy]
print(optimal)

Epoch 1/5


  super().__init__(**kwargs)


[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.5266
Epoch 2/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.4353
Epoch 3/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.4260
Epoch 4/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.4176
Epoch 5/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - loss: 0.3981
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
    accuracy  cut-off
0   0.954092     0.01
1   0.954092     0.02
2   0.954092     0.03
3   0.954092     0.04
4   0.954092     0.05
..       ...      ...
74  0.954092     0.75
75  0.954092     0.76
76  0.954092     0.77
77  0.954092     0.78
78  0.954092     0.79

[79 rows x 2 columns]


In [8]:
# Fitting GRU Architecture

model_gru = Sequential()
model_gru.add(GRU(units=6, activation="sigmoid", input_shape=(nsteps, features)))
model_gru.add(Dense(units=1))

model_gru.compile(loss="binary_crossentropy")
model_gru.fit(train_x, train_y, epochs=5, batch_size=32)

pred_prob = model_gru.predict(test_x)

cutoff=[]
accuracy=[]
for i in range(99):
    tp=0
    tn=0
    cutoff.append(0.01*(i+1))
    for sub1, sub2 in zip(pred_prob, test_y):
        tp_ind=1 if (sub1>0.01*(i+1) and sub2==1) else 0
        tn_ind=1 if (sub1<0.01*(i+1) and sub2==0) else 0
        tp+=tp_ind
        tn+=tn_ind
        
    accuracy_i=(tp+tn)/len(pred_prob)
    accuracy.append(accuracy_i)
    
df=pd.DataFrame({'accuracy': accuracy,'cut-off': cutoff})
max_accuracy=max(accuracy)
optimal=df[df['accuracy']==max_accuracy]
print(optimal)

Epoch 1/5


  super().__init__(**kwargs)


[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - loss: 2.4998
Epoch 2/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.6285
Epoch 3/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - loss: 0.4338
Epoch 4/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 0.4110
Epoch 5/5
[1m93/93[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - loss: 0.4252
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
    accuracy  cut-off
0   0.954092     0.01
1   0.954092     0.02
2   0.954092     0.03
3   0.954092     0.04
4   0.954092     0.05
..       ...      ...
80  0.954092     0.81
81  0.954092     0.82
82  0.954092     0.83
83  0.954092     0.84
84  0.954092     0.85

[85 rows x 2 columns]
