In [1]:
import os
import torch
import torch.nn.functional as F
from tqdm import tqdm
from torch_geometric.loader import NeighborLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch_geometric.nn import MessagePassing, SAGEConv
from ogb.nodeproppred import Evaluator, PygNodePropPredDataset
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch_geometric.data import Data
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
from early_stopping import EarlyStopping
import random
from yf_dataset import getInput

# Reproducibility: pin every random number generator this notebook touches to
# one seed and force cuDNN onto its deterministic code paths.
seed = 42

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here can
# only influence processes launched afterwards, not the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)

# Seed Python, NumPy and PyTorch (CPU, current GPU, all GPUs) in that order.
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(seed)

# Disable cuDNN autotuning and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [5]:
# Build the index splits, class weights, raw price frame and graph input via
# the project helper. The meaning of the two positional flags is defined in
# yf_dataset.getInput; withMacdSignal=True matches the checkpoint loaded below.
train_idx, eval_idx, test_idx, weight, yfdata, gnnInputData = getInput(False, False, withMacdSignal=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoint location. Set the GNN_MODEL_PATH environment variable to run this
# notebook on another machine; the default is the original author's path.
MODEL_PATH = os.environ.get(
    'GNN_MODEL_PATH',
    r'C:\Users\tony\Desktop\GNN_test\result\CrossEntrophyLoss\CrossEntrophyLoss with MACD Signal startLr=4.262769e-05 numNeighbors=18\best_model.pt',
)

# map_location remaps the stored tensors onto whichever device is available,
# so a checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE(security): torch.load unpickles the file and can execute arbitrary code;
# only load checkpoints you trust.
# NOTE(review): on PyTorch releases where torch.load defaults to
# weights_only=True, loading a fully pickled model object needs
# weights_only=False passed explicitly - confirm against the installed version.
model = torch.load(MODEL_PATH, map_location=device)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yfdata['Signal'].iloc[i] = 0  # 买入信号
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yfdata['Signal'].iloc[i] = 2  # 卖出信号
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yfdata['Signal'].iloc[i] = 0  # 买入信号
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  yfdata['Signal'].iloc[i] = 2  # 卖出信号
A value is trying to

If there is NAN in data?  False
Index(['2011-01-31 00:00:00 ~ 2011-02-09 00:00:00',
       '2011-02-01 00:00:00 ~ 2011-02-10 00:00:00',
       '2011-02-02 00:00:00 ~ 2011-02-11 00:00:00',
       '2011-02-03 00:00:00 ~ 2011-02-14 00:00:00',
       '2011-02-04 00:00:00 ~ 2011-02-15 00:00:00',
       '2011-02-07 00:00:00 ~ 2011-02-16 00:00:00',
       '2011-02-08 00:00:00 ~ 2011-02-17 00:00:00',
       '2011-02-09 00:00:00 ~ 2011-02-18 00:00:00',
       '2011-02-10 00:00:00 ~ 2011-02-22 00:00:00',
       '2011-02-11 00:00:00 ~ 2011-02-23 00:00:00',
       ...
       '2019-08-08 00:00:00 ~ 2019-08-19 00:00:00',
       '2019-08-09 00:00:00 ~ 2019-08-20 00:00:00',
       '2019-08-12 00:00:00 ~ 2019-08-21 00:00:00',
       '2019-08-13 00:00:00 ~ 2019-08-22 00:00:00',
       '2019-08-14 00:00:00 ~ 2019-08-23 00:00:00',
       '2019-08-15 00:00:00 ~ 2019-08-26 00:00:00',
       '2019-08-16 00:00:00 ~ 2019-08-27 00:00:00',
       '2019-08-19 00:00:00 ~ 2019-08-28 00:00:00',
       '2019-08-20 00

In [6]:
# Keep two cores free for the notebook itself, but never pass a negative
# worker count: os.cpu_count() may return None or a value below 2, and
# DataLoader rejects negative num_workers. 0 means "load in the main process".
num_loader_workers = max((os.cpu_count() or 1) - 2, 0)

# Loader over the test nodes, in their original order (shuffle=False) so the
# predictions line up with test_idx. num_neighbors=[-1] is a single hop that,
# per the PyG NeighborLoader convention, keeps every neighbour.
total_loader = NeighborLoader(gnnInputData, input_nodes=test_idx, num_neighbors=[-1],
                              shuffle=False,
                              num_workers=num_loader_workers)

In [7]:
# Switch to evaluation mode and move the model to the selected device.
model.eval()
model.to(device)

# Run inference without autograd bookkeeping; this is evaluation only, so no
# gradients are needed. `var` is the second value returned by the project's
# inference method and is kept for later inspection.
with torch.no_grad():
    out, var = model.inference(total_loader, device)

# Predicted class = index of the largest score along the last dimension.
# Moved to the CPU because the backtest converts predictions to NumPy.
y_pred = out.argmax(dim=-1).cpu()

In [8]:
# Download daily SPY prices for the period used to build the graph dataset.
start_date = "2011-01-30"
end_date = "2019-09-01"
# auto_adjust=False keeps the separate 'Adj Close' column that the backtest
# reads; newer yfinance releases adjust prices by default and drop it.
spy = yf.download('SPY', start=start_date, end=end_date, auto_adjust=False)

[*********************100%%**********************]  1 of 1 completed


In [10]:
# Simple long-only backtest: trade one SPY share per signal at the adjusted
# close and record the portfolio value (cash + holdings) after every day.

# Class labels emitted by the model that trigger a trade; any other label
# leaves the position unchanged.
BUY_LABEL = 0
SELL_LABEL = 2

# Price rows for the test period, with the final row dropped.
# NOTE(review): presumably the last row is removed so the rows line up
# one-to-one with y_pred - confirm against how test_idx is built.
label_data = spy.loc['2018-03-28':].iloc[:-1]

init_investment = 1000
investment = init_investment   # cash currently available
sAndp_hold = 0                 # number of SPY shares held
total = []                     # portfolio value after each trading day
sell_count = 0
buy_count = 0

# Pull predictions and prices out once instead of converting per element and
# doing a row-wise .iloc lookup inside the loop.
predicted_labels = y_pred.detach().cpu().numpy()
adj_close_prices = label_data['Adj Close'].to_numpy()

# More predictions than price rows means the two series are misaligned; fail
# with a clear message instead of an IndexError part-way through the loop.
if len(predicted_labels) > len(adj_close_prices):
    raise ValueError(
        f"y_pred has {len(predicted_labels)} entries but label_data only has "
        f"{len(adj_close_prices)} rows"
    )

for pred, adj_close in zip(predicted_labels, adj_close_prices):
    if pred == BUY_LABEL and investment >= adj_close:
        # Buy one share when there is enough cash.
        investment -= adj_close
        sAndp_hold += 1
        buy_count += 1
    elif pred == SELL_LABEL and sAndp_hold > 0:
        # Sell one share when at least one is held.
        investment += adj_close
        sAndp_hold -= 1
        sell_count += 1

    total.append(investment + sAndp_hold * adj_close)

total

[1000.0,
 1000.0,
 1000.0,
 1000.0,
 1002.5333557128906,
 1004.4220428466797,
 999.0466156005859,
 1000.2088623046875,
 1007.7453002929688,
 1003.9591369628906,
 1009.8700561523438,
 1007.7453002929688,
 1013.6836395263672,
 1021.4745941162109,
 1022.0191955566406,
 1017.9332427978516,
 1011.7225341796875,
 1011.6134033203125,
 1001.8342437744141,
 1003.0146331787109,
 1005.4480590820312,
 1005.9021911621094,
 1002.1792907714844,
 1003.0328979492188,
 999.8003845214844,
 998.7470397949219,
 1004.9214477539062,
 1007.3731384277344,
 1007.3731384277344,
 1014.4011383056641,
 1023.5536651611328,
 1026.5683135986328,
 1027.0406036376953,
 1020.2124786376953,
 1024.3529815673828,
 1023.5175323486328,
 1021.0477447509766,
 1028.4569854736328,
 1025.6967315673828,
 1028.4206085205078,
 1026.3867950439453,
 1024.0260162353516,
 1012.6576690673828,
 1025.6967315673828,
 1019.6311187744141,
 1026.876968383789,
 1031.598648071289,
 1032.3253936767578,
 1040.678726196289,
 1040.5699005126953,
 104

In [11]:
import plotly.graph_objects as go

# Portfolio value relative to the starting capital (1.0 means break-even),
# plotted against the dates of the backtest window.
relative_value = [value / init_investment for value in total]

fig = go.Figure()
fig.add_trace(go.Scatter(x=label_data.index, y=relative_value, name='Return Percentage'))
fig.update_layout(title="S&P500")
fig.show()

# Total return over the whole window spread evenly across the number of
# simulated days.
total_return = (total[-1] / init_investment) - 1
print('average_daily_return = ', total_return / len(total))

average_daily_return =  0.00038486096055394747
