## ANN and GA

用 ANN 去預測股票走勢感覺不大靠譜，換一個思路。

預測股價是回歸問題：先找出一個能給出具體數值的函數，之後再人為根據這個值做判斷。另一種方法是直接給出買/賣信號，屬於分類問題：不需要理會買賣信號是如何得出的，只用回測（BT）評估結果，再用遺傳算法（GA）篩選出能帶來更高 profit 的 NN。

In [1]:
import torch
import pygad
from pygad import torchga

from backtesting import Backtest, Strategy

import yfinance as yf

import pandas as pd
import pandas_ta as ta

import datetime

from utils.rolling_to_list import rolling_to_list

from IPython.display import clear_output



In [2]:
# File path the best evolved model is written to by torch.save() at the end of the notebook.
model_path = 'model_friday_010.pt'

In [3]:
# Select the compute device: Apple's Metal backend (MPS) when available, otherwise the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [4]:
# Loss criterion (mean squared error). The commented-out lines are alternatives
# that were tried. NOTE(review): nothing in the visible cells uses
# `loss_function`; the GA fitness comes from the backtest instead.
loss_function = torch.nn.MSELoss()
# loss_function = torch.nn.L1Loss()
# loss_function = torch.nn.BCELoss()

def prepare_model(input_size):
    """Build the feed-forward network and an optimizer over its parameters.

    The network is ``Linear(input_size, 400) -> ReLU -> Linear(400, 200) ->
    ReLU -> Linear(200, 1)`` with no output activation, moved to the
    module-level ``device``.

    Args:
        input_size: Number of input features of the first linear layer.

    Returns:
        A ``(model, optimizer)`` tuple: the ``torch.nn.Sequential`` network and
        an ``Adagrad`` optimizer bound to its parameters.
    """
    hidden_sizes = (400, 200)

    # A single ReLU module is shared between the hidden layers; it holds no
    # state, so reusing the instance is safe.
    activation = torch.nn.ReLU()

    stack = []
    in_features = input_size
    for out_features in hidden_sizes:
        stack.append(torch.nn.Linear(in_features, out_features))
        stack.append(activation)
        in_features = out_features
    # Single raw output unit (a Sigmoid head was tried and is disabled).
    stack.append(torch.nn.Linear(in_features, 1))

    model = torch.nn.Sequential(*stack).to(device)

    # Adagrad is the optimizer currently selected; SGD, RMSprop and Adam were
    # the alternatives experimented with.
    optimizer = torch.optim.Adagrad(
        model.parameters(),
        lr=0.01,
        lr_decay=0,
        weight_decay=0,
        initial_accumulator_value=0,
    )

    return model, optimizer


In [5]:
# Download the last 30 days of 5-minute bars for the ^SPX ticker.
df = yf.download("^SPX", period="30d", interval="5m")

# Express bar timestamps in Asia/Shanghai time; the trading window in the
# strategy below is compared against these index times.
df.index = df.index.tz_convert('Asia/Shanghai')

[*********************100%%**********************]  1 of 1 completed


In [6]:
# Look-back window length passed to rolling_to_list(); it also scales the
# network input size (number of columns * rolling_back).
rolling_back = 180

In [7]:
# Build the per-bar model inputs with the project helper rolling_to_list().
# NOTE(review): presumably this yields, for each bar, the flattened last
# `rolling_back` rows of df -- confirm against utils.rolling_to_list.
rolled = rolling_to_list(df, rolling_back)

# Index the windows by bar timestamp so the strategy can look them up by time.
rolled_hist = pd.Series(rolled, index=df.index) #.shift(-rolling_back + 1)

In [8]:
class Up10(Strategy):
    """Long-only strategy driven by the module-level neural network ``model``.

    On each bar inside the trading window (after 22:15 up to and including
    00:30, in the timezone of the data index) the network is evaluated on that
    bar's entry in ``rolled_hist``; an output above 0.5 places a buy order.
    On every other bar any open position is closed.
    """

    def init(self):
        """Define the trading window; it wraps around midnight."""
        self.start_time = datetime.time(22, 15, 0)
        self.end_time = datetime.time(0, 30, 0)

    def next(self):
        """Decide whether to buy or to close the position on the current bar."""
        idx = self.data.index[-1]

        # Only trade inside the configured time window. The window spans
        # midnight, hence "after start OR up to end".
        t = idx.time()
        in_ts = t > self.start_time or t <= self.end_time

        if in_ts:
            # The prediction only matters inside the window, so the network is
            # evaluated only there. Gradients are never needed because the
            # weights are evolved by the GA, so autograd is switched off.
            inputs = torch.Tensor(rolled_hist[idx]).to(device)
            with torch.no_grad():
                pred = model(inputs)[0]

            if pred > 0.5:
                # NOTE(review): if the Backtest runs with exclusive_orders=True,
                # a buy while already long closes and re-opens the position
                # (paying commission again) -- confirm this is intended.
                self.buy()
                return

        if self.position:
            self.position.close()

In [9]:
# Backtest of the Up10 strategy over the downloaded bars. The same instance is
# re-run for every candidate weight vector during the GA search.
bt = Backtest(
    df, 

    # Strategy
    Up10, 

    # Commission ratio applied to trades
    commission=.0001, 

    # By default market orders fill at the next bar's open; with
    # trade_on_close=True they fill at the current bar's close instead.
    trade_on_close=True,

    # Each new order auto-closes the previous position, so at most one
    # position is open at any time.
    exclusive_orders=True 
)

In [10]:
# bt.run()

In [11]:
# Name of the backtest statistic that fitness_func returns as the GA fitness.
indicator = 'Return [%]'
# indicator = 'Win Rate [%]' # did not work. NOTE(review): possibly because this statistic is NaN when a run makes no trades -- confirm.

In [12]:
def fitness_func(ga_instance, solution, sol_idx):
    """Compute the fitness of one candidate weight vector.

    The weights are loaded into the module-level ``model``, the module-level
    backtest ``bt`` is run (its strategy reads ``model``), and the statistic
    named by the module-level ``indicator`` is returned.

    Args:
        ga_instance: The ``pygad.GA`` instance calling this function (unused).
        solution: Flat vector of network weights to evaluate.
        sol_idx: Index of the solution within the population (unused).

    Returns:
        The selected backtest statistic as a float, or ``0.0`` if it is NaN.
    """
    import math

    # Use the current solution as the model parameters.
    weights = torchga.model_weights_as_dict(model=model, weights_vector=solution)
    model.load_state_dict(weights)

    stats = bt.run()
    fitness = float(stats[indicator])

    # Some statistics can be NaN (presumably e.g. the win rate of a run that
    # made no trades). NaN cannot be ranked by the GA, so score such runs as
    # 0.0, the same score a no-trade run gets under 'Return [%]'.
    if math.isnan(fitness):
        return 0.0

    return fitness

In [13]:
def callback_generation(ga_instance):
    """Print progress after each generation, replacing the previous cell output.

    Args:
        ga_instance: The ``pygad.GA`` instance that just finished a generation.
    """
    clear_output(wait=True)

    # Reuse the fitness values already computed for this generation. Calling
    # best_solution() without pop_fitness re-evaluates the fitness function
    # for the population, which here means re-running backtests.
    best_fitness = ga_instance.best_solution(
        pop_fitness=ga_instance.last_generation_fitness
    )[1]

    print(f"Generation = {ga_instance.generations_completed}")
    print(f"Fitness    = {best_fitness}")

In [14]:
# train_set = prepare_data(df)
# The raw downloaded bars are used directly, without extra preprocessing.
train_set = df

# One input per (column, look-back step) pair.
# NOTE(review): assumes each rolled_hist entry is a flattened window of
# rolling_back rows * all df columns -- confirm against rolling_to_list.
input_size = train_set.shape[1] * rolling_back

# The optimizer is returned too, but nothing in the visible cells uses it.
model, optimizer = prepare_model(input_size)

In [15]:
# Create a population of 30 candidate weight vectors for the model.
torch_ga = torchga.TorchGA(model=model, num_solutions=30)

In [16]:
# PyGAD parameters. See the documentation for details:
# https://pygad.readthedocs.io/en/latest/pygad.html#pygad-ga-class
num_generations = 50 # Number of generations to evolve.
num_parents_mating = 5 # Number of solutions selected as parents in the mating pool.
initial_population = torch_ga.population_weights # Initial population of network weight vectors.

In [17]:
# Configure the genetic algorithm: candidates are scored by fitness_func (one
# backtest per candidate) and progress is reported after every generation.
ga_instance = pygad.GA(num_generations=num_generations, 
                       num_parents_mating=num_parents_mating, 
                       initial_population=initial_population,
                       fitness_func=fitness_func,
                       on_generation=callback_generation)




In [18]:
# Run the evolutionary search; callback_generation prints progress per generation.
ga_instance.run()

Generation = 50
Fitness    = 1.7543197192385194


In [19]:
# Retrieve the best solution of the final population. Passing the fitness
# values already computed by the run avoids re-evaluating the fitness function
# (one backtest per candidate) just to report the result.
solution, solution_fitness, solution_idx = ga_instance.best_solution(
    pop_fitness=ga_instance.last_generation_fitness
)
print(f"Fitness value of the best solution = {solution_fitness}")
print(f"Index of the best solution : {solution_idx}")

Fitness value of the best solution = 1.7543197192385194
Index of the best solution : 0


In [20]:
# Load the weights of the best solution into the model.
best_solution_weights = torchga.model_weights_as_dict(model=model,weights_vector=solution)
model.load_state_dict(best_solution_weights)

# Persist the whole model object (not only its state_dict) to model_path.
torch.save(model, model_path)

In [21]:
# Re-run the backtest; `model` now holds the best solution's weights.
stats = bt.run()

In [22]:
# Display the backtest statistics as the cell output.
stats

Start                     2024-01-11 22:30...
End                       2024-02-24 04:55...
Duration                     43 days 06:25:00
Exposure Time [%]                   13.076923
Equity Final [$]                 10175.431972
Equity Peak [$]                  10181.206634
Return [%]                            1.75432
Buy & Hold Return [%]                6.158447
Return (Ann.) [%]                   18.715628
Volatility (Ann.) [%]                3.258371
Sharpe Ratio                          5.74386
Sortino Ratio                       18.665891
Calmar Ratio                        43.115792
Max. Drawdown [%]                   -0.434078
Avg. Drawdown [%]                   -0.105062
Max. Drawdown Duration       12 days 22:45:00
Avg. Drawdown Duration        2 days 00:29:00
# Trades                                  196
Win Rate [%]                        58.673469
Best Trade [%]                       0.235432
Worst Trade [%]                     -0.176789
Avg. Trade [%]                    

In [23]:
# Plot the results of the most recent backtest run.
bt.plot()

  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  df2 = (df.assign(_width=1).set_index('datetime')
  fig = gridplot(
  fig = gridplot(
