#### How to use Boosting

`https://keeryang.github.io/papers/Profits_2021Aug28.pdf`

In [17]:
import optuna
from lightgbm import plot_importance, LGBMRegressor
import getFamaFrenchFactors as gff
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score

In [18]:
stocks_df = pd.read_csv('../../data/market_data/sp500/sp500_stocks.csv')
df = stocks_df.pivot(
    index='Date', columns='Symbol', values='Adj Close')

df = df.reset_index()
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

df.head()

Symbol,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,20.122227,4.496877,6.470741,,,18.952162,7.994444,32.21246,37.09,23.694084,...,52.883579,9.905468,41.963718,12.918809,43.185623,,19.158102,52.587051,28.67,
2010-01-05,19.903643,5.005957,6.481929,,,18.799042,7.967778,32.411549,37.700001,23.656675,...,52.765053,10.115747,44.515926,12.765595,43.354244,,19.092571,54.251759,28.620001,
2010-01-06,19.83293,4.798554,6.378825,,,18.903446,7.933333,32.756096,37.619999,23.611784,...,53.614498,10.003899,43.932011,12.79011,43.72897,,18.956089,54.234219,28.4,
2010-01-07,19.807215,4.939964,6.367033,,,19.060045,7.886667,32.725471,36.889999,23.424749,...,53.456463,9.959157,44.870213,12.734954,43.591564,,18.950626,55.478374,27.690001,
2010-01-08,19.800785,4.845691,6.409364,,,19.157482,7.871111,32.595306,36.689999,23.559416,...,53.397202,9.867439,44.548744,12.741086,43.416687,,18.956089,54.313072,27.6,


In [19]:
tickers = ['AAPL']

In [20]:
ff5 = pd.DataFrame(gff.famaFrench5Factor(frequency='m'))
ff5.head()

Unnamed: 0,date_ff_factors,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1963-07-31,-0.0039,-0.0041,-0.0097,0.0068,-0.0118,0.0027
1,1963-08-31,0.0507,-0.008,0.018,0.0036,-0.0035,0.0025
2,1963-09-30,-0.0157,-0.0052,0.0013,-0.0071,0.0029,0.0027
3,1963-10-31,0.0253,-0.0139,-0.001,0.028,-0.0201,0.0029
4,1963-11-30,-0.0085,-0.0088,0.0175,-0.0051,0.0224,0.0027


In [21]:
fff = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

N_lags = 10

for f in fff:
  for i in range(1, N_lags):
    ff5[f'{f}_{i}L'] = ff5[f].shift(-i)


In [22]:
mon = pd.DataFrame(df[tickers[0]]).resample('ME').last()
mon_rets = mon.pct_change().dropna()

factors = ff5.rename(columns={'date_ff_factors': 'Date'})
factors_0 = pd.merge(mon_rets, factors, on='Date', how='left')
factors_0 = factors_0.dropna()

Y = (factors_0[tickers[0]] - factors_0['RF'])
X = factors_0.drop(
    columns=['RF', tickers[0]]).set_index('Date')

In [23]:
top15features_stable = ['RMW_5L',
                        'RMW_2L',
                        'SMB_6L',
                        'CMA_9L',
                        'SMB_2L',
                        'HML_5L',
                        'Mkt-RF_7L',
                        'CMA_4L',
                        'CMA_6L',
                        'RMW_6L',
                        'CMA_8L',
                        'Mkt-RF_2L',
                        'CMA',
                        'RMW',
                        'Mkt-RF']

In [24]:

X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=False)

## Обучение LSTM

#### How to use LSTM for 5-factor Fama French model

`https://downloads.hindawi.com/journals/ddns/2022/3936122.pdf?_gl=1*8t8631*_ga*MTYwMjYwNDYwNy4xNzE1MzYyMzYy*_ga_NF5QFMJT5V*MTcxNTM2MjM2Mi4xLjEuMTcxNTM2MjQyNC42MC4wLjA.&_ga=2.120926721.845710784.1715362363-1602604607.1715362362`

In [25]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [26]:
import torch.nn as nn
import numpy as np

# Определение размерности входных данных
input_size = 15  # Размерность входного слоя (например, количество признаков)
hidden_size = 10  # Размерность скрытого слоя
num_layers = 10  # Количество слоев LSTM
# Размерность выходного слоя (например, количество прогнозируемых значений)
output_size = 1

# Определение модели LSTM


class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size,
                            num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)  # Инициализация скрытого состояния
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)  # Инициализация ячеек состояния
        out, _ = self.lstm(x, (h0, c0))  # Прямое распространение через LSTM
        # Применение полносвязного слоя к последнему тензору времени
        out = self.fc(out[:, -1, :])
        return out

In [27]:

# Создание экземпляра модели
model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Определение функции потерь и оптимизатора
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [28]:
# train tensors
X_train_tensor = torch.tensor(
    X_train.loc[:, top15features_stable].values, dtype=torch.float32)
X_train_tensor = X_train_tensor.unsqueeze(1)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)


# test tensors
X_test_tensor = torch.tensor(
    X_test.loc[:, top15features_stable].values, dtype=torch.float32)
X_test_tensor = X_test_tensor.unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

model.to(device)
X_train_tensor = X_train_tensor.to(device)
y_train_tensor = y_train_tensor.to(device)

X_test_tensor = X_test_tensor.to(device)
y_test_tensor = y_test_tensor.to(device)

In [29]:
from tqdm import tqdm

# Обучение модели
for epoch in tqdm(range(1000)):  # Пример цикла обучения
    optimizer.zero_grad()  # Очистка градиентов
    outputs = model(X_train_tensor)  # Прогноз модели
    loss = criterion(outputs, y_train_tensor)  # Вычисление потерь
    loss.backward()  # Обратное распространение ошибки
    optimizer.step()  # Обновление весов
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

  return F.mse_loss(input, target, reduction=self.reduction)
  3%|▎         | 28/1000 [00:00<00:06, 149.40it/s]

Epoch 1, Loss: 0.01596958190202713
Epoch 2, Loss: 0.015443412587046623
Epoch 3, Loss: 0.01493040844798088
Epoch 4, Loss: 0.014430873095989227
Epoch 5, Loss: 0.013945050537586212
Epoch 6, Loss: 0.013473118655383587
Epoch 7, Loss: 0.013015287928283215
Epoch 8, Loss: 0.01257181353867054
Epoch 9, Loss: 0.012142952531576157
Epoch 10, Loss: 0.011728955432772636
Epoch 11, Loss: 0.01133005227893591
Epoch 12, Loss: 0.01094646006822586
Epoch 13, Loss: 0.010578353889286518
Epoch 14, Loss: 0.01022589486092329
Epoch 15, Loss: 0.009889213368296623
Epoch 16, Loss: 0.009568408131599426
Epoch 17, Loss: 0.009263547137379646
Epoch 18, Loss: 0.008974662981927395
Epoch 19, Loss: 0.008701753802597523
Epoch 20, Loss: 0.008444778621196747
Epoch 21, Loss: 0.008203661069273949
Epoch 22, Loss: 0.007978281937539577
Epoch 23, Loss: 0.0077684782445430756
Epoch 24, Loss: 0.007574041374027729
Epoch 25, Loss: 0.007394722197204828
Epoch 26, Loss: 0.007230218965560198
Epoch 27, Loss: 0.007080181036144495
Epoch 28, Loss:

 12%|█▏        | 120/1000 [00:00<00:02, 354.62it/s]

Epoch 69, Loss: 0.0061893039382994175
Epoch 70, Loss: 0.006187339313328266
Epoch 71, Loss: 0.006185675971210003
Epoch 72, Loss: 0.006184306927025318
Epoch 73, Loss: 0.006183214019984007
Epoch 74, Loss: 0.00618237629532814
Epoch 75, Loss: 0.006181766744703054
Epoch 76, Loss: 0.006181355100125074
Epoch 77, Loss: 0.006181110627949238
Epoch 78, Loss: 0.006181003525853157
Epoch 79, Loss: 0.00618100306019187
Epoch 80, Loss: 0.006181081756949425
Epoch 81, Loss: 0.00618121400475502
Epoch 82, Loss: 0.006181378848850727
Epoch 83, Loss: 0.006181556731462479
Epoch 84, Loss: 0.006181732285767794
Epoch 85, Loss: 0.006181892938911915
Epoch 86, Loss: 0.006182030308991671
Epoch 87, Loss: 0.006182139739394188
Epoch 88, Loss: 0.006182217504829168
Epoch 89, Loss: 0.006182260811328888
Epoch 90, Loss: 0.006182271987199783
Epoch 91, Loss: 0.006182252895087004
Epoch 92, Loss: 0.006182207725942135
Epoch 93, Loss: 0.006182139739394188
Epoch 94, Loss: 0.006182054989039898
Epoch 95, Loss: 0.006181956734508276
Epo

 21%|██        | 208/1000 [00:00<00:01, 407.62it/s]

Epoch 157, Loss: 0.006180992815643549
Epoch 158, Loss: 0.006180992815643549
Epoch 159, Loss: 0.006180992349982262
Epoch 160, Loss: 0.006180991884320974
Epoch 161, Loss: 0.006180992349982262
Epoch 162, Loss: 0.006180991418659687
Epoch 163, Loss: 0.006180991418659687
Epoch 164, Loss: 0.006180991884320974
Epoch 165, Loss: 0.006180991418659687
Epoch 166, Loss: 0.006180991884320974
Epoch 167, Loss: 0.006180991418659687
Epoch 168, Loss: 0.006180992349982262
Epoch 169, Loss: 0.006180992349982262
Epoch 170, Loss: 0.006180991418659687
Epoch 171, Loss: 0.006180992349982262
Epoch 172, Loss: 0.006180992349982262
Epoch 173, Loss: 0.006180991418659687
Epoch 174, Loss: 0.006180992349982262
Epoch 175, Loss: 0.006180991884320974
Epoch 176, Loss: 0.006180991884320974
Epoch 177, Loss: 0.006180991884320974
Epoch 178, Loss: 0.006180991418659687
Epoch 179, Loss: 0.006180992349982262
Epoch 180, Loss: 0.006180991884320974
Epoch 181, Loss: 0.006180992349982262
Epoch 182, Loss: 0.006180991884320974
Epoch 183, L

 36%|███▋      | 363/1000 [00:00<00:01, 481.20it/s]

Epoch 259, Loss: 0.006180992349982262
Epoch 260, Loss: 0.006180991884320974
Epoch 261, Loss: 0.006180991884320974
Epoch 262, Loss: 0.006180991418659687
Epoch 263, Loss: 0.006180991884320974
Epoch 264, Loss: 0.006180991884320974
Epoch 265, Loss: 0.006180991418659687
Epoch 266, Loss: 0.006180992349982262
Epoch 267, Loss: 0.006180992349982262
Epoch 268, Loss: 0.006180992349982262
Epoch 269, Loss: 0.006180992349982262
Epoch 270, Loss: 0.006180992349982262
Epoch 271, Loss: 0.006180992349982262
Epoch 272, Loss: 0.006180992349982262
Epoch 273, Loss: 0.006180992349982262
Epoch 274, Loss: 0.006180992349982262
Epoch 275, Loss: 0.006180992349982262
Epoch 276, Loss: 0.006180992349982262
Epoch 277, Loss: 0.006180992349982262
Epoch 278, Loss: 0.006180992349982262
Epoch 279, Loss: 0.006180992349982262
Epoch 280, Loss: 0.006180992349982262
Epoch 281, Loss: 0.006180992349982262
Epoch 282, Loss: 0.006180992349982262
Epoch 283, Loss: 0.006180992349982262
Epoch 284, Loss: 0.006180992349982262
Epoch 285, L

 41%|████      | 412/1000 [00:01<00:01, 480.09it/s]

Epoch 364, Loss: 0.006180991884320974
Epoch 365, Loss: 0.006180991884320974
Epoch 366, Loss: 0.006180991884320974
Epoch 367, Loss: 0.006180991884320974
Epoch 368, Loss: 0.006180991884320974
Epoch 369, Loss: 0.006180991884320974
Epoch 370, Loss: 0.006180991884320974
Epoch 371, Loss: 0.006180991884320974
Epoch 372, Loss: 0.006180991884320974
Epoch 373, Loss: 0.006180991884320974
Epoch 374, Loss: 0.006180991884320974
Epoch 375, Loss: 0.006180991884320974
Epoch 376, Loss: 0.006180991884320974
Epoch 377, Loss: 0.006180991884320974
Epoch 378, Loss: 0.006180991884320974
Epoch 379, Loss: 0.006180991884320974
Epoch 380, Loss: 0.006180991884320974
Epoch 381, Loss: 0.006180991884320974
Epoch 382, Loss: 0.006180991884320974
Epoch 383, Loss: 0.006180991884320974
Epoch 384, Loss: 0.006180991884320974
Epoch 385, Loss: 0.006180991884320974
Epoch 386, Loss: 0.006180991884320974
Epoch 387, Loss: 0.006180991884320974
Epoch 388, Loss: 0.006180991884320974
Epoch 389, Loss: 0.006180991884320974
Epoch 390, L

 51%|█████     | 510/1000 [00:01<00:01, 480.99it/s]

Epoch 460, Loss: 0.006180991884320974
Epoch 461, Loss: 0.006180991884320974
Epoch 462, Loss: 0.006180991884320974
Epoch 463, Loss: 0.006180991884320974
Epoch 464, Loss: 0.006180991884320974
Epoch 465, Loss: 0.006180991884320974
Epoch 466, Loss: 0.006180991884320974
Epoch 467, Loss: 0.006180991884320974
Epoch 468, Loss: 0.006180991884320974
Epoch 469, Loss: 0.006180991884320974
Epoch 470, Loss: 0.006180991884320974
Epoch 471, Loss: 0.006180991884320974
Epoch 472, Loss: 0.006180991884320974
Epoch 473, Loss: 0.006180991884320974
Epoch 474, Loss: 0.006180991884320974
Epoch 475, Loss: 0.006180991884320974
Epoch 476, Loss: 0.006180991884320974
Epoch 477, Loss: 0.006180991884320974
Epoch 478, Loss: 0.006180991884320974
Epoch 479, Loss: 0.006180991884320974
Epoch 480, Loss: 0.006180991884320974
Epoch 481, Loss: 0.006180991884320974
Epoch 482, Loss: 0.006180991884320974
Epoch 483, Loss: 0.006180991884320974
Epoch 484, Loss: 0.006180991884320974
Epoch 485, Loss: 0.006180991884320974
Epoch 486, L

 61%|██████    | 608/1000 [00:01<00:00, 480.86it/s]

Epoch 557, Loss: 0.006180991884320974
Epoch 558, Loss: 0.006180991884320974
Epoch 559, Loss: 0.006180991884320974
Epoch 560, Loss: 0.006180991884320974
Epoch 561, Loss: 0.006180991884320974
Epoch 562, Loss: 0.006180991884320974
Epoch 563, Loss: 0.006180991884320974
Epoch 564, Loss: 0.006180991884320974
Epoch 565, Loss: 0.006180991884320974
Epoch 566, Loss: 0.006180991884320974
Epoch 567, Loss: 0.006180991884320974
Epoch 568, Loss: 0.006180991884320974
Epoch 569, Loss: 0.006180991884320974
Epoch 570, Loss: 0.006180991884320974
Epoch 571, Loss: 0.006180991884320974
Epoch 572, Loss: 0.006180991884320974
Epoch 573, Loss: 0.006180991884320974
Epoch 574, Loss: 0.006180991884320974
Epoch 575, Loss: 0.006180991884320974
Epoch 576, Loss: 0.006180991884320974
Epoch 577, Loss: 0.006180991884320974
Epoch 578, Loss: 0.006180991884320974
Epoch 579, Loss: 0.006180991884320974
Epoch 580, Loss: 0.006180991884320974
Epoch 581, Loss: 0.006180991884320974
Epoch 582, Loss: 0.006180991884320974
Epoch 583, L

 71%|███████   | 708/1000 [00:01<00:00, 483.65it/s]

Epoch 654, Loss: 0.006180991884320974
Epoch 655, Loss: 0.006180991884320974
Epoch 656, Loss: 0.006180991884320974
Epoch 657, Loss: 0.006180991884320974
Epoch 658, Loss: 0.006180991884320974
Epoch 659, Loss: 0.006180991884320974
Epoch 660, Loss: 0.006180991884320974
Epoch 661, Loss: 0.006180991884320974
Epoch 662, Loss: 0.006180991884320974
Epoch 663, Loss: 0.006180991884320974
Epoch 664, Loss: 0.006180991884320974
Epoch 665, Loss: 0.006180991884320974
Epoch 666, Loss: 0.006180991884320974
Epoch 667, Loss: 0.006180991884320974
Epoch 668, Loss: 0.006180991884320974
Epoch 669, Loss: 0.006180991884320974
Epoch 670, Loss: 0.006180991884320974
Epoch 671, Loss: 0.006180991884320974
Epoch 672, Loss: 0.006180991884320974
Epoch 673, Loss: 0.006180991884320974
Epoch 674, Loss: 0.006180991884320974
Epoch 675, Loss: 0.006180991884320974
Epoch 676, Loss: 0.006180991884320974
Epoch 677, Loss: 0.006180991884320974
Epoch 678, Loss: 0.006180991884320974
Epoch 679, Loss: 0.006180991884320974
Epoch 680, L

 81%|████████  | 808/1000 [00:01<00:00, 488.41it/s]

Epoch 755, Loss: 0.006180991884320974
Epoch 756, Loss: 0.006180991884320974
Epoch 757, Loss: 0.006180991884320974
Epoch 758, Loss: 0.006180991884320974
Epoch 759, Loss: 0.006180991884320974
Epoch 760, Loss: 0.006180991884320974
Epoch 761, Loss: 0.006180991884320974
Epoch 762, Loss: 0.006180991884320974
Epoch 763, Loss: 0.006180991884320974
Epoch 764, Loss: 0.006180991884320974
Epoch 765, Loss: 0.006180991884320974
Epoch 766, Loss: 0.006180991884320974
Epoch 767, Loss: 0.006180991884320974
Epoch 768, Loss: 0.006180991884320974
Epoch 769, Loss: 0.006180991884320974
Epoch 770, Loss: 0.006180991884320974
Epoch 771, Loss: 0.006180991884320974
Epoch 772, Loss: 0.006180991884320974
Epoch 773, Loss: 0.006180991884320974
Epoch 774, Loss: 0.006180991884320974
Epoch 775, Loss: 0.006180991884320974
Epoch 776, Loss: 0.006180991884320974
Epoch 777, Loss: 0.006180991884320974
Epoch 778, Loss: 0.006180991884320974
Epoch 779, Loss: 0.006180991884320974
Epoch 780, Loss: 0.006180991884320974
Epoch 781, L

 91%|█████████ | 910/1000 [00:02<00:00, 497.60it/s]

Epoch 855, Loss: 0.006180991884320974
Epoch 856, Loss: 0.006180991884320974
Epoch 857, Loss: 0.006180991884320974
Epoch 858, Loss: 0.006180991884320974
Epoch 859, Loss: 0.006180991884320974
Epoch 860, Loss: 0.006180991884320974
Epoch 861, Loss: 0.006180991884320974
Epoch 862, Loss: 0.006180991884320974
Epoch 863, Loss: 0.006180991884320974
Epoch 864, Loss: 0.006180991884320974
Epoch 865, Loss: 0.006180991884320974
Epoch 866, Loss: 0.006180991884320974
Epoch 867, Loss: 0.006180991884320974
Epoch 868, Loss: 0.006180991884320974
Epoch 869, Loss: 0.006180991884320974
Epoch 870, Loss: 0.006180991884320974
Epoch 871, Loss: 0.006180991884320974
Epoch 872, Loss: 0.006180991884320974
Epoch 873, Loss: 0.006180991884320974
Epoch 874, Loss: 0.006180991884320974
Epoch 875, Loss: 0.006180991884320974
Epoch 876, Loss: 0.006180991884320974
Epoch 877, Loss: 0.006180991884320974
Epoch 878, Loss: 0.006180991884320974
Epoch 879, Loss: 0.006180991884320974
Epoch 880, Loss: 0.006180991884320974
Epoch 881, L

100%|██████████| 1000/1000 [00:02<00:00, 448.37it/s]

Epoch 958, Loss: 0.006180991884320974
Epoch 959, Loss: 0.006180991884320974
Epoch 960, Loss: 0.006180991884320974
Epoch 961, Loss: 0.006180991884320974
Epoch 962, Loss: 0.006180991884320974
Epoch 963, Loss: 0.006180991884320974
Epoch 964, Loss: 0.006180991884320974
Epoch 965, Loss: 0.006180991884320974
Epoch 966, Loss: 0.006180991884320974
Epoch 967, Loss: 0.006180991884320974
Epoch 968, Loss: 0.006180991884320974
Epoch 969, Loss: 0.006180991884320974
Epoch 970, Loss: 0.006180991884320974
Epoch 971, Loss: 0.006180991884320974
Epoch 972, Loss: 0.006180991884320974
Epoch 973, Loss: 0.006180991884320974
Epoch 974, Loss: 0.006180991884320974
Epoch 975, Loss: 0.006180991884320974
Epoch 976, Loss: 0.006180991884320974
Epoch 977, Loss: 0.006180991884320974
Epoch 978, Loss: 0.006180991884320974
Epoch 979, Loss: 0.006180991884320974
Epoch 980, Loss: 0.006180991884320974
Epoch 981, Loss: 0.006180991884320974
Epoch 982, Loss: 0.006180991884320974
Epoch 983, Loss: 0.006180991884320974
Epoch 984, L




Протестируем модель:

In [30]:
from sklearn.metrics import r2_score


# Выполнение предсказаний на тестовых данных
with torch.no_grad():
    model.eval()  # Установка модели в режим оценки
    y_pred = model(X_test_tensor)  # Предсказания модели
    y_pred = y_pred.cpu().numpy()  # Конвертация предсказаний в NumPy массив

# Вычисление R^2
r2 = r2_score(y_test.to_numpy(), y_pred.reshape(1, -1)[0])
print(f'R^2 score: {r2}')


R^2 score: -0.009416054007495234
