# Base model for stock prediction

In [1]:
from tools.log_controller import LogController
from pathlib import Path

# Build the project's log controller from config/logging_config.json and start it
# before any other project module is used, so later cells emit formatted log lines.
log_controller = LogController(
    config_path=Path('config') / 'logging_config.json',
)
log_controller.start()

In [2]:
from core.data_repository import DataRepository
from pathlib import Path

# Open the data repository rooted at ./repo. get_dataframes() returns a mapping
# keyed by ticker (the log below shows AAPL and AMZN being loaded); only the
# AMZN frame is used in this notebook.
data_repository = DataRepository(
    repo_path=Path('repo'),
)
AMZN_periodic_data = data_repository.get_dataframes()['AMZN']

# Bare last expression -> rich display of the daily Close/High/Low/Open/Volume frame.
AMZN_periodic_data

[32m 2025-09-04 23:09:39 - data_repository - INFO - Loaded: AAPL[0m
[32m 2025-09-04 23:09:39 - data_repository - INFO - Loaded: AMZN[0m
[32m 2025-09-04 23:09:39 - data_repository - INFO - Loaded periodic stock market data for: {'AAPL', 'AMZN'}[0m


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-10-29,0.9285,0.9460,0.9035,0.9255,144840000
2002-10-30,0.9540,0.9610,0.9235,0.9425,137776000
2002-10-31,0.9680,0.9740,0.9425,0.9515,136452000
2002-11-01,0.9900,0.9950,0.9545,0.9610,110988000
2002-11-04,0.9390,0.9905,0.9295,0.9695,259270000
...,...,...,...,...,...
2003-10-22,2.7015,2.8075,2.6865,2.8025,654038000
2003-10-23,2.7160,2.7260,2.6455,2.6490,292234000
2003-10-24,2.7255,2.7470,2.6500,2.7275,208764000
2003-10-27,2.7410,2.7625,2.7250,2.7450,127838000


In [3]:
from core.data_preparator import DataPreparator

# Turn the daily series into a supervised-learning table. With t=5 the result
# (shown below) keeps the Close target and adds five lag columns Close_0..Close_4,
# where Close_0 is the previous row's close and Close_4 the oldest one.
AMZN_supervised_data = DataPreparator.reformat_periodic_to_supervised_data(
    t=5,
    target_column='Close',
    dataframe=AMZN_periodic_data,
)

# Bare last expression -> rich display of the supervised frame.
AMZN_supervised_data

[32m 2025-09-04 23:09:39 - data_preparator - INFO - Preparing data for target column: Close, with t=5[0m


Unnamed: 0_level_0,Close,Close_0,Close_1,Close_2,Close_3,Close_4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2002-11-05,0.9360,0.9390,0.9900,0.9680,0.9540,0.9285
2002-11-06,0.9670,0.9360,0.9390,0.9900,0.9680,0.9540
2002-11-07,0.9560,0.9670,0.9360,0.9390,0.9900,0.9680
2002-11-08,0.9755,0.9560,0.9670,0.9360,0.9390,0.9900
2002-11-11,0.9500,0.9755,0.9560,0.9670,0.9360,0.9390
...,...,...,...,...,...,...
2003-10-22,2.7015,2.9675,2.9795,2.9845,2.9955,2.9275
2003-10-23,2.7160,2.7015,2.9675,2.9795,2.9845,2.9955
2003-10-24,2.7255,2.7160,2.7015,2.9675,2.9795,2.9845
2003-10-27,2.7410,2.7255,2.7160,2.7015,2.9675,2.9795


In [4]:
from core.dataset import StockDataset

# Wrap the supervised frame in the project's dataset type, with Close as the target.
AMZN_dataset = StockDataset(
    ticker='AMZN',
    target_column='Close',
    data=AMZN_supervised_data,
)

# Sanity check: show the feature frame, the target series and both shapes.
display(
    AMZN_dataset.X,
    AMZN_dataset.y,
    AMZN_dataset.X.shape,
    AMZN_dataset.y.shape,
)

[32m 2025-09-04 23:09:41 - dataset - INFO - Created stock dataset for AMZN, focusing target: Close[0m


Unnamed: 0_level_0,Close_0,Close_1,Close_2,Close_3,Close_4
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-11-05,0.9390,0.9900,0.9680,0.9540,0.9285
2002-11-06,0.9360,0.9390,0.9900,0.9680,0.9540
2002-11-07,0.9670,0.9360,0.9390,0.9900,0.9680
2002-11-08,0.9560,0.9670,0.9360,0.9390,0.9900
2002-11-11,0.9755,0.9560,0.9670,0.9360,0.9390
...,...,...,...,...,...
2003-10-22,2.9675,2.9795,2.9845,2.9955,2.9275
2003-10-23,2.7015,2.9675,2.9795,2.9845,2.9955
2003-10-24,2.7160,2.7015,2.9675,2.9795,2.9845
2003-10-27,2.7255,2.7160,2.7015,2.9675,2.9795


Date
2002-11-05    0.9360
2002-11-06    0.9670
2002-11-07    0.9560
2002-11-08    0.9755
2002-11-11    0.9500
               ...  
2003-10-22    2.7015
2003-10-23    2.7160
2003-10-24    2.7255
2003-10-27    2.7410
2003-10-28    2.8365
Name: Close, Length: 247, dtype: float64

(247, 5)

(247,)

In [5]:
from models.base_model import BaseStockModel

# Instantiate the baseline model for AMZN. input_dim is taken from the dataset so
# it follows the number of feature columns (5 lag columns in the run shown above).
# NOTE(review): the model's log line prints ticker=('AMZN',), i.e. a 1-tuple rather
# than a string -- possibly a stray trailing comma inside BaseStockModel; confirm.
AMZN_base_model = BaseStockModel(
    id='base_amzn_model',
    ticker='AMZN',
    output_dim=1,
    layer_dim=2,
    hidden_dim=10,
    input_dim=AMZN_dataset.X.shape[1],
)

[32m 2025-09-04 23:09:41 - model - INFO - Model(id=base_amzn_model, ticker=('AMZN',), input_dimension=5)[0m


In [6]:
from torch.optim import Adam
from torch.nn import MSELoss

# Training criterion (mean squared error) and an Adam optimiser over every
# parameter exposed by the base model.
loss_function = MSELoss()
optimizer = Adam(
    params=AMZN_base_model.parameters(),
    lr=0.001,
)

In [None]:
from torch.utils.data import DataLoader

# Mini-batch loader over the AMZN dataset; batches are reshuffled every epoch.
AMZN_dataloader = DataLoader(AMZN_dataset, batch_size=16, shuffle=True)

epochs = 5

# NOTE(review): the recorded run of this cell fails inside the model's forward
# pass. X arrives as a 2-D (16, 5) batch, which the RuntimeError shows is being
# treated as an *unbatched* sequence, while the model builds 3-D h0/c0 of shape
# (2, 5, 10). The mismatch has to be resolved against BaseStockModel.forward
# (not visible from this notebook) -- either the model or the layout of X fed
# to it needs adjusting; TODO confirm which layout the model expects.
for epoch in range(epochs):
    # Sample-weighted running sum so the reported loss covers the whole epoch
    # rather than only the last (possibly smaller) batch.
    epoch_loss_sum = 0.0
    samples_seen = 0

    for X, y in AMZN_dataloader:
        optimizer.zero_grad()
        outputs = AMZN_base_model(X)

        # y comes out of the loader as a 1-D (batch,) tensor. With output_dim=1
        # the predictions presumably carry a trailing singleton dimension, and
        # MSELoss would then silently broadcast (batch, 1) against (batch,) to
        # (batch, batch). Give the target the prediction's shape and dtype;
        # both operations are no-ops when they already match.
        target = y.reshape(outputs.shape).to(outputs.dtype)

        loss = loss_function(outputs, target)
        loss.backward()
        optimizer.step()

        batch_len = y.size(0)
        epoch_loss_sum += loss.item() * batch_len
        samples_seen += batch_len

    # max(..., 1) keeps the report well-defined even if the loader is empty.
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss_sum / max(samples_seen, 1):.4f}')

torch.Size([16, 5]) torch.Size([16])
[37m 2025-09-04 23:09:43 - model - DEBUG - Forward pass with input shape: torch.Size([16, 5]), h0 shape: torch.Size([2, 5, 10]), c0 shape: torch.Size([2, 5, 10])[0m


RuntimeError: For unbatched 2-D input, hx and cx should also be 2-D but got (3-D, 3-D) tensors