# Base model for stock prediction

In [1]:
from tools.log_controller import LogController
from pathlib import Path

log_controller = LogController(config_path=Path('config', 'logging_config.json'))
log_controller.start()

In [2]:
from core.data_repository import DataRepository
from pathlib import Path

data_repository = DataRepository(repo_path=Path('repo'))
AMZN_periodic_data = data_repository.get_dataframes()['AMZN']

AMZN_periodic_data

[32m 2025-09-05 12:30:58 - data_repository - INFO - Loaded: AAPL[0m
[32m 2025-09-05 12:30:58 - data_repository - INFO - Loaded: AMZN[0m
[32m 2025-09-05 12:30:58 - data_repository - INFO - Loaded periodic stock market data for: {'AAPL', 'AMZN'}[0m


Unnamed: 0_level_0,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2002-10-29,0.9285,0.9460,0.9035,0.9255,144840000
2002-10-30,0.9540,0.9610,0.9235,0.9425,137776000
2002-10-31,0.9680,0.9740,0.9425,0.9515,136452000
2002-11-01,0.9900,0.9950,0.9545,0.9610,110988000
2002-11-04,0.9390,0.9905,0.9295,0.9695,259270000
...,...,...,...,...,...
2003-10-22,2.7015,2.8075,2.6865,2.8025,654038000
2003-10-23,2.7160,2.7260,2.6455,2.6490,292234000
2003-10-24,2.7255,2.7470,2.6500,2.7275,208764000
2003-10-27,2.7410,2.7625,2.7250,2.7450,127838000


In [3]:
from core.data_preparator import DataPreparator

AMZN_supervised_data = DataPreparator.reformat_periodic_to_supervised_data(
    dataframe=AMZN_periodic_data,
    target_column='Close',
    t=3
)

AMZN_supervised_data

[32m 2025-09-05 12:30:58 - data_preparator - INFO - Preparing data for target column: Close, with t=3[0m


Unnamed: 0_level_0,Close,Close_0,Close_1,Close_2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2002-11-01,0.9900,0.9680,0.9540,0.9285
2002-11-04,0.9390,0.9900,0.9680,0.9540
2002-11-05,0.9360,0.9390,0.9900,0.9680
2002-11-06,0.9670,0.9360,0.9390,0.9900
2002-11-07,0.9560,0.9670,0.9360,0.9390
...,...,...,...,...
2003-10-22,2.7015,2.9675,2.9795,2.9845
2003-10-23,2.7160,2.7015,2.9675,2.9795
2003-10-24,2.7255,2.7160,2.7015,2.9675
2003-10-27,2.7410,2.7255,2.7160,2.7015


In [4]:
from core.dataset import StockDataset
from sklearn.model_selection import train_test_split

AMZN_train_dataset, AMZN_test_dataset = train_test_split(AMZN_supervised_data, test_size=0.2, shuffle=True, random_state=42)
AMZN_train_dataset, AMZN_validation_dataset = train_test_split(AMZN_train_dataset, test_size=0.3, shuffle=True, random_state=42)

display(AMZN_train_dataset.shape, AMZN_validation_dataset.shape, AMZN_test_dataset.shape)

(139, 4)

(60, 4)

(50, 4)

In [5]:
AMZN_train_dataset = StockDataset(AMZN_train_dataset)
AMZN_validation_dataset = StockDataset(AMZN_validation_dataset)
AMZN_test_dataset = StockDataset(AMZN_test_dataset)

[32m 2025-09-05 12:31:01 - dataset - INFO - Created stock dataset for AMZN, focusing target: Close[0m
[32m 2025-09-05 12:31:01 - dataset - INFO - Created stock dataset for AMZN, focusing target: Close[0m
[32m 2025-09-05 12:31:01 - dataset - INFO - Created stock dataset for AMZN, focusing target: Close[0m


In [6]:
from models.base_model import BaseStockModel

AMZN_base_model = BaseStockModel(
    id='base_amzn_model',
    ticker='AMZN',
    input_dim=1,
    hidden_dim=4,
    layer_dim=1,
    output_dim=1
)

[32m 2025-09-05 12:31:01 - model - INFO - Model(id=base_amzn_model,
ticker=('AMZN',),
input_dimension=1),
hidden_dimension=4,
layer_dimension=1,
output_dimension=1)[0m


In [7]:
from torch.optim import Adam
from torch.nn import MSELoss

optimizer = Adam(AMZN_base_model.parameters(), lr=0.001)
loss_function = MSELoss()

In [16]:
from torch.utils.data import DataLoader
import numpy as np

AMZN_train_data_loader = DataLoader(AMZN_train_dataset, batch_size=16, shuffle=True)
AMZN_validation_data_loader = DataLoader(AMZN_validation_dataset, batch_size=10, shuffle=True)

epochs = 5

for epoch in range(epochs):
    for X,y in AMZN_train_data_loader:
        optimizer.zero_grad()
        X, y = X.reshape(-1,3,1), y.reshape(-1,1) # Adjust for t
        outputs, _ = AMZN_base_model(X)
        loss = loss_function(outputs, y)
        loss.backward()
        optimizer.step()
        
        
    print(f'Epoch [{epoch+1}/{epochs}], RMSE: {np.sqrt(loss.item()):.4f}')

Epoch [1/5], RMSE: 0.3431
Epoch [2/5], RMSE: 0.4626
Epoch [3/5], RMSE: 0.3330
Epoch [4/5], RMSE: 0.4666
Epoch [5/5], RMSE: 0.3746
