In [1]:
!conda activate DSU-FIN
import torch
import os
from torch.utils.data import DataLoader
import torch.nn as nn
import time
from TCN_AE_model_2 import CustomDataset, encoder_decoder_tcn
from bokeh.plotting import figure, show, curdoc
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook
from jupyter_bokeh.widgets import BokehModel

# Prefer the first CUDA GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.cuda.get_device_name() raises when no CUDA device is available, so it is
# only queried on the GPU path; on the CPU fallback a plain label is reported.
device_name = torch.cuda.get_device_name(0) if device.type == "cuda" else "CPU"

print(f' Device: {device_name}. Now using: {device.type}', '\n',
      f'Torch Version: {torch.__version__}')

# Data files and checkpoints are addressed with relative paths further down, so
# the working directory is pinned to the project folder.
# NOTE(review): this is an absolute, machine-specific Windows path — consider
# making it configurable.
if os.getcwd() != 'e:\\python_projects\\notebooks\\Deep learning\\FIN':
    os.chdir('e:\\python_projects\\notebooks\\Deep learning\\FIN')

 Device: NVIDIA GeForce RTX 4070 Ti. Now using: cuda 
 Torch Version: 2.7.1+cu128


In [None]:
# Route Bokeh output to the notebook instead of a standalone HTML file.
output_notebook()

# Columns start empty; they are filled in as epochs complete.
source = ColumnDataSource(data=dict(epoch=[], Train_loss=[], Val_loss=[]))

p = figure(
    title="Losses Over Epochs",
    x_axis_label="Epoch",
    y_axis_label="Loss Value",
    width=1000,
    height=400,
)

# One line glyph per loss curve: (data column, line colour, legend entry).
for loss_column, line_colour, legend_text in (
    ('Train_loss', 'green', 'Train Loss'),
    ('Val_loss', 'purple', 'Validation Loss'),
):
    p.line(x='epoch', y=loss_column, source=source,
           color=line_colour, legend_label=legend_text)

# Clicking a legend entry toggles the visibility of its curve.
p.legend.location = "bottom_left"
p.legend.click_policy = "hide"

# Wrap the figure in a Jupyter widget so that later changes to `source`
# can be pushed to the already-rendered plot.
bokeh_widget = BokehModel(p)
display(bokeh_widget)

# ---- Windowing ---------------------------------------------------------------
# Number of samples in one hour: 4 samples per minute (15 s spacing, per the
# original note and the "_15s" file names) times 60 minutes.
hour = 60 * 4
win_size_hours = 24
win_size = hour * win_size_hours   # samples per window
stride = 6 * hour                  # samples between consecutive window starts

# ---- Run configuration -------------------------------------------------------
BATCH_SIZE = 24
N_EPOCHS = 100

# Epoch numbering starts from here (0 = fresh run).
last_epoch = 0

# ---- Data and model ----------------------------------------------------------
train_dataset = CustomDataset('tina_train_15s.parquet', win_size=win_size, stride=stride)
val_dataset = CustomDataset('tina_val_15s.parquet', win_size=win_size, stride=stride)
model = encoder_decoder_tcn().to(device)

# ---- Optimisation ------------------------------------------------------------
lr = 5e-5
wd = 1e-6

optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
# Cosine decay from `lr` towards `eta_min` over N_EPOCHS scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=N_EPOCHS, eta_min=5e-6
)
criterion = nn.SmoothL1Loss(reduction='mean')
# Alternative criterion kept for reference:
# criterion = nn.MSELoss(reduction='mean')

# ---- Bookkeeping -------------------------------------------------------------
# Per-epoch history used for plotting.
train_losses, val_losses, epochs = [], [], []

# Lowest validation loss seen so far; any finite loss beats the initial value.
best_loss = torch.inf

# ---- Loaders -----------------------------------------------------------------
# Both loaders iterate in dataset order and load in the main process.
loader_options = dict(batch_size=BATCH_SIZE, shuffle=False, num_workers=0)
train_loader = DataLoader(train_dataset, **loader_options)
val_loader = DataLoader(val_dataset, **loader_options)

# Checkpoint locations for the best-so-far weights and optimizer state.
# torch.save() does not create missing parent directories, so the folder is
# created up front; os.path.join keeps the paths valid on any OS (on Windows
# it yields the same 'model states\\...' paths as before).
checkpoint_dir = 'model states'
os.makedirs(checkpoint_dir, exist_ok=True)
model_state_name = os.path.join(checkpoint_dir, 'TCN-AE_15s_SL1.pth')
optimizer_state_name = os.path.join(checkpoint_dir, 'optimizer_TCN-AE_15s_SL1.pth')

for ep in range(last_epoch, last_epoch+N_EPOCHS):
    start = time.time()

    # ---- Training pass ----
    epoch_train_loss = 0
    model.train(True)
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        optimizer.zero_grad()

        output = model(x_batch)
        loss = criterion(output, y_batch)
        epoch_train_loss += loss.item()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch mean losses.
    avg_tr_loss = epoch_train_loss / len(train_loader)
    train_losses.append(avg_tr_loss)

    # ---- Validation pass (no gradients, eval-mode layers) ----
    model.eval()
    total_val_loss = 0
    with torch.no_grad():
        for x_batch, y_batch in val_loader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            output = model(x_batch)
            loss = criterion(output, y_batch)
            total_val_loss += loss.item()

    avg_val_loss = total_val_loss / len(val_loader)
    # Learning rate that was in effect during this epoch (read before the
    # scheduler advances below).
    last_lr = optimizer.param_groups[0]['lr']

    val_losses.append(avg_val_loss)
    epochs.append(ep+1)

    # Keep only the checkpoint with the lowest validation loss so far.
    if avg_val_loss < best_loss:
        no_improve_counter = 0
        best_loss = avg_val_loss
        torch.save(model.state_dict(), model_state_name)
        torch.save(optimizer.state_dict(), optimizer_state_name)
        print(f"Best model saved at epoch: {ep+1}")

    lr_scheduler.step()

    # Early stopping is currently disabled; sketch kept for reference:
    # if ep+1 >=30:
    #     no_improve_counter +=1
    #     if no_improve_counter >=5:
    #         print(f'Early stopping at epoch {ep+1}')
    #         break

    end = time.time()
    epoch_duration = end - start
    print(f'Epoch {ep+1}. Train Loss: {avg_tr_loss:.6f}. Validation Loss: {avg_val_loss:.6f}. Last LR: {last_lr:.8f}. time: {epoch_duration: .2f}s')

    # Hand Bokeh fresh copies of the history lists. Assigning the very same
    # list objects that are appended to every epoch makes the new value compare
    # equal to the one the data source already holds, so no change would be
    # pushed to the live plot after the first epoch.
    source.data = {
        'epoch': list(epochs),
        'Train_loss': list(train_losses),
        'Val_loss': list(val_losses),
    }


BokehModel(render_bundle={'docs_json': {'b2edcd1d-f18f-4f14-8eb2-ff5cf250434b': {'version': '3.7.3', 'title': …

Best model saved at epoch: 1
Epoch 1. Train Loss: 1.211976. Validation Loss: 1.037946. Last LR: 0.00005000. time:  25.28s
Epoch 2. Train Loss: 1.084794. Validation Loss: 1.051615. Last LR: 0.00004999. time:  24.72s
Best model saved at epoch: 3
Epoch 3. Train Loss: 1.042947. Validation Loss: 1.030520. Last LR: 0.00004996. time:  24.88s
Best model saved at epoch: 4
Epoch 4. Train Loss: 1.010016. Validation Loss: 0.987489. Last LR: 0.00004990. time:  24.83s
Best model saved at epoch: 5
Epoch 5. Train Loss: 0.979869. Validation Loss: 0.919657. Last LR: 0.00004982. time:  24.56s
Best model saved at epoch: 6
Epoch 6. Train Loss: 0.948251. Validation Loss: 0.863913. Last LR: 0.00004972. time:  25.05s
Best model saved at epoch: 7
Epoch 7. Train Loss: 0.921244. Validation Loss: 0.809757. Last LR: 0.00004960. time:  24.58s
Best model saved at epoch: 8
Epoch 8. Train Loss: 0.896023. Validation Loss: 0.753744. Last LR: 0.00004946. time:  24.54s
Best model saved at epoch: 9
Epoch 9. Train Loss: 0.8

In [5]:
from bokeh.io import export_png

# Write the loss figure `p` to a PNG in the current working directory; the call
# is the last expression so the notebook shows the value it returns.
# NOTE(review): the file name says "MSE" while the criterion visible in the
# training cell is SmoothL1Loss — confirm the name is still accurate.
loss_plot_filename = "Model training MSE.png"
export_png(p, filename=loss_plot_filename)

'e:\\python_projects\\notebooks\\Deep learning\\FIN\\Model training MSE.png'