In [1]:
from torch.utils.data import DataLoader, random_split
from src.data.dataLoader import StructuralBreakDataLoader
from src.data.sliding_window_dataset import create_dataset_for_training, create_dataset_for_testing

from sklearn.metrics import roc_auc_score
import torch
import torch.nn as nn

In [2]:
data_handler = StructuralBreakDataLoader()
data_handler.load_data(use_crunch=False)
train_dict, test_dict = data_handler.create_train_val_split(0.2)
train_dataset, train_dataset_analysis = create_dataset_for_training(data_dict=train_dict, stride=1)
test_dataset, test_dataset_analysis = create_dataset_for_testing(data_dict=test_dict)

INFO:src.data.dataLoader:Data loaded successfully from local files


Creating sliding window dataset...
Processed 100 series, generated 8100 windows so far...
Processed 200 series, generated 16200 windows so far...
Processed 300 series, generated 24300 windows so far...
Processed 400 series, generated 32400 windows so far...
Processed 500 series, generated 40500 windows so far...
Processed 600 series, generated 48600 windows so far...
Processed 700 series, generated 56700 windows so far...
Processed 800 series, generated 64800 windows so far...
Processed 900 series, generated 72900 windows so far...
Processed 1000 series, generated 81000 windows so far...
Processed 1100 series, generated 89100 windows so far...
Processed 1200 series, generated 97200 windows so far...
Processed 1300 series, generated 105300 windows so far...
Processed 1400 series, generated 113400 windows so far...
Processed 1500 series, generated 121500 windows so far...
Processed 1600 series, generated 129600 windows so far...
Processed 1700 series, generated 137700 windows so far...
P

In [11]:
trainDataLoader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=4)
testDataLoader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=4)

In [12]:
class SimpleNN(nn.Module):
    def __init__(self, input_size, hidden_dim, output_size, num_layers, noise=0.0, dropout_prob=0.0):
        """
        Parameters:
        - input_size: Number of input features.
        - hidden_dim: Dimension of the hidden layers.
        - output_size: Number of output features.
        - use_noise: Whether to apply Gaussian noise to the input.
        - dropout_prob: Dropout probability. Set to 0.0 to disable dropout.
        - num_hidden_layers: Number of fully connected hidden layers.
        """
        super(SimpleNN, self).__init__()

        self.input_size = input_size
        self.hidden_dim = hidden_dim
        self.output_size = output_size

        self.dropout_prob = dropout_prob

        # Define layers dynamically using nn.Sequential
        layers = []

        # layers.append(GaussianNoise(std=noise))

        # Input layer
        layers.append(nn.Linear(self.input_size, hidden_dim))
        layers.append(nn.ReLU())

        # Add additional hidden layers
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(hidden_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout_prob))

        # Output layer
        layers.append(nn.Linear(hidden_dim, output_size))

        # Combine into a Sequential module
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        # Pass the input through the sequential layers
        out = self.network(x)
        return out


In [13]:
model   = SimpleNN(input_size=200, hidden_dim=64, output_size=1,
                   num_layers=2, noise=0.0, dropout_prob=0.0)
loss_fct = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

for epoch in range(10):
    # ---- TRAINING ----
    model.train()
    total_loss = 0.0
    for batch in trainDataLoader:
        X = batch["window"]            # [batch, 100]
        y = batch["has_break"].float() # [batch] or [batch,1]

        optimizer.zero_grad()          # reset grads
        logits = model(X)              # forward
        loss   = loss_fct(logits.squeeze(), y.squeeze())
        loss.backward()                # backward
        optimizer.step()               # update params

        total_loss += loss.item()

    avg_train_loss = total_loss / len(trainDataLoader)
    
    # ---- EVALUATION ----
    model.eval()
    y_trues, y_scores = [], []
    with torch.no_grad():
        for batch in testDataLoader:
            X = batch["window"]
            y = batch["has_break"].float()
            logits = model(X)

            # collect
            y_trues.append(y.view(-1).cpu())
            y_scores.append(logits.view(-1).cpu())
    
    # concatenate everything
    y_true  = torch.cat(y_trues).numpy()
    y_score = torch.cat(y_scores).numpy()
    roc_auc = roc_auc_score(y_true, y_score)

    print(f"Epoch {epoch+1:2d} — Train loss: {avg_train_loss:.4f}    ROC-AUC: {roc_auc:.4f}")


Epoch  1 — Train loss: 0.6115    ROC-AUC: 0.5000
Epoch  2 — Train loss: 0.6118    ROC-AUC: 0.5000
Epoch  3 — Train loss: 0.6108    ROC-AUC: 0.5000
Epoch  4 — Train loss: 0.6109    ROC-AUC: 0.5000
Epoch  5 — Train loss: 0.6108    ROC-AUC: 0.5000
Epoch  6 — Train loss: 0.6113    ROC-AUC: 0.5000
Epoch  7 — Train loss: 0.6110    ROC-AUC: 0.5000


KeyboardInterrupt: 

NameError: name 'Union' is not defined