In [1]:
# All imports
from Imports import *
from Preprocessing import *
from Helper import *
%matplotlib inline
warnings.filterwarnings('ignore')

In [2]:
# Select the compute device and load the train/test spreadsheets
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Data paths (relative to the notebook's working directory)
TRAIN_FILE = '../2025-Quantathon-Tornado-Q-training_data-640-examples.xlsx'
TEST_FILE = '../2025-Quantum-Tornado-Q-test_data-200-examples.xlsx'

# Read both workbooks into DataFrames in one pass (training first, then test)
df_train, df_test = (pd.read_excel(workbook) for workbook in (TRAIN_FILE, TEST_FILE))

# Report the size of each frame as a quick sanity check
print(f"✓ Training data loaded: {df_train.shape[0]} rows, {df_train.shape[1]} columns")
print(f"✓ Test data loaded: {df_test.shape[0]} rows, {df_test.shape[1]} columns")

✓ Training data loaded: 640 rows, 10 columns
✓ Test data loaded: 200 rows, 10 columns


In [3]:
# Binary DNN without Quantum Layer
class BinaryDNN(nn.Module):
    """Fully connected binary classifier.

    The network is a two-stage MLP: a feature encoder
    (in_features -> hidden_dim -> encoded_dim) followed by a classifier head
    (encoded_dim -> hidden_dim -> 1) that ends in a sigmoid, so the output is
    a probability suitable for ``nn.BCELoss``.

    All constructor arguments default to the original hard-coded values, so
    ``BinaryDNN()`` builds the same architecture (and the same ``state_dict``
    keys) as before.

    Args:
        in_features: Number of input features per sample.
        hidden_dim: Width of the first encoder layer and of the classifier's
            hidden layer.
        encoded_dim: Width of the encoder output.
        dropout: Dropout probability applied after each hidden activation.
        negative_slope: Negative slope of every LeakyReLU activation.
    """

    def __init__(self, in_features: int = 8, hidden_dim: int = 64,
                 encoded_dim: int = 128, dropout: float = 0.2,
                 negative_slope: float = 0.2):
        super().__init__()

        # Encodes features from dataset
        self.feature_encoder = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.LeakyReLU(negative_slope),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, encoded_dim),
            nn.LeakyReLU(negative_slope),
            nn.Dropout(dropout),
        )

        # Classifies based on encoded features
        self.classifier = nn.Sequential(
            nn.Linear(encoded_dim, hidden_dim),
            nn.LeakyReLU(negative_slope),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid()
        )

    def forward(self, features):
        """Return class probabilities for a batch of feature vectors.

        Args:
            features: Tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor of sigmoid outputs with shape (batch_size, 1).
        """
        feats_encoded = self.feature_encoder(features)
        class_probs = self.classifier(feats_encoded)

        return class_probs  # Shape: (batch_size, 1)

In [4]:
# Define hyperparameters and build the datasets / data loaders

# Single source of truth for the mini-batch size used by every loader below.
batch_size = 64
lr = 1e-2
n_epochs = 300

# NOTE(review): balancing ('smote') is requested inside Preprocess, i.e. before
# the validation split below. If that oversampling creates synthetic rows, some
# validation rows may be derived from training rows, which would inflate the
# validation scores — confirm, and if so split before balancing.
X_train, y_train, X_test, y_test = Preprocess(df_train, df_test, balance = 'smote', classes = 'binary')

# Split training data into train + validation (stratified so both parts keep the class ratio)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

train_data = ClassificationDataset(X_train, y_train)
validation_data = ClassificationDataset(X_val, y_val)
test_data = ClassificationDataset(X_test, y_test)

# Training drops the last partial batch so every optimisation step sees a full batch.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True, drop_last=True)
# Evaluation loaders must keep the last partial batch; dropping it would
# silently exclude samples from the validation and test metrics.
val_loader = DataLoader(validation_data, batch_size=batch_size, shuffle=False, drop_last=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False, drop_last=False)

# Pull one batch to verify the dtypes the model and the loss will receive
X_batch, y_batch = next(iter(train_loader))

print("Batch X dtype:", X_batch.dtype)
print("Batch y dtype:", y_batch.dtype)

print("Number of training samples:", len(train_data))
print("Number of validation samples:", len(validation_data))
print("Number of test samples:", len(test_data))


Batch X dtype: torch.float32
Batch y dtype: torch.float32
Number of training samples: 913
Number of validation samples: 229
Number of test samples: 200


In [5]:
# Initialize model, optimizer, and scheduler

# Seed the torch RNG before the model is built so weight initialisation (and
# any later torch-driven shuffling/dropout) is repeatable from a fresh kernel.
# NOTE(review): this does not by itself guarantee bit-identical results on GPU.
SEED = 42
torch.manual_seed(SEED)

DNN = BinaryDNN().to(device)
optimizer = optim.Adam(DNN.parameters(), lr=lr, betas=(0.9, 0.999))
# Cosine decay of the learning rate from `lr` down to eta_min over n_epochs;
# it only takes effect if scheduler_G.step() is called once per epoch.
scheduler_G = CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-4)

# Empty list to track losses
losses = []

In [6]:
# # Plot visual of DNN
# graph =  draw_graph(
#     DNN,
#     input_size=(1, 8),
#     expand_nested=True,
#     depth = 1,
#     graph_name="DNN Architecture",
#     mode = 'train',
#     show_shapes = True,
#     hide_inner_tensors=True,   # Hides intermediate shapes
#     roll=True,                  # More compact layer grouping
#     save_graph=False,           # Set to True if you want to save
#     # node_attr={"color": "lightblue", "style": "filled"},  # Node styling
#     # edge_attr={"color": "gray"},                          # Edge styling
#     graph_dir= "TB",                         # Layout: TB (top-bottom), LR (left-right)
# )

# # Show in notebook
# graph.visual_graph

In [7]:
## Load a previous model
load = False

# Checkpoint to restore when `load` is True. Defaults to the best-model file
# written by the training loop; point it at a timestamped
# "models/Binary_DNN_<MMDD_HHMM>.pt" file to restore a manual save instead.
load_path = "models/DNN_model_best.pt"

if load:
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
    checkpoint = torch.load(load_path, map_location=device)

    # Restore model weights
    DNN.load_state_dict(checkpoint["DNN_state_dict"])

    # Optionally restore tracking data. Not every checkpoint carries both
    # keys, so fall back to the current values instead of raising KeyError.
    losses = checkpoint.get("losses", losses)
    if "history" in checkpoint:
        history = checkpoint["history"]

    print(f"Loaded model from {load_path}")

In [9]:
# Train the binary DNN, validate after every epoch, and checkpoint the best model.
bce = nn.BCELoss()
metric = accuracy

# Best validation metric seen so far. The metric is an accuracy (higher is
# better), so start below any reachable value; the first epoch always saves.
best_val_metric = float("-inf")

# Initialize a dictionary to store epoch-wise results
history = {
        'epoch': [],
        'train_loss': [],
        'train_metric': [],
        'val_loss': [],
        'val_metric': []
    }

# Destination of the best checkpoint; create the folder once, not on every save.
save_path = "models/DNN_model_best.pt"
os.makedirs("models", exist_ok=True)

for epoch in range(n_epochs):
    DNN.train()
    train_loss, train_metric, n_train = 0.0, 0.0, 0

    for features, target in train_loader:
        features, target = features.to(device), target.unsqueeze(-1).to(device)

        optimizer.zero_grad()
        outputs = DNN(features)
        loss = bce(outputs, target)
        loss.backward()
        optimizer.step()

        # Weight each batch by its size so the epoch average stays correct even
        # when the last batch is smaller. float() keeps plain Python numbers in
        # `history` whether the metric returns a number or a 0-d tensor.
        n_batch = target.size(0)
        train_loss += loss.item() * n_batch
        train_metric += float(metric(outputs, target)) * n_batch
        n_train += n_batch

    # max(..., 1) avoids a ZeroDivisionError if a loader yields no batches
    train_loss /= max(n_train, 1)
    train_metric /= max(n_train, 1)

    # Validation
    DNN.eval()
    val_loss, val_metric, n_val = 0.0, 0.0, 0
    with torch.no_grad():
        # Distinct loop names: reusing X_val / y_val here would overwrite the
        # validation arrays created when the datasets were built.
        for val_features, val_target in val_loader:
            val_features, val_target = val_features.to(device), val_target.unsqueeze(-1).to(device)
            outputs = DNN(val_features)

            n_batch = val_target.size(0)
            val_loss += bce(outputs, val_target).item() * n_batch
            val_metric += float(metric(outputs, val_target)) * n_batch
            n_val += n_batch

    val_loss /= max(n_val, 1)
    val_metric /= max(n_val, 1)

    # Advance the cosine learning-rate schedule once per epoch
    scheduler_G.step()

    # Logging
    history['epoch'].append(epoch)
    history['train_loss'].append(train_loss)
    history['train_metric'].append(train_metric)
    history['val_loss'].append(val_loss)
    history['val_metric'].append(val_metric)

    print(f'Epoch [{epoch+1}/{n_epochs}] | Train Loss: {train_loss:.4f} | '
          f'Train Acc: {train_metric:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_metric:.4f}')

    # Save best model (strictly better validation accuracy than any earlier epoch)
    if val_metric > best_val_metric:
        best_val_metric = val_metric
        torch.save({
            "DNN_state_dict": DNN.state_dict(),
            "history": history,
        }, save_path)
        print(f"Model and history saved to {save_path}")


NameError: name 'history' is not defined

In [None]:
## Save Model
# Writes the current model weights plus tracked statistics to a timestamped file.

save = True
if save:
    # Create output directory if it doesn't exist
    os.makedirs("models", exist_ok=True)

    # Timestamp for unique filenames
    timestamp = datetime.now().strftime("%m%d_%H%M")

    # Save model states and tracked data in a single file.
    # The training loop records its per-epoch statistics in `history`, so
    # store that alongside `losses`; it is None if training has not been run
    # in this kernel session.
    save_path = f"models/Binary_DNN_{timestamp}.pt"
    torch.save({
        "DNN_state_dict": DNN.state_dict(),
        "losses": losses,
        "history": globals().get("history"),
    }, save_path)

    print(f"Model and statistics saved to {save_path}")