In [1]:
import torch
import os,sys
sys.path.append("..")

In [2]:
import sys, os
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.abspath("../.."))
sys.path.append("../surrogate_models")
import torch.nn as nn
import torch.nn.functional as F
import torch

activation_function_list = [torch.tanh, nn.ReLU(), nn.CELU(), nn.LeakyReLU(), nn.ELU(), nn.Hardswish(),torch.tanh, nn.ReLU(), nn.CELU(), nn.LeakyReLU(), torch.tanh]

class MultiLayerPerceptron_forward(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes, net_n):
        super(MultiLayerPerceptron_forward, self).__init__()
        #################################################################################
        # Initialize the modules required to implement the mlp with given layer   #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        #################################################################################
        layers = []
        layers.append(nn.Linear((input_size), (hidden_layers[0])))
        for i in range(len(hidden_layers)-1):
            layers.append(nn.Linear((hidden_layers[i]), (hidden_layers[i+1])))

        layers.append(nn.Linear((hidden_layers[len(hidden_layers)-1]), (num_classes)))
        self.layers = nn.Sequential(*layers)
        self.net_n = net_n
        self.hidden_layers = hidden_layers
    def forward(self, x):
        #################################################################################
        # Implement the forward pass computations                                 #
        #################################################################################
        m = activation_function_list[self.net_n]
        for i in range(len(self.hidden_layers)):
            x = self.layers[i](x)
            x = m(x)
        x = (self.layers[len(self.hidden_layers)](x))
        out=x
        return out


class MultiLayerPerceptron_forward_classifier(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes):
        super(MultiLayerPerceptron_forward_classifier, self).__init__()
        #################################################################################
        # Initialize the modules required to implement the mlp with given layer   #
        # configuration. input_size --> hidden_layers[0] --> hidden_layers[1] .... -->  #
        # hidden_layers[-1] --> num_classes                                             #
        #################################################################################
        layers = []
        layers.append(nn.Linear((input_size), (hidden_layers[0])))
        # layers.append(nn.Linear((hidden_layers[0]), (hidden_layers[1])))
        # layers.append(nn.Linear((hidden_layers[1]), (hidden_layers[2])))
        for i in range(len(hidden_layers)-1):
            layers.append(nn.Linear((hidden_layers[i]), (hidden_layers[i+1])))

        layers.append(nn.Linear((hidden_layers[len(hidden_layers)-1]), (num_classes)))
        self.layers = nn.Sequential(*layers)
        self.hidden_size = hidden_layers
    def forward(self, x):
        #################################################################################
        # Implement the forward pass computations                                 #
        #################################################################################

        # x = F.relu(self.layers[0](x))
        # x = F.relu(self.layers[1](x))
        # x = F.relu(self.layers[2](x))
        for i in range(len(self.hidden_size)):
            x = F.relu(self.layers[i](x))
        x = (self.layers[len(self.hidden_size)](x))
        out = x
        # out = F.sigmoid(x)
        return out


#===================================================    

import torch
import torch.nn as nn
import torch.nn.functional as F

# List of activation functions
activation_function_list = [
    torch.tanh, nn.ReLU(), nn.CELU(), nn.LeakyReLU(), nn.ELU(),
    nn.Hardswish(), torch.tanh, nn.ReLU(), nn.CELU(), nn.LeakyReLU(), torch.tanh
]

class SelfAttention(nn.Module):
    def __init__(self, embed_dim):
        super(SelfAttention, self).__init__()
        self.embed_dim = embed_dim

        # Learnable linear projections
        self.query = nn.Linear(embed_dim, embed_dim)
        self.key   = nn.Linear(embed_dim, embed_dim)
        self.value = nn.Linear(embed_dim, embed_dim)

        self.scale = embed_dim ** 0.5

    def forward(self, x):
        # x shape: (batch, seq_len, embed_dim)
        Q = self.query(x)
        K = self.key(x)
        V = self.value(x)

        # Attention scores: (batch, seq_len, seq_len)
        scores = torch.bmm(Q, K.transpose(1, 2)) / self.scale
        attn_weights = F.softmax(scores, dim=-1)

        # Weighted sum: (batch, seq_len, embed_dim)
        attn_output = torch.bmm(attn_weights, V)
        return attn_output


class MultiLayerPerceptronWithCustomAttention(nn.Module):
    def __init__(self, input_size, hidden_layers, num_classes, net_n):
        super(MultiLayerPerceptronWithCustomAttention, self).__init__()

        # MLP Layers
        layers = []
        layers.append(nn.Linear(input_size, hidden_layers[0]))
        for i in range(len(hidden_layers) - 1):
            layers.append(nn.Linear(hidden_layers[i], hidden_layers[i + 1]))
        self.mlp_layers = nn.ModuleList(layers)
        self.activation = activation_function_list[net_n]
        
        self.attention = SelfAttention(embed_dim=hidden_layers[-1])
        self.final_linear = nn.Linear(hidden_layers[-1], num_classes)

    def forward(self, x):
        # MLP forward
        for layer in self.mlp_layers:
            x = self.activation(layer(x))

        # Reshape for attention: treat features as a sequence of length 1
        x = x.unsqueeze(1)  # (batch, 1, embed_dim)

        # Apply attention
        x = self.attention(x)

        # Flatten back to (batch, embed_dim)
        x = x.squeeze(1)

        # Final output
        out = self.final_linear(x)
        return out

#====================================================

class ResidualBlock1D(nn.Module):
    """
    A 1D residual block with optional downsampling.
    The first conv can do stride=2 if downsample=True, the second conv uses stride=1.
    We'll use kernel=5, pad=2 so that stride=1 doesn't reduce length at all.
    """
    def __init__(self, in_channels, out_channels, downsample=False):
        super().__init__()
        self.downsample = downsample

        # If we want to downsample, we set stride=2 for the first conv and the shortcut
        stride_first = 2 if downsample else 1
        kernel_size = 5
        pad = 2

        # 1) First conv
        self.bn1 = nn.BatchNorm1d(in_channels, momentum=0.9)
        self.conv1 = nn.Conv1d(in_channels, out_channels,
                               kernel_size=kernel_size,
                               stride=stride_first,
                               padding=pad)
        # 2) Second conv always stride=1
        self.bn2 = nn.BatchNorm1d(out_channels, momentum=0.9)
        self.conv2 = nn.Conv1d(out_channels, out_channels,
                               kernel_size=kernel_size,
                               stride=1,
                               padding=pad)

        # Shortcut (for downsample or channel mismatch)
        if downsample or (in_channels != out_channels):
            self.shortcut_conv = nn.Conv1d(in_channels, out_channels,
                                           kernel_size=1,
                                           stride=stride_first,
                                           padding=0)
        else:
            self.shortcut_conv = nn.Identity()

        self.lrelu = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x, debug=False):
        if debug:
            print(f"  [ResidualBlock] Input shape: {x.shape}")
        identity = self.shortcut_conv(x)     # Possibly stride=2 if downsample is True

        # First conv
        out = self.bn1(x)
        out = self.lrelu(out)
        out = self.conv1(out)
        if debug:
            print(f"    After conv1 shape: {out.shape}")

        # Second conv
        out = self.bn2(out)
        out = self.lrelu(out)
        out = self.conv2(out)
        if debug:
            print(f"    After conv2 shape: {out.shape}")

        # Add skip
        out = out + identity
        if debug:
            print(f"  [ResidualBlock] Output shape: {out.shape}\n")

        return out


class SurrogateModel1D(nn.Module):
    """
    A 1D ResNet-like approach for (batch, 2, n_points).
    We'll do four "groups" of blocks, each of which can downsample the length dimension in the first block.
    """
    def __init__(self, n_points=192, in_channels=2, depth=16, residual_list=[2,2,2,2]):
        super().__init__()
        self.depth = depth
        self.residual_list = residual_list

        # -- Initial Conv: kernel=5, pad=2, stride=1 => length stays 192
        self.conv_initial = nn.Conv1d(in_channels, depth, kernel_size=5, stride=1, padding=2)
        self.bn_initial = nn.BatchNorm1d(depth, momentum=0.9)
        self.lrelu = nn.LeakyReLU(0.2, inplace=True)

        # Group0: no downsample in first block
        self.blocks0 = nn.ModuleList([
            ResidualBlock1D(depth, depth, downsample=False)
            for _ in range(residual_list[0])
        ])
        # Group1: first block downsample => depth-> depth*2
        self.resblock1_0 = ResidualBlock1D(depth, depth*2, downsample=True)
        self.blocks1 = nn.ModuleList([
            ResidualBlock1D(depth*2, depth*2, downsample=False)
            for _ in range(residual_list[1]-1)
        ])
        # Group2
        self.resblock2_0 = ResidualBlock1D(depth*2, depth*4, downsample=True)
        self.blocks2 = nn.ModuleList([
            ResidualBlock1D(depth*4, depth*4, downsample=False)
            for _ in range(residual_list[2]-1)
        ])
        # Group3
        self.resblock3_0 = ResidualBlock1D(depth*4, depth*8, downsample=True)
        self.blocks3 = nn.ModuleList([
            ResidualBlock1D(depth*8, depth*8, downsample=False)
            for _ in range(residual_list[3]-1)
        ])

        # Final BN + global avg pool + FC(128) + BN + LReLU + FC(2) + Sigmoid
        self.bn_final = nn.BatchNorm1d(depth*8, momentum=0.9)
        self.fc1 = nn.Linear(depth*8, 128)
        self.bn_fc1 = nn.BatchNorm1d(128, momentum=0.9)
        self.fc2 = nn.Linear(128, 2)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, debug=False):
        if debug:
            print(f"Input shape: {x.shape}")
        out = self.conv_initial(x)
        out = self.bn_initial(out)
        out = self.lrelu(out)
        if debug:
            print(f"After initial conv shape: {out.shape}")

        # group0
        for block in self.blocks0:
            out = block(out, debug=debug)
        if debug:
            print(f"After group0 shape: {out.shape}")

        # group1
        out = self.resblock1_0(out, debug=debug)
        for block in self.blocks1:
            out = block(out, debug=debug)
        if debug:
            print(f"After group1 shape: {out.shape}")

        # group2
        out = self.resblock2_0(out, debug=debug)
        for block in self.blocks2:
            out = block(out, debug=debug)
        if debug:
            print(f"After group2 shape: {out.shape}")

        # group3
        out = self.resblock3_0(out, debug=debug)
        for block in self.blocks3:
            out = block(out, debug=debug)
        if debug:
            print(f"After group3 shape: {out.shape}")

        out = self.bn_final(out)
        out = self.lrelu(out)
        if debug:
            print(f"After bn_final + lrelu shape: {out.shape}")

        # global avg pool => (batch, depth*8, 1)
        out = F.adaptive_avg_pool1d(out, 1)
        if debug:
            print(f"After global avg pool shape: {out.shape}")
        out = out.view(out.size(0), -1)  # => (batch, depth*8)

        out = self.fc1(out)
        out = self.bn_fc1(out)
        out = self.lrelu(out)
        if debug:
            print(f"After fc1 shape: {out.shape}")

        out = self.fc2(out)
        out = self.sigmoid(out)
        if debug:
            print(f"Output shape: {out.shape}")

        return out

#====================================================

class Hybrid_surrogate_MLP(nn.Module):
    def __init__(self, 
                 input_size, 
                 hidden_layers_cl_model ,
                 hidden_layers_cd_model , 
                 net_n_cl=3 , 
                 net_n_cd=3, 
                 path_cl_model = None, 
                 path_cd_model  =  None):
        super(Hybrid_surrogate_MLP, self).__init__()
        self.cl_forward_mlp = MultiLayerPerceptron_forward(input_size , hidden_layers_cl_model ,   num_classes=1  , net_n=net_n_cl)
        self.cd_forward_mlp = MultiLayerPerceptron_forward(input_size , hidden_layers_cd_model ,   num_classes=1  , net_n=net_n_cd)
        if path_cl_model:
            self.cl_forward_mlp.load_state_dict(torch.load(path_cl_model,map_location="cpu"))
        if path_cd_model:
            self.cd_forward_mlp.load_state_dict(torch.load(path_cd_model,map_location="cpu"))

    def forward(self, x):
        #################################################################################
        # Implement the forward pass computations                                 #
        #################################################################################
        cl = self.cl_forward_mlp(x)
        cd = self.cd_forward_mlp(x)
        return torch.stack([cl,cd],dim=1).squeeze(-1)
 



In [3]:
def weights_init(m):
    if type(m) == nn.Linear:
        m.weight.data.normal_(0.0, 1e-3)
        m.bias.data.fill_(0.)

def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)



In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, TensorDataset
import scipy.io as sio
import argparse
import numpy as np
import time
import matplotlib.pyplot as  plt


input_size = 192 * 2
# hidden_size = [150, 200, 200 , 150]
hidden_size = [200, 300, 300 , 200]
# hidden_size = [128, 64 , 32]
num_classes = 1
num_epochs = 250
learning_rate = 9e-4
# learning_rate = 1e-6
# learning_rate = 1e-7
learning_rate_decay = 0.999
reg = 0.001
batch_size = 128
device = "cuda" if torch.cuda.is_available() else "cpu"
num_workers = 2
patience = 100

# xs_train = np.load(rf"/home/bardiya/projects/diffusion_air_manifolding/codes/creative-generativeai-diffusion/data/raw/xs_train.npy")
# ys_train = np.load(rf"/home/bardiya/projects/diffusion_air_manifolding/codes/creative-generativeai-diffusion/data/raw/ys_train.npy")

db = np.load(rf"/home/bardiya/projects/diffusion_air_manifolding/codes/creative-generativeai-diffusion/src/surrogate_models/run_results.npy",allow_pickle=True).item()

print(db.keys())
# sys.exit()
xs_train = db["shapes"]
ys_train = db["performances"]
xs_train = xs_train.reshape(xs_train.shape[0],-1)
print(ys_train[:,-1])
print(f"{xs_train.shape=}")
print(f"{ys_train.shape=}")
x_train_tensor = torch.from_numpy(xs_train).float()
y_train_tensor = torch.from_numpy(ys_train).float()
# print(ys_train[:3])
# sys.exit()

dataset = TensorDataset(x_train_tensor, y_train_tensor)

lengths = [int(len(dataset)*0.9), len(dataset)-int(len(dataset)*0.9)]

train_dataset, val_dataset = torch.utils.data.random_split(dataset, lengths)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size , 
                                           num_workers=  num_workers,
                                           drop_last= True , 
                                           shuffle = True)
test_loader = torch.utils.data.DataLoader(dataset=val_dataset, 
                                         batch_size=batch_size , 
                                         num_workers = num_workers)



# model_mlp = MultiLayerPerceptronWithCustomAttention(input_size, hidden_size, num_classes,3).to(device)
model_mlp = MultiLayerPerceptron_forward_classifier(input_size, hidden_size, num_classes,3).to(device)
print(count_parameters(model_mlp))

# model_mlp.apply(weights_init)
# model_mlp.load_state_dict(torch.load("/home/bardiya/projects/diffusion_air_manifolding/codes/creative-generativeai-diffusion/src/surrogate_models/mlp_best_model.pt",map_location="cpu"))
model_mlp.to(device)


# criterion = criterion_MAE
# criterion_train= sum_mse
# criterion_val   = nn.MSELoss()  # Standard mean MSE
criterion_train= nn.BCEWithLogitsLoss()
criterion_val   = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=learning_rate)

########################################
# 5) Training loop with early stopping
########################################
best_loss = float('inf')
best_epoch = 0
epochs_no_improve = 0
lr_current = learning_rate

train_losses = []
test_losses  = []

start_time = time.time()

for epoch in range(1, num_epochs + 1):
    # ---- Training ----
    model_mlp.train()
    running_loss = 0.0
    for batch_idx, (features, labels) in enumerate(train_loader):
        features, temp_labels = features.to(device), labels.to(device)
        labels= torch.zeros((temp_labels.shape[0],1))
        for i in range(temp_labels.shape[0]):
            if temp_labels[i,0] == -1000:
                labels[i,0] = 0
            else:
                labels[i,0] = 1
        optimizer.zero_grad()
        outputs = model_mlp(features)  # shape (batch, 1)
    
        print(f"{outputs.shape=}")
        print(f"{labels.shape=}")
        sys.exit()
        loss = criterion_train(outputs.squeeze(dim=1), labels[:,0])   #cl
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * features.size(0)

        # if (batch_idx + 1) % 100 == 0:
        #     print(f"Epoch [{epoch}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}],"
        #           f" Train Loss (batch): {loss.item():.4f}")

    epoch_train_loss = running_loss / len(train_loader.dataset)
    train_losses.append(epoch_train_loss)

    # ---- Validation ----
    model_mlp.eval()
    test_loss = 0.0
    with torch.no_grad():
        for features, labels in test_loader:
            features, labels = features.to(device), labels.to(device)
            preds = model_mlp(features)
            # print(f"{preds.shape=}")
            # print(f"{labels[:,1].shape=}")
            # sys.exit()
            # test_loss += criterion_val(preds.squeeze(dim=1), labels[:,1]).item() * features.size(0)
            test_loss += criterion_val(preds.squeeze(dim=1), labels[:,0]).item() * features.size(0)
    epoch_test_loss = test_loss / len(test_loader.dataset)
    test_losses.append(epoch_test_loss)

    # Print log
    print(f"Epoch [{epoch}/{num_epochs}] - Train Loss: {epoch_train_loss:.6f}, Test Loss: {epoch_test_loss:.6f}")

    # Early stopping check
    if epoch_test_loss < best_loss:
        best_loss = epoch_test_loss
        best_epoch = epoch
        epochs_no_improve = 0
        torch.save(model_mlp.state_dict(), "mlp_best_model.pt")
        print(f"New best model at epoch {epoch} with test loss {best_loss:.6f}")
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= patience:
            print(f"No improvement for {patience} epochs. Early stopping at epoch {epoch}.")
            break

    # Update learning rate
    lr_current *= learning_rate_decay
    update_lr(optimizer, lr_current)

elapsed = time.time() - start_time
print(f"\nTraining completed in {elapsed:.2f} seconds. Best epoch: {best_epoch} with test loss {best_loss:.6f}")

# Save final model weights
# torch.save(model_mlp.state_dict(), "mlp_final_model.pt")
# torch.save(model_mlp.state_dict(), "mlp_cl_model.pt")
torch.save(model_mlp.state_dict(), "mlp_cd_model.pt")
print("Saved mlp_final_model.pt")


############################################################
# 6) Save and plot train/test losses in separate subplots
############################################################
train_losses = np.array(train_losses)
test_losses  = np.array(test_losses)

# Save them as .npy
np.save("train_losses.npy", train_losses)
np.save("test_losses.npy",  test_losses)

# Two subplots: one for Train Loss, one for Test Loss
fig, axs = plt.subplots(1, 2, figsize=(12, 4))

axs[0].plot(train_losses, label="Train Loss", color="blue")
axs[0].set_title("Train Loss vs Epoch")
axs[0].set_xlabel("Epoch")
axs[0].set_ylabel("Loss")
axs[0].grid(True, linestyle="--", alpha=0.7)
axs[0].legend()

axs[1].plot(test_losses, label="Test Loss", color="orange")
axs[1].set_title("Test Loss vs Epoch")
axs[1].set_xlabel("Epoch")
axs[1].set_ylabel("Loss")
axs[1].grid(True, linestyle="--", alpha=0.7)
axs[1].legend()

plt.tight_layout()
plt.show()

dict_keys(['latents', 'shapes', 'performances'])
[  7.  73. 182. ... 162.  93.  42.]
xs_train.shape=(10000, 384)
ys_train.shape=(9991, 3)


AssertionError: Size mismatch between tensors

In [22]:

db = np.load(rf"/home/bardiya/projects/diffusion_air_manifolding/codes/creative-generativeai-diffusion/src/surrogate_models/run_results.npy",allow_pickle=True).item()
xs_train = db["shapes"]
ys_train = db["performances"]
xs_train = xs_train.reshape(xs_train.shape[0],-1)
len(np.unique(ys_train[:,2])), len(ys_train)

(200, 9991)