In [12]:
import h5py
import numpy as np
import os

def load_h5_data(folder_path):
    # File names
    files = {
        "absorbance": "threelayers2d_AZO_Ag_AZO_0.1_absorbance.h5",
        "reflectance": "threelayers2d_AZO_Ag_AZO_0.1_reflectance.h5",
        "transmittance": "threelayers2d_AZO_Ag_AZO_0.1_transmittance.h5",
        "variables": "threelayers2d_AZO_Ag_AZO_0.1_variables.h5",
        "wavelengths": "threelayers2d_AZO_Ag_AZO_0.1_wavelengths.h5"
    }

    data = {}
    for key, file_name in files.items():
        with h5py.File(os.path.join(folder_path, file_name), 'r') as h5_file:
            data[key] = np.array(h5_file['data'])

    return data

folder_path = "C:\\Users\\lampgroup\\Desktop\\Mingxuan\\SPECTRA-LAYER\\examples\\data\\threelayers2d_AZO_Ag_AZO"
data = load_h5_data(folder_path)


In [3]:
import h5py
import numpy as np
import h5py
import numpy as np
import os

def load_and_prepare_data(folder_path):
    # Initialize lists to hold data
    absorbance_list = []
    reflectance_list = []
    transmittance_list = []
    variables_list = []
    wavelengths_list = []
    
    # Load data from files
    with h5py.File(os.path.join(folder_path, 'nanowires2d_GaAs_0.1_variables.h5'), 'r') as f:
        variables = np.array(f['data'])
        variables_list.append(variables)
    with h5py.File(os.path.join(folder_path, 'nanowires2d_GaAs_0.1_wavelengths.h5'), 'r') as f:
        wavelengths = np.array(f['data'])
        wavelengths_list.append(wavelengths)
    properties = ['absorbance', 'reflectance', 'transmittance']
    for prop in properties:
        with h5py.File(os.path.join(folder_path, f'nanowires2d_GaAs_0.1_{prop}.h5'), 'r') as f:
            prop_data = np.array(f['data'])
            if prop == 'absorbance':
                absorbance_list.append(prop_data)
            elif prop == 'reflectance':
                reflectance_list.append(prop_data)
            elif prop == 'transmittance':
                transmittance_list.append(prop_data)
    
    # Convert lists to arrays
    variables = np.concatenate(variables_list, axis=0)
    wavelengths = np.concatenate(wavelengths_list, axis=0)
    absorbance = np.concatenate(absorbance_list, axis=0)
    reflectance = np.concatenate(reflectance_list, axis=0)
    transmittance = np.concatenate(transmittance_list, axis=0)
    
    # Assuming wavelengths are the same for all samples and properties,
    # we only need one copy of the wavelengths array.
    # If this assumption is incorrect, adjust the handling of wavelengths accordingly.

    # Reshape or adjust variables as needed; for now, we assume it's ready for use.

    # Prepare X and Y
    # X could be variables or a combination of variables and structure information.
    # Y will be a combination of the optical properties and potentially wavelengths.
    
    # Example of preparing Y - combining optical properties; adjust based on your model's needs
    Y = np.stack([absorbance, reflectance, transmittance], axis=-1)
    
    return variables, wavelengths, Y

folder_path = "C:\\Users\\lampgroup\\Desktop\\Mingxuan\\SPECTRA-LAYER\\examples\\data\\nanowires2d_GaAs"
variables, wavelengths, Y = load_and_prepare_data(folder_path)

# Example of how to include structure information in X:
# Assuming 'variables' contains numeric data that describes each layer's properties,
# you might want to prepend or append structure-specific tokens or identifiers to 'variables'.
# This step depends significantly on how you've structured 'variables' and the specific requirements of your transformer model.



In [4]:
# Normalize variables for better training performance
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
variables_normalized = scaler.fit_transform(variables.reshape(-1, variables.shape[-1])).reshape(variables.shape)

# If your model expects a flat input sequence, you might flatten the variables
# This depends on whether your transformer architecture can handle multi-dimensional input directly
X = variables_normalized.reshape(-1, variables.shape[-1])
Y_reshape = Y.reshape(-1, Y.shape[-2] * Y.shape[-1])

In [46]:
X .shape

(39200, 3)

In [5]:
import torch
from torch import nn
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class SpectraTransformer(nn.Module):
    def __init__(self, input_dim, num_classes, nhead, nhid, nlayers, dropout=0.5):
        super(SpectraTransformer, self).__init__()
        self.encoder_layer = TransformerEncoderLayer(d_model=input_dim, nhead=nhead, 
                                                     dim_feedforward=nhid, dropout=dropout, 
                                                     batch_first=True)  # Ensure batch_first is set to True
        self.transformer_encoder = TransformerEncoder(self.encoder_layer, num_layers=nlayers)
        self.decoder = nn.Linear(input_dim, num_classes)

    def forward(self, src):
        output = self.transformer_encoder(src)  # No need to permute if using batch_first=True
        output = self.decoder(output)
        return output


# Example instantiation
# input_dim: Flatten size of `variables` for each sample
# num_classes: The number of output classes or regression targets (e.g., len(wavelengths) * 3 for A, R, T)
model = SpectraTransformer(input_dim=X.shape[1], num_classes=Y_reshape.shape[-1], nhead=3, nhid=256, nlayers=3, dropout=0.5)




In [6]:
from sklearn.model_selection import train_test_split

# Assuming X and Y have been appropriately reshaped
# Split into train+val and test
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X, Y_reshape, test_size=0.2, random_state=42)

# Split train+val into train and val
X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2


In [7]:
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim

# Convert to PyTorch tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
Y_train_t = torch.tensor(Y_train, dtype=torch.float32)
X_val_t = torch.tensor(X_val, dtype=torch.float32)
Y_val_t = torch.tensor(Y_val, dtype=torch.float32)
X_test_t = torch.tensor(X_test, dtype=torch.float32)
Y_test_t = torch.tensor(Y_test, dtype=torch.float32)

# Create TensorDataset
train_dataset = TensorDataset(X_train_t, Y_train_t)
val_dataset = TensorDataset(X_val_t, Y_val_t)
test_dataset = TensorDataset(X_test_t, Y_test_t)

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(100):
    model.train()  # Set the model to training mode
    train_loss = 0.0
    # Training loop
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Clear the gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, targets)  # Compute the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update the parameters
        train_loss += loss.item() * inputs.size(0)  # Aggregate the loss
    train_loss /= len(train_loader.dataset)  # Average the loss

    # Validation loop
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():  # No gradients needed for validation
        for inputs, targets in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)  # Aggregate the loss
    val_loss /= len(val_loader.dataset)  # Average the loss
    
    print(f'Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')




Epoch 1, Training Loss: 0.3229, Validation Loss: 0.1567
Epoch 2, Training Loss: 0.1508, Validation Loss: 0.1331
Epoch 3, Training Loss: 0.1439, Validation Loss: 0.1324
Epoch 4, Training Loss: 0.1437, Validation Loss: 0.1324
Epoch 5, Training Loss: 0.1436, Validation Loss: 0.1324
Epoch 6, Training Loss: 0.1436, Validation Loss: 0.1324
Epoch 7, Training Loss: 0.1436, Validation Loss: 0.1324
Epoch 8, Training Loss: 0.1436, Validation Loss: 0.1324
Epoch 9, Training Loss: 0.1436, Validation Loss: 0.1323
Epoch 10, Training Loss: 0.1435, Validation Loss: 0.1319
Epoch 11, Training Loss: 0.1425, Validation Loss: 0.1299
Epoch 12, Training Loss: 0.1401, Validation Loss: 0.1271
Epoch 13, Training Loss: 0.1378, Validation Loss: 0.1252
Epoch 14, Training Loss: 0.1365, Validation Loss: 0.1244
Epoch 15, Training Loss: 0.1360, Validation Loss: 0.1242
Epoch 16, Training Loss: 0.1355, Validation Loss: 0.1238
Epoch 17, Training Loss: 0.1352, Validation Loss: 0.1229
Epoch 18, Training Loss: 0.1348, Validat

In [8]:
# Assuming `val_loader` is your DataLoader for the validation set
model.eval()  # Set the model to evaluation mode
val_loss = 0.0
num_samples = 0

with torch.no_grad():  # No gradient computation for inference
    for inputs, targets in val_loader:
        outputs = model(inputs)  # Get model predictions
        loss = criterion(outputs, targets)  # Calculate the loss
        val_loss += loss.item() * inputs.size(0)  # Aggregate the loss
        num_samples += inputs.size(0)

val_loss /= num_samples  # Average the loss over all validation samples

print(f'Validation Loss: {val_loss:.4f}')



Validation Loss: 0.1171
