In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import scipy.io as sio
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define PINN with physics-informed loss
class PINN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_classes):
        super(PINN, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
            nn.Softmax(dim=1)
        )
        self.regressor = nn.Sequential(
            nn.Linear(input_size + num_classes, hidden_size),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(0.2),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        class_probs = self.classifier(x)
        x_combined = torch.cat((x, class_probs), dim=1)
        return self.regressor(x_combined), class_probs

# Physics-based loss function
def physics_loss(y_pred, y_true):
    wind_resistance = torch.mean(torch.abs(y_pred - y_true))  # Simplified constraint
    return nn.MSELoss()(y_pred, y_true) + 0.1 * wind_resistance

# Load and preprocess data
def load_all_data(folder_path):
    X_list, y_list, labels_list = [], [], []
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".mat"):
            file_path = os.path.join(folder_path, file_name)
            data = sio.loadmat(file_path)

            wind_angle = float(data.get('Wind_azimuth', [[0]])[0][0])
            height = float(data.get('Building_height', [[1]])[0][0])
            depth = float(data.get('Building_depth', [[1]])[0][0])
            height_ratio = height / max(depth, 1e-6)  # Avoid division by zero
            eave_type = str(data.get('Eave_type', [['Unknown']])[0][0])

            Cp_matrix = data.get('Wind_pressure_coefficients', np.zeros((1, 1)))
            Cp_flat = Cp_matrix.flatten()

            X_list.append([wind_angle, height_ratio])
            y_list.append(Cp_flat)
            labels_list.append(eave_type)

    max_length = max(len(arr) for arr in y_list)
    y_padded = np.array([np.pad(arr, (0, max_length - len(arr)), mode='constant') for arr in y_list])

    return np.array(X_list), y_padded, np.array(labels_list, dtype=object)

# Directory containing .mat files
data_folder = "../data/DATAAIO"
X, y, labels = load_all_data(data_folder)

# Encode categorical labels
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
labels_encoded = encoder.fit_transform(labels.reshape(-1, 1))
X_combined = np.hstack((X, labels_encoded))

# Standardize data
scaler_X = StandardScaler()
X_combined = scaler_X.fit_transform(X_combined)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_combined, y, test_size=0.2, random_state=42)

# Convert to tensors
X_train, y_train = torch.tensor(X_train, dtype=torch.float32).to(device), torch.tensor(y_train, dtype=torch.float32).to(device)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32).to(device), torch.tensor(y_test, dtype=torch.float32).to(device)

# Model setup
input_size = X_train.shape[1]
hidden_size = 64  # Increased complexity
output_size = y_train.shape[1]
num_classes = labels_encoded.shape[1]
model = PINN(input_size, hidden_size, output_size, num_classes).to(device)
criterion = physics_loss
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=200, gamma=0.5)

# Training loop
epochs = 1000
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs, _ = model(X_train)
    loss = criterion(outputs, y_train)
    if torch.isnan(loss):
        print("NaN detected in loss, skipping update")
        continue
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()
    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {loss.item():.4f}')

# Save model
torch.save(model.state_dict(), "pinn_model.pth")
print("Model training complete and saved as pinn_model.pth")

# Validation
model.eval()
with torch.no_grad():
    y_pred, _ = model(X_test)
    y_pred = torch.nan_to_num(y_pred).cpu().numpy()
    y_test = y_test.cpu().numpy()

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'Validation Results:')
print(f'MAE: {mae:.4f}')
print(f'MSE: {mse:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'R²: {r2:.4f}')


Epoch [0/1000], Loss: 1.5580


OutOfMemoryError: CUDA out of memory. Tried to allocate 898.00 MiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 9.04 GiB is allocated by PyTorch, and 1.45 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)