In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import scipy.io as sio
import os
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# PINN Model Definition
class PINN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_classes):
        super(PINN, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
            nn.Softmax(dim=1)
        )
        self.regressor = nn.Sequential(
            nn.Linear(input_size + num_classes, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size)
        )

    def forward(self, x):
        class_probs = self.classifier(x)
        x_combined = torch.cat((x, class_probs), dim=1)
        return self.regressor(x_combined), class_probs

# Load and preprocess data from multiple .mat files
def load_all_data(folder_path):
    X_list, y_list, labels_list = [], [], []
    file_count = 0

    for file_name in os.listdir(folder_path):
        if file_name.endswith(".mat"):
            file_path = os.path.join(folder_path, file_name)
            data = sio.loadmat(file_path)

            # Extract metadata
            wind_angle = float(data.get('Wind_azimuth', [[0]])[0][0])
            height = float(data.get('Building_height', [[1]])[0][0])
            depth = float(data.get('Building_depth', [[1]])[0][0])
            height_ratio = height / max(depth, 1e-6)

            # Extract wind pressure coefficients
            Cp_matrix = data.get('Wind_pressure_coefficients', np.zeros((1, 256)))
            Cp_mean = np.mean(Cp_matrix, axis=0).flatten()

            # Ensure uniform size
            max_length = 256
            if len(Cp_mean) < max_length:
                Cp_mean = np.pad(Cp_mean, (0, max_length - len(Cp_mean)), mode='constant')
            else:
                Cp_mean = Cp_mean[:max_length]

            # Extract eave type
            eave_type = str(data.get('Eave_type', [['Unknown']])[0][0])

            # Append data
            X_list.append([wind_angle, height_ratio])
            y_list.append(Cp_mean)
            labels_list.append(eave_type)
            file_count += 1

    print(f"✅ Loaded {file_count} files.")

    return np.array(X_list), np.array(y_list), np.array(labels_list, dtype=object)

# Load all data
data_folder = "../data/DATAAIO"
X, y, labels = load_all_data(data_folder)

# Encode categorical labels
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
labels_encoded = encoder.fit_transform(labels.reshape(-1, 1))
X_combined = np.hstack((X, labels_encoded))

# Standardize features and target
scaler_X = StandardScaler()
X_combined = scaler_X.fit_transform(X_combined)
scaler_y = StandardScaler()
y = scaler_y.fit_transform(y)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_combined, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
X_train, y_train = torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32)
X_test, y_test = torch.tensor(X_test, dtype=torch.float32), torch.tensor(y_test, dtype=torch.float32)

# Move data to GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
X_train, y_train, X_test, y_test = X_train.to(device), y_train.to(device), X_test.to(device), y_test.to(device)

# Create DataLoader for batch processing
batch_size = 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Model setup
input_size = X_train.shape[1]
hidden_size = 128
output_size = y_train.shape[1]
num_classes = labels_encoded.shape[1]
model = PINN(input_size, hidden_size, output_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Training loop
epochs = 5000
for epoch in range(epochs):
    total_loss = 0.0
    model.train()

    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)

        optimizer.zero_grad()
        outputs, _ = model(batch_X)
        loss = criterion(outputs, batch_y)

        if torch.isnan(loss):
            print("NaN detected in loss, skipping update")
            continue

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        total_loss += loss.item()

    if epoch % 500 == 0:
        print(f'Epoch [{epoch}/{epochs}], Loss: {total_loss / len(train_loader):.4f}')

# Save trained model
torch.save(model.state_dict(), "pinn_model_gpu.pth")
print("✅ Model training complete. Saved as pinn_model_gpu.pth")

# Validation
model.eval()
with torch.no_grad():
    y_pred, _ = model(X_test)
    y_pred = torch.nan_to_num(y_pred).cpu().numpy()
    y_test = y_test.cpu().numpy()

# Compute metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print results
print(f'🔎 Validation Results:')
print(f'✅ MAE: {mae:.4f}')
print(f'✅ MSE: {mse:.4f}')
print(f'✅ RMSE: {rmse:.4f}')
print(f'✅ R² Score: {r2:.4f}')


✅ Loaded 60 files.
Epoch [0/5000], Loss: 1.0492
Epoch [500/5000], Loss: 0.0250
Epoch [1000/5000], Loss: 0.0043
Epoch [1500/5000], Loss: 0.0011
Epoch [2000/5000], Loss: 0.0004
Epoch [2500/5000], Loss: 0.0001
Epoch [3000/5000], Loss: 0.0001
Epoch [3500/5000], Loss: 0.0001
Epoch [4000/5000], Loss: 0.0001
Epoch [4500/5000], Loss: 0.0000
✅ Model training complete. Saved as pinn_model_gpu.pth
🔎 Validation Results:
✅ MAE: 0.3030
✅ MSE: 0.2177
✅ RMSE: 0.4666
✅ R² Score: 0.7592
