In [1]:
import os
import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score, accuracy_score, precision_score
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- Automatic File Loading ---
def load_mat_files(directory):
    data_frames = []
    for filename in os.listdir(directory):
        if filename.endswith(".mat"):
            filepath = os.path.join(directory, filename)
            mat_data = loadmat(filepath)

            # Extract relevant fields (customize based on data structure)
            wind_coeff = mat_data.get('Wind_pressure_coefficients')
            if wind_coeff is not None:
                mean_pressure = wind_coeff.mean(axis=1)
                features = {
                    'Roof_pitch': mat_data.get('Roof_pitch', [0])[0],
                    'Sample_frequency': mat_data.get('Sample_frequency', [0])[0],
                    'Building_depth': mat_data.get('Building_depth', [0])[0],
                    'Building_breadth': mat_data.get('Building_breadth', [0])[0],
                    'Building_height': mat_data.get('Building_height', [0])[0],
                    'Wind_azimuth': mat_data.get('Wind_azimuth', [0])[0],
                    'Mean_pressure_coefficient': mean_pressure
                }
                df = pd.DataFrame(features)
                data_frames.append(df)

    return pd.concat(data_frames, ignore_index=True) if data_frames else None

# Load all .mat files in directory
data_directory = "../data/Low-rise with eaves/roof type a/height 1;4/"  # Replace with your directory
all_data = load_mat_files(data_directory)
if all_data is None:
    raise ValueError("No valid data found in the directory.")

# --- Data Preprocessing ---
X = all_data.drop(columns=['Mean_pressure_coefficient'])
y = all_data['Mean_pressure_coefficient']

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# --- PIML Dataset and Model ---
class PressureDataset(Dataset):
    def __init__(self, features, targets):
        self.features = torch.tensor(features, dtype=torch.float32).to(device)
        self.targets = torch.tensor(targets.values, dtype=torch.float32).to(device)

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

train_dataset = PressureDataset(X_train, y_train)
test_dataset = PressureDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class PIMLModel(nn.Module):
    def __init__(self, input_size):
        super(PIMLModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.dropout(torch.relu(self.fc3(x)))
        x = self.fc4(x)
        return x

# Model, Loss, Optimizer
model = PIMLModel(input_size=X_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train Model
num_epochs = 100
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for features, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs.squeeze(), targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    train_losses.append(epoch_loss / len(train_loader))

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for features, targets in test_loader:
            outputs = model(features)
            val_loss += criterion(outputs.squeeze(), targets).item()

    val_losses.append(val_loss / len(test_loader))
    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Val Loss: {val_losses[-1]:.4f}")

# Evaluate Model
y_pred = []
y_true = []
model.eval()
with torch.no_grad():
    for features, targets in test_loader:
        outputs = model(features)
        y_pred.append(outputs.squeeze().cpu().numpy())
        y_true.append(targets.cpu().numpy())

y_pred = np.concatenate(y_pred)
y_true = np.concatenate(y_true)

# Metrics
mae = mean_absolute_error(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
mse = mean_squared_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

print(f"PIML - MAE: {mae:.4f}, MAPE: {mape:.4f}, MSE: {mse:.4f}, R²: {r2:.4f}")

# Plot Training and Validation Loss
plt.figure(figsize=(8, 6))
plt.plot(train_losses, label="Training Loss")
plt.plot(val_losses, label="Validation Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.grid(True)
plt.show()


Using device: cuda


ValueError: All arrays must be of the same length