In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from scipy.io import loadmat
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score
import glob

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- GAN Implementation for Data Augmentation ---
class GAN(nn.Module):
    def __init__(self, input_dim, noise_dim):
        super(GAN, self).__init__()
        self.generator = nn.Sequential(
            nn.Linear(noise_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, input_dim),
        )

        self.discriminator = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def generate(self, noise):
        return self.generator(noise)

    def discriminate(self, data):
        return self.discriminator(data)

# Train GAN
def train_gan(features, epochs=1000, batch_size=32):
    input_dim = features.shape[1]
    noise_dim = 10
    gan = GAN(input_dim, noise_dim).to(device)
    optimizer_g = torch.optim.Adam(gan.generator.parameters(), lr=0.0002)
    optimizer_d = torch.optim.Adam(gan.discriminator.parameters(), lr=0.0002)
    criterion = nn.BCELoss()

    real_data = torch.tensor(features, dtype=torch.float32).to(device)
    for epoch in range(epochs):
        # Discriminator training
        noise = torch.randn(batch_size, noise_dim).to(device)
        fake_data = gan.generate(noise)

        real_labels = torch.ones(batch_size, 1).to(device)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        d_loss_real = criterion(gan.discriminate(real_data[:batch_size]), real_labels)
        d_loss_fake = criterion(gan.discriminate(fake_data), fake_labels)
        d_loss = d_loss_real + d_loss_fake

        optimizer_d.zero_grad()
        d_loss.backward()
        optimizer_d.step()

        # Generator training
        noise = torch.randn(batch_size, noise_dim).to(device)
        fake_data = gan.generate(noise)
        g_loss = criterion(gan.discriminate(fake_data), real_labels)

        optimizer_g.zero_grad()
        g_loss.backward()
        optimizer_g.step()

    print(f"GAN training complete.")
    return gan

# Function to process multiple files
def process_wind_files(file_paths):
    results = []

    for file_path in file_paths:
        print(f"Processing file: {file_path}")

        # Load data
        mat_data = loadmat(file_path)

        # Extract fields
        roof_pitch = mat_data['Roof_pitch'].flatten()
        sample_frequency = mat_data['Sample_frequency'].flatten()
        building_depth = mat_data['Building_depth'].flatten()
        building_breadth = mat_data['Building_breadth'].flatten()
        building_height = mat_data['Building_height'].flatten()
        wind_azimuth = mat_data['Wind_azimuth'].flatten()
        wind_pressure_coefficients = mat_data['Wind_pressure_coefficients']

        mean_pressure_coefficients = wind_pressure_coefficients.mean(axis=1)

        # Create feature DataFrame
        num_samples = mean_pressure_coefficients.shape[0]
        features = pd.DataFrame({
            "Roof_pitch": np.tile(roof_pitch, num_samples // len(roof_pitch)),
            "Sample_frequency": np.tile(sample_frequency, num_samples // len(sample_frequency)),
            "Building_depth": np.tile(building_depth, num_samples // len(building_depth)),
            "Building_breadth": np.tile(building_breadth, num_samples // len(building_breadth)),
            "Building_height": np.tile(building_height, num_samples // len(building_height)),
            "Wind_azimuth": np.tile(wind_azimuth, num_samples // len(wind_azimuth)),
        })
        features["Mean_pressure_coefficient"] = mean_pressure_coefficients

        # Polynomial Features
        poly = PolynomialFeatures(degree=2, include_bias=False)
        poly_features = poly.fit_transform(features.drop(columns=["Mean_pressure_coefficient"]))
        feature_names = poly.get_feature_names_out(features.drop(columns=["Mean_pressure_coefficient"]).columns)
        poly_df = pd.DataFrame(poly_features, columns=feature_names)
        poly_df["Mean_pressure_coefficient"] = mean_pressure_coefficients

        # Train-Test Split
        X = poly_df.drop(columns=["Mean_pressure_coefficient"])
        y = poly_df["Mean_pressure_coefficient"]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # GAN Data Augmentation
        gan = train_gan(X_train_scaled, epochs=500)
        noise = torch.randn(500, 10).to(device)
        synthetic_data = gan.generate(noise).cpu().detach().numpy()
        X_train_augmented = np.vstack([X_train_scaled, synthetic_data])
        y_train_augmented = np.hstack([y_train, np.random.choice(y_train, size=500)])

        # Train XGBoost
        xgb_model = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)
        xgb_model.fit(X_train_augmented, y_train_augmented)
        y_pred_xgb = xgb_model.predict(X_test_scaled)

        # Metrics
        mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
        mape_xgb = mean_absolute_percentage_error(y_test, y_pred_xgb)
        mse_xgb = mean_squared_error(y_test, y_pred_xgb)
        r2_xgb = r2_score(y_test, y_pred_xgb)

        print(f"File: {file_path} - MAE: {mae_xgb:.4f}, MAPE: {mape_xgb:.4f}, MSE: {mse_xgb:.4f}, R²: {r2_xgb:.4f}")

        results.append({
            "file": file_path,
            "MAE": mae_xgb,
            "MAPE": mape_xgb,
            "MSE": mse_xgb,
            "R2": r2_xgb,
        })

    return results

# Process multiple files
file_paths = glob.glob("../data/Low-rise with eaves/roof type o/height 1;4/*.mat")
results = process_wind_files(file_paths)

# Display results
results_df = pd.DataFrame(results)
print(results_df)


Using device: cuda
Processing file: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg000.mat
GAN training complete.
File: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg000.mat - MAE: 0.0881, MAPE: 0.3133, MSE: 0.0124, R²: -0.0001
Processing file: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg023.mat
GAN training complete.
File: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg023.mat - MAE: 0.0959, MAPE: 0.3164, MSE: 0.0156, R²: -0.0001
Processing file: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg045.mat
GAN training complete.
File: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg045.mat - MAE: 0.0886, MAPE: 0.2113, MSE: 0.0125, R²: -0.0000
Processing file: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg068.mat
GAN training complete.
File: ../data/Low-rise with eaves/roof type o/height 1;4\Cp_ts_ROH06_deg068.mat - MAE: 0.0922, MAPE: 0.3514, MSE: 0.0139, R