In [5]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error
from scipy.io import loadmat
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# --- Load and Preprocess Data ---
def load_and_preprocess_data(file_path):
    mat_data = loadmat(file_path)

    wind_pressure_coefficients = mat_data['Wind_pressure_coefficients']
    roof_pitch = mat_data['Roof_pitch'].flatten()
    building_depth = mat_data['Building_depth'].flatten()
    building_breadth = mat_data['Building_breadth'].flatten()
    building_height = mat_data['Building_height'].flatten()

    mean_pressure_coefficients = wind_pressure_coefficients.mean(axis=1)
    num_samples = len(mean_pressure_coefficients)

    features = pd.DataFrame({
        "Roof_pitch": np.tile(roof_pitch, num_samples // len(roof_pitch)),
        "Building_depth": np.tile(building_depth, num_samples // len(building_depth)),
        "Building_breadth": np.tile(building_breadth, num_samples // len(building_breadth)),
        "Building_height": np.tile(building_height, num_samples // len(building_height))
    })
    features["Mean_pressure_coefficient"] = mean_pressure_coefficients

    return features.drop(columns=["Mean_pressure_coefficient"]), features["Mean_pressure_coefficient"]

# Load data
file_path = "../../data/1-4 height and breath/2-2 depth-breath/14 roof pitch/"
features, targets = load_and_preprocess_data(file_path)

# Polynomial Features
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly.fit_transform(features)
feature_names = poly.get_feature_names_out(features.columns)
poly_df = pd.DataFrame(poly_features, columns=feature_names)
poly_df["Mean_pressure_coefficient"] = targets

# --- Correlation Analysis ---
plt.figure(figsize=(10, 8))
corr_matrix = poly_df.corr()
sns.heatmap(corr_matrix, annot=False, cmap="coolwarm")
plt.title("Feature Correlation Heatmap")
plt.show()

# --- Train-Test Split ---
X = poly_df.drop(columns=["Mean_pressure_coefficient"])
y = poly_df["Mean_pressure_coefficient"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# --- XGBoost Model ---
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    max_depth=8,
    learning_rate=0.001,
    n_estimators=50,
    subsample=0.5,
    colsample_bytree=0.5,
    random_state=42
)
xgb_model.fit(X_train_scaled, y_train)

y_pred_xgb = xgb_model.predict(X_test_scaled)
mae_xgb = mean_absolute_error(y_test, y_pred_xgb)
mape_xgb = mean_absolute_percentage_error(y_test, y_pred_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
r2_xgb = r2_score(y_test, y_pred_xgb)

print(f"XGBoost - MAE: {mae_xgb:.4f}, MAPE: {mape_xgb:.4f}, MSE: {mse_xgb:.4f}, R²: {r2_xgb:.4f}")


Using device: cuda


FileNotFoundError: [Errno 2] No such file or directory: '../../data/1-4 height and breath/2-2 depth-breath/14 roof pitch/.mat'