In [20]:
import os
import tensorflow as tf
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Data Loading Function
def load_and_preprocess_data(directory):
    """
    Load multiple .mat files, extract relevant features, and preprocess.
    """
    all_features = []
    all_regression_targets = []
    all_classification_targets = []

    rooftop_types = ["o", "a", "b", "c"]  # Encoding for classification

    for file in os.listdir(directory):
        if file.endswith(".mat"):
            data_path = os.path.join(directory, file)
            mat_data = scipy.io.loadmat(data_path)

            # Extract relevant fields
            wind_pressure_coefficients = mat_data['Wind_pressure_coefficients']
            roof_pitch = mat_data['Roof_pitch'].flatten()
            building_depth = mat_data['Building_depth'].flatten()
            building_breadth = mat_data['Building_breadth'].flatten()
            building_height = mat_data['Building_height'].flatten()
            wind_azimuth = mat_data['Wind_azimuth'].flatten()

            mean_pressure_coefficients = wind_pressure_coefficients.mean(axis=1)  # Regression target

            # Extract rooftop type from filename
            rooftop_label = None
            for idx, rooftop in enumerate(rooftop_types):
                if rooftop in file:
                    rooftop_label = idx
                    break
            if rooftop_label is None:
                continue  # Skip file if no valid rooftop type found

            num_samples = len(mean_pressure_coefficients)
            features = np.column_stack([
                np.tile(roof_pitch, num_samples // len(roof_pitch)),
                np.tile(building_depth, num_samples // len(building_depth)),
                np.tile(building_breadth, num_samples // len(building_breadth)),
                np.tile(building_height, num_samples // len(building_height)),
                np.tile(wind_azimuth, num_samples // len(wind_azimuth)),
            ])

            classification_labels = np.full(num_samples, rooftop_label)

            all_features.append(features)
            all_regression_targets.append(mean_pressure_coefficients)
            all_classification_targets.append(classification_labels)

    X = np.vstack(all_features)
    Y_regression = np.concatenate(all_regression_targets)
    Y_classification = np.concatenate(all_classification_targets)

    # Normalize features
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # One-hot encode classification labels
    Y_classification = tf.keras.utils.to_categorical(Y_classification, num_classes=len(rooftop_types))

    return X, Y_regression, Y_classification

# Load data
data_dir = "../../data"  # Change this to your actual path
X, Y_regression, Y_classification = load_and_preprocess_data(data_dir)

# Split data
X_train, X_test, Y_reg_train, Y_reg_test, Y_class_train, Y_class_test = train_test_split(
    X, Y_regression, Y_classification, test_size=0.2, random_state=42
)

# Define Multi-Task Model
def create_multitask_model(input_shape, num_classes):
    input_layer = tf.keras.layers.Input(shape=(input_shape,))
    shared_layer = tf.keras.layers.Dense(128, activation='relu')(input_layer)
    shared_layer = tf.keras.layers.Dense(64, activation='relu')(shared_layer)

    # Regression Output
    regression_branch = tf.keras.layers.Dense(32, activation='relu')(shared_layer)
    regression_output = tf.keras.layers.Dense(1, activation='linear', name='regression_output')(regression_branch)

    # Classification Output
    classification_branch = tf.keras.layers.Dense(32, activation='relu')(shared_layer)
    classification_output = tf.keras.layers.Dense(num_classes, activation='softmax', name='classification_output')(classification_branch)

    model = tf.keras.Model(inputs=input_layer, outputs=[regression_output, classification_output])
    return model

input_shape = X_train.shape[1]
num_classes = Y_class_train.shape[1]
model = create_multitask_model(input_shape, num_classes)

# Compile Model
model.compile(
    optimizer='adam',
    loss={
        'regression_output': 'mean_squared_error',
        'classification_output': 'categorical_crossentropy'
    },
    metrics={
        'regression_output': 'mean_absolute_error',
        'classification_output': 'accuracy'
    },
    loss_weights={
        'regression_output': 1.0,
        'classification_output': 0.5
    }
)

# Train Model
model.fit(
    X_train,
    {'regression_output': Y_reg_train, 'classification_output': Y_class_train},
    epochs=100,
    batch_size=32,
    validation_split=0.1
)

# Evaluate Model
results = model.evaluate(
    X_test,
    {'regression_output': Y_reg_test, 'classification_output': Y_class_test}
)
print("Test Results:", results)


Epoch 1/100
[1m4747/4747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - classification_output_accuracy: 0.9973 - classification_output_loss: 0.0373 - loss: 0.0309 - regression_output_loss: 0.0123 - regression_output_mean_absolute_error: 0.0865 - val_classification_output_accuracy: 1.0000 - val_classification_output_loss: 8.8190e-06 - val_loss: 0.0112 - val_regression_output_loss: 0.0112 - val_regression_output_mean_absolute_error: 0.0825
Epoch 2/100
[1m4747/4747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - classification_output_accuracy: 1.0000 - classification_output_loss: 4.1990e-06 - loss: 0.0108 - regression_output_loss: 0.0108 - regression_output_mean_absolute_error: 0.0816 - val_classification_output_accuracy: 1.0000 - val_classification_output_loss: 3.5747e-07 - val_loss: 0.0108 - val_regression_output_loss: 0.0108 - val_regression_output_mean_absolute_error: 0.0823
Epoch 3/100
[1m4747/4747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1