In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models

2025-04-08 17:02:54.602736: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744149775.459392     522 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744149775.656451     522 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744149777.378623     522 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744149777.378650     522 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744149777.378652     522 computation_placer.cc:177] computation placer alr

In [2]:
import numpy as np
import pickle
import gzip
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [25]:
class CustomModel:
    """Data pipeline and training wrapper for one (n, k) m-height regression model.

    Typical call order: ``load_and_preprocess_data`` -> ``train_test_val_split``
    -> ``model_compile`` -> ``model_train`` -> ``plot_history`` / ``evaluate_model``.
    The network is trained on the natural log of the m-heights; the raw
    (non-log) test targets are also exported in ``eval_inputs`` / ``eval_outputs``.
    """

    def __init__(self, n, k, max_m_value):
        """Store the problem sizes and initialise every pipeline artefact to None.

        Args:
            n, k: Problem sizes; used for the saved model file name, the
                evaluator keys and passed on to the model builder.
            max_m_value: Number of m-height values kept per sample (and passed
                to the model builder).
        """
        self.n = n
        self.k = k
        self.max_m_value = max_m_value

        # Raw data (filled by load_and_preprocess_data).
        self.P_matrices = None
        self.m_heights = None
        # Splits (filled by train_test_val_split).
        self.P_matrices_train = None
        self.P_matrices_test = None
        self.P_matrices_val = None
        self.m_heights_train = None
        self.m_heights_test = None
        self.m_heights_log_train = None
        self.m_heights_log_test = None
        self.m_heights_log_val = None
        # Test split re-keyed as "[n,k,m]" -> list, for the external evaluator.
        self.eval_inputs = None
        self.eval_outputs = None
        # Training split after column-permutation augmentation.
        self.P_matrices_train_aug = None
        self.m_heights_log_train_aug = None

        self.train_dataset = None
        self.val_dataset = None

        self.model_name = f'model_{self.n}_{self.k}.keras'
        self.model = None
        self.history = None

    def load_and_preprocess_data(self,filepath):
        """Load a gzip-pickled dataset and keep only rows with usable m-heights.

        Each row must provide ``'P_matrix'`` and ``'m_heights'``. Only the first
        ``max_m_value`` heights are kept (this drops a trailing ``inf`` caused by
        a bug in the dataset generation). A row is skipped when it has fewer
        than ``max_m_value`` heights or when any kept height is, after
        conversion to float32, not finite or not strictly positive, because
        such values would break the later ``np.log`` transform.

        Args:
            filepath: Path of the gzip-compressed pickle file.

        Raises:
            ValueError: If no row survives the filtering.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only open trusted files.
        with gzip.open(filepath, 'rb') as f:
            dataset = pickle.load(f)

        kept_P_matrices = []
        kept_heights = []

        for row in dataset:
            heights = row['m_heights'][:self.max_m_value]
            if len(heights) != self.max_m_value:
                continue
            # Validate in the storage dtype so values that overflow float32
            # (and NaN, which compares False against everything) are rejected too.
            with np.errstate(over='ignore', invalid='ignore'):
                heights = np.asarray(heights, dtype=np.float32)
            if not (np.all(np.isfinite(heights)) and np.all(heights > 0)):
                continue

            kept_P_matrices.append(np.array(row['P_matrix'], dtype=np.float32))
            kept_heights.append(heights)

        if not kept_P_matrices:
            raise ValueError(
                f"No valid samples found in {filepath!r} for max_m_value={self.max_m_value}."
            )

        self.P_matrices = np.array(kept_P_matrices, dtype=np.float32)
        self.m_heights = np.array(kept_heights, dtype=np.float32)
        print("P_matrices shape:", self.P_matrices.shape)
        print("m_heights shape:", self.m_heights.shape)
        # Print the per-column range of all m-heights
        print("Range of m_heights:")
        print("Min:", np.min(self.m_heights, axis=0))
        print("Max:", np.max(self.m_heights, axis=0))

    def generate_permuted_dataset(self, P_matrices, m_heights, num_permutations=3, random_state=None):
        """Create an augmented dataset by permuting the columns of each P matrix.

        Every sample is emitted once unchanged, followed by ``num_permutations``
        copies with randomly permuted columns. All copies reuse the original
        target row, i.e. the targets are treated as invariant to column order.

        Args:
            P_matrices: Sequence of 2-D arrays.
            m_heights: Sequence of target rows aligned with ``P_matrices``.
            num_permutations: Number of permuted copies per sample.
            random_state: ``None`` uses NumPy's global random state (the
                previous behaviour); an int seed, or an existing
                ``RandomState`` / ``Generator``, makes the result reproducible.

        Returns:
            Tuple ``(augmented_P_matrices, augmented_m_heights)`` of NumPy
            arrays with ``len(P_matrices) * (1 + num_permutations)`` samples.
        """
        if random_state is None:
            rng = np.random
        elif isinstance(random_state, (np.random.RandomState, np.random.Generator)):
            rng = random_state
        else:
            rng = np.random.default_rng(random_state)

        augmented_P_matrices = []
        augmented_m_heights = []

        for original, target in zip(P_matrices, m_heights):
            # Keep original
            augmented_P_matrices.append(original)
            augmented_m_heights.append(target)

            # Generate random permutations
            for _ in range(num_permutations):
                permuted = original[:, rng.permutation(original.shape[1])]
                augmented_P_matrices.append(permuted)
                augmented_m_heights.append(target)
        return np.array(augmented_P_matrices), np.array(augmented_m_heights)

    def train_test_val_split(self, val_split=0.2, test_split=0.2, num_permutations=4, batch_size=128, random_state=2342):
        """Split the data, export the test set for the evaluator and build tf datasets.

        Steps: train/test split; re-key the test split into ``eval_inputs`` /
        ``eval_outputs`` (raw heights, one entry per m in ``2..max_m_value``);
        log-transform the targets; train/validation split; augment the training
        part only; wrap train and validation parts in batched ``tf.data`` datasets.

        Args:
            val_split: Fraction of the remaining training data used for validation.
            test_split: Fraction of all data held out as the test set.
            num_permutations: Permuted copies per training sample.
            batch_size: Batch size of both tf datasets.
            random_state: Seed for both splits and for the augmentation.

        Raises:
            ValueError: If the data has not been loaded yet.
        """
        if(self.P_matrices is None or self.m_heights is None):
            raise ValueError("Data not loaded. Please load the data first.")
        # Train-test split
        self.P_matrices_train, self.P_matrices_test,\
              self.m_heights_train, self.m_heights_test\
                 = train_test_split(self.P_matrices, self.m_heights, test_size=test_split, random_state=random_state)

        # Converting test set to evaluator format
        self.eval_inputs = {}
        self.eval_outputs = {}
        for P_matrix, m_heights_list in zip(self.P_matrices_test, self.m_heights_test):
            for m in range(2, self.max_m_value + 1):  # m ranges from 2 to max_m_value
                key = f"[{self.n},{self.k},{m}]"
                self.eval_inputs.setdefault(key, []).append(P_matrix)
                # Index m - 1 holds the height reported for m
                self.eval_outputs.setdefault(key, []).append(m_heights_list[m - 1])

        # Change m_heights_train and m_heights_test to log scale
        self.m_heights_log_train = np.log(self.m_heights_train)
        self.m_heights_log_test = np.log(self.m_heights_test)
        print("Range of log m_heights in training data:")
        print("Min:", np.min(self.m_heights_log_train, axis=0))
        print("Max:", np.max(self.m_heights_log_train, axis=0))

        # Train-validation split
        self.P_matrices_train, self.P_matrices_val, self.m_heights_log_train, self.m_heights_log_val = train_test_split(
            self.P_matrices_train, self.m_heights_log_train, test_size=val_split, random_state=random_state)
        # Augment the training part only; seeded so a given random_state reproduces the same set
        self.P_matrices_train_aug, self.m_heights_log_train_aug = self.generate_permuted_dataset(
            self.P_matrices_train, self.m_heights_log_train,
            num_permutations=num_permutations, random_state=random_state)
        # Create TensorFlow datasets
        self.train_dataset = tf.data.Dataset.from_tensor_slices((self.P_matrices_train_aug, self.m_heights_log_train_aug))
        self.train_dataset = self.train_dataset.shuffle(self.P_matrices_train_aug.shape[0]).batch(batch_size).prefetch(tf.data.AUTOTUNE)

        # Validation data is only evaluated, so it is batched without shuffling
        self.val_dataset = tf.data.Dataset.from_tensor_slices((self.P_matrices_val, self.m_heights_log_val))
        self.val_dataset = self.val_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
        print("Train dataset shape:", self.P_matrices_train_aug.shape, self.m_heights_log_train_aug.shape)
        print("Validation dataset shape:", self.P_matrices_val.shape, self.m_heights_log_val.shape)
        print("Test dataset shape:", self.P_matrices_test.shape, self.m_heights_log_test.shape)

    def model_compile(self, build_model, print_summary=False):
        """Build the model through the supplied factory.

        Args:
            build_model: Callable ``build_model(n, k, max_m_value)`` returning a
                model; it is expected to return it already compiled, since no
                ``compile`` call is made here.
            print_summary: When True, print ``model.summary()``.

        Raises:
            ValueError: If the train/validation datasets have not been created.
        """
        if self.train_dataset is None or self.val_dataset is None:
            raise ValueError("Train and validation datasets not created. Please create them first.")
        self.model = build_model(self.n, self.k, self.max_m_value)
        print("Model compiled.")
        if print_summary:
            self.model.summary()

    def model_train(self, epochs=10, verbose=0, patience=5 , model_save_directory="."):
        """Fit the model with early stopping and best-model checkpointing.

        Args:
            epochs: Maximum number of epochs.
            verbose: Verbosity forwarded to ``model.fit``.
            patience: Early-stopping patience on ``val_loss``.
            model_save_directory: Directory receiving ``self.model_name``; it is
                created when missing.

        Raises:
            ValueError: If the model has not been built.
        """
        if self.model is None:
            raise ValueError("Model not built. Please build the model first.")

        import os  # local import: this cell does not import os itself

        if model_save_directory:
            os.makedirs(model_save_directory, exist_ok=True)
        checkpoint_path = os.path.join(model_save_directory, self.model_name)

        # Stop once val_loss stalls, and keep the best model on disk
        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        model_checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)
        # Train the model
        self.history = self.model.fit(
            self.train_dataset,
            validation_data=self.val_dataset,
            epochs=epochs,
            callbacks=[
                early_stopping,
                model_checkpoint,
            ],
            verbose=verbose
        )

    def plot_history(self):
        """Print and plot the training and validation loss per epoch.

        The curves are labelled as MSE, which assumes the model was compiled
        with an MSE loss.

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.history is None:
            raise ValueError("Model not trained. Train the model first.")
        history_dict = self.history.history
        mse = history_dict["loss"]
        val_mse = history_dict["val_loss"]
        epochs = range(1, len(mse) + 1)
        print('Train MSE :', mse)
        print('Validation MSE :', val_mse)
        plt.plot(epochs, mse, "bo", label="Training mse")
        plt.plot(epochs, val_mse, "b", label="Validation mse")
        plt.title("Training and validation mean squared error")
        plt.xlabel("Epochs")
        plt.ylabel("MSE")
        plt.legend()
        plt.show()

    def evaluate_model(self, batch_size=128):
        """Evaluate the trained model on the log-scale test split.

        Args:
            batch_size: Batch size forwarded to ``model.evaluate``.

        Returns:
            The value returned by ``model.evaluate`` (also printed).

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.history is None:
            raise ValueError("Model not trained. Train the model first.")
        results = self.model.evaluate(self.P_matrices_test, self.m_heights_log_test, batch_size=batch_size, verbose=0)
        print('Best model loss :', results)
        return results

In [4]:
import gc

# One row per model to train: (n, k, max_m_value, dataset file name).
# NOTE(review): the last row pairs max_m_value=4 with a file named "...maxM6",
# while every other file name matches its max_m_value -- confirm the name on disk.
_model_specs = [
    (9, 4, 5, "G_9_4_maxM5.pklgz"),
    (9, 5, 4, "G_9_5_maxM4.pklgz"),
    (9, 6, 3, "G_9_6_maxM3.pklgz"),
    (10, 4, 6, "G_10_4_maxM6.pklgz"),
    (10, 5, 5, "G_10_5_maxM5.pklgz"),
    (10, 6, 4, "G_10_6_maxM6.pklgz"),
]

# "model_<i>" -> settings; the 'model' slot is filled in by the training cell.
models_dict = {
    f"model_{position}": {
        'n': spec_n,
        'k': spec_k,
        'max_m_value': spec_max_m,
        'model': None,
        'file_name': spec_file,
    }
    for position, (spec_n, spec_k, spec_max_m, spec_file) in enumerate(_model_specs, start=1)
}

# Release memory left over from previous runs of the notebook.
gc.collect()

88

In [None]:
def build_model(n, k, max_m_value):
    """Build and compile a column-wise shared-MLP regressor.

    The (k, n - k) input is transposed so that each of its n - k columns
    becomes one length-k vector. The same two-layer MLP encodes every column,
    the encodings are averaged over the columns, and a linear layer maps the
    average to ``max_m_value`` outputs. Because every column goes through the
    same weights and the pooling is a mean, the output does not depend on the
    order of the input columns.

    Args:
        n, k: Sizes that define the input shape ``(k, n - k)``.
        max_m_value: Number of regression outputs.

    Returns:
        A Keras model compiled with the Adam optimizer and MSE loss.
    """
    inputs = layers.Input(shape=(k, n - k))

    # (batch, k, n-k) -> (batch, n-k, k): one row per original column.
    per_column = layers.Permute((2, 1))(inputs)

    # Column encoder: two Dense(64) -> BatchNorm -> ReLU stages (DeepSets-style).
    encoder_stack = []
    for _ in range(2):
        encoder_stack += [
            layers.Dense(64),
            layers.BatchNormalization(),
            layers.Activation('relu'),
        ]
    column_encoder = tf.keras.Sequential(encoder_stack)

    # Apply the same encoder to each of the n-k columns -> (batch, n-k, 64).
    encoded = layers.TimeDistributed(column_encoder)(per_column)

    # Mean over the column axis -> (batch, 64).
    pooled = layers.GlobalAveragePooling1D()(encoded)

    # Linear regression head, one output per m value.
    predictions = layers.Dense(max_m_value, activation='linear')(pooled)

    network = models.Model(inputs=inputs, outputs=predictions)
    network.compile(optimizer='adam', loss='mse')
    return network


In [6]:
import os

# Directory holding the gzip-pickled sample files (trailing slash included,
# because file names are appended to it directly).
samples_filepath = "/mnt/d/MS/deep_learning/m_height_prediction/samples_combined/"

# Relative paths used later (e.g. model save directories) resolve against this
# project directory.
# NOTE(review): both paths are machine-specific absolute paths.
os.chdir("/mnt/d/MS/deep_learning/m_height_prediction")

In [27]:
import os

from tensorflow.keras.utils import plot_model  # optional: plot_model(pipeline.model, show_shapes=True)

# Run configuration shared by every model in models_dict.
model_save_directory = "model_6"
val_split = 0.2
test_split = 0.2
num_permutations = 5
batch_size = 128
random_state = 4542
epochs = 40
patience = 5
train_verbosity = 0

# The checkpoints and the exported evaluator files are all written here, so
# make sure the directory exists before anything tries to open a file in it.
os.makedirs(model_save_directory, exist_ok=True)

# Load, split, build, train and evaluate one model per entry.
for model_name, model_info in models_dict.items():
    # os.path.join works whether or not samples_filepath ends with a slash.
    filepath = os.path.join(samples_filepath, model_info['file_name'])
    n = model_info['n']
    k = model_info['k']
    max_m_value = model_info['max_m_value']
    pipeline = CustomModel(n=n, k=k, max_m_value=max_m_value)
    model_info['model'] = pipeline
    print(f"Loading data for model_{n}_{k}...")
    pipeline.load_and_preprocess_data(filepath)
    print(f"\nGenerating dataset for model_{n}_{k}...")
    pipeline.train_test_val_split(val_split=val_split, test_split=test_split, num_permutations=num_permutations, batch_size=batch_size, random_state=random_state)
    print(f"\nCompiling model for model_{n}_{k}...")
    pipeline.model_compile(build_model, print_summary=False)
    print(f"\nTraining model for model_{n}_{k}...")
    pipeline.model_train(epochs=epochs, verbose=train_verbosity, patience=patience, model_save_directory=model_save_directory)
    print(f"\nEvaluating model for model_{n}_{k}...")
    pipeline.plot_history()
    print(f"\nPerformance on test set for model_{n}_{k}:")
    pipeline.evaluate_model(batch_size=batch_size)
    print(f"\nDone with model_{n}_{k}.")
    print("-----------------------------------------------------")

# Merge the per-model held-out test sets into one inputs dict and one outputs
# dict keyed by "[n,k,m]", the format consumed by the evaluator cells.
eval_inputs_combined = {}
eval_outputs_combined = {}

for model_name, model_info in models_dict.items():
    pipeline = model_info['model']
    for key, matrices in pipeline.eval_inputs.items():
        eval_inputs_combined.setdefault(key, []).extend(matrices)
    for key, heights in pipeline.eval_outputs.items():
        eval_outputs_combined.setdefault(key, []).extend(heights)

# Persist next to the checkpoints so evaluation can run in a later session.
with gzip.open(os.path.join(model_save_directory, 'eval_inputs_combined'), 'wb') as f:
    pickle.dump(eval_inputs_combined, f)
with gzip.open(os.path.join(model_save_directory, 'eval_outputs_combined'), 'wb') as f:
    pickle.dump(eval_outputs_combined, f)


Loading data for model_9_4...
P_matrices shape: (34999, 4, 5)
m_heights shape: (34999, 5)
Range of m_heights:
Min: [   3.3710756    8.660064    75.08341    182.32542   1253.2301   ]
Max: [2.7675543e+02 7.5315314e+02 4.8695015e+03 2.2775238e+05 7.9118080e+08]

Generating dataset for model_9_4...
Range of log m_heights in training data:
Min: [1.2152319 2.1587222 4.3185997 5.350549  7.1334796]
Max: [ 5.623134   6.2349315  8.4907465 12.336015  20.489037 ]
Train dataset shape: (134394, 4, 5) (134394, 5)
Validation dataset shape: (5600, 4, 5) (5600, 5)
Test dataset shape: (7000, 4, 5) (7000, 5)

Compiling model for model_9_4...
Model compiled.

Training model for model_9_4...


KeyboardInterrupt: 

In [8]:
from tamu_csce_636_project1 import Evaluator

# Identity fields are placeholders; print=False is forwarded to the Evaluator as given.
evaluator_kwargs = dict(
    first_name="Your Name",
    last_name="Your Name",
    email="email@tamu.edu",
    print=False,
)
evaluator = Evaluator(**evaluator_kwargs)

In [9]:
def predict_and_evaluate(n,k,m,P_matrices):
    """Predict the m-height for a batch of P matrices with the saved (n, k) model.

    The model file ``model_{n}_{k}.keras`` is loaded from the directory named
    by the global ``model_save_directory``. The model outputs log-heights, one
    column per m value, so the selected column is exponentiated before it is
    returned.

    Args:
        n, k: Select the model file and the reshape target ``(k, n - k)``.
        m: 1-based index of the model output column to return.
        P_matrices: Sequence of P matrices, each reshapeable to ``(k, n - k)``.

    Returns:
        List with one predicted m-height per input matrix (empty for empty input).

    Raises:
        IndexError: If ``m`` is not in ``1..number of model outputs``.
    """
    P_matrices = np.array(P_matrices, dtype=np.float32)
    if len(P_matrices) == 0:
        # Nothing to predict; avoid loading the model for an empty batch.
        return []

    ## load model
    model_name = model_save_directory + '/' + f'model_{n}_{k}.keras'
    model = tf.keras.models.load_model(model_name)

    m_heights_pred = model.predict(P_matrices.reshape(len(P_matrices), k, n-k), verbose=0)

    # Reject m < 1 explicitly: a negative index would silently pick a wrong column.
    if not 1 <= m <= m_heights_pred.shape[1]:
        raise IndexError(
            f"m={m} is outside the model's output range 1..{m_heights_pred.shape[1]}"
        )

    # Undo the log transform applied to the training targets.
    return list(np.exp(m_heights_pred[:, m-1]))

In [13]:
# Evaluate the checkpoints stored in this directory; predict_and_evaluate reads
# the same global to locate the model files.
model_save_directory = "model_6"

# Held-out evaluator inputs/outputs saved alongside the checkpoints.
with gzip.open(f"{model_save_directory}/eval_inputs_combined", 'rb') as inputs_file:
    eval_inputs_combined = pickle.load(inputs_file)
with gzip.open(f"{model_save_directory}/eval_outputs_combined", 'rb') as outputs_file:
    eval_outputs_combined = pickle.load(outputs_file)

σ = evaluator.eval(
    inputs=eval_inputs_combined,
    outputs=eval_outputs_combined,
    func=predict_and_evaluate,
)

# Mean over all reported entries (0 when nothing was evaluated).
if σ:
    average_σ = sum(σ.values()) / len(σ)
else:
    average_σ = 0
print(f"Average σ: {average_σ}")
for key, value in σ.items():
    print(f"{key}, σ: {value}")

Average σ: 1.324131338445433
(9, 4, 2), σ: 0.18715476713098955
(9, 4, 3), σ: 0.235722698718727
(9, 4, 4), σ: 0.8122218849954684
(9, 4, 5), σ: 3.296239620010989
(9, 5, 2), σ: 0.21686554545692777
(9, 5, 3), σ: 0.7954697368060312
(9, 5, 4), σ: 3.3317337588224682
(9, 6, 2), σ: 0.5595756932329841
(9, 6, 3), σ: 3.3427194167188463
(10, 4, 2), σ: 0.8695956655734514
(10, 4, 3), σ: 0.10224391711451145
(10, 4, 4), σ: 0.2553576547953333
(10, 4, 5), σ: 0.8969262676696201
(10, 4, 6), σ: 3.450894845210068
(10, 5, 2), σ: 0.11960933253703701
(10, 5, 3), σ: 0.3153405500508103
(10, 5, 4), σ: 0.9021643464201516
(10, 5, 5), σ: 3.5692605080877216
(10, 6, 2), σ: 0.2334483852068471
(10, 6, 3), σ: 0.8402805686969873
(10, 6, 4), σ: 3.473932944098122


In [22]:
# Evaluate the checkpoints stored in this directory; predict_and_evaluate reads
# the same global to locate the model files.
model_save_directory = "model_15"

# Held-out evaluator inputs/outputs saved alongside the checkpoints.
with gzip.open(f"{model_save_directory}/eval_inputs_combined", 'rb') as inputs_file:
    eval_inputs_combined = pickle.load(inputs_file)
with gzip.open(f"{model_save_directory}/eval_outputs_combined", 'rb') as outputs_file:
    eval_outputs_combined = pickle.load(outputs_file)

σ = evaluator.eval(
    inputs=eval_inputs_combined,
    outputs=eval_outputs_combined,
    func=predict_and_evaluate,
)

# Mean over all reported entries (0 when nothing was evaluated).
if σ:
    average_σ = sum(σ.values()) / len(σ)
else:
    average_σ = 0
print(f"Average σ: {average_σ}")
for key, value in σ.items():
    print(f"{key}, σ: {value}")

Average σ: 1.3369110484151927
(9, 4, 2), σ: 0.16723566585624303
(9, 4, 3), σ: 0.22076046554700068
(9, 4, 4), σ: 0.8307825169388442
(9, 4, 5), σ: 3.3139499956751277
(9, 5, 2), σ: 0.19655960093129882
(9, 5, 3), σ: 0.7839523695241959
(9, 5, 4), σ: 3.3498014333850317
(9, 6, 2), σ: 0.5853356126510034
(9, 6, 3), σ: 3.418617047518573
(10, 4, 2), σ: 0.870909757980427
(10, 4, 3), σ: 0.09064671552202962
(10, 4, 4), σ: 0.248986426422921
(10, 4, 5), σ: 0.9003641177879229
(10, 4, 6), σ: 3.4834119412376188
(10, 5, 2), σ: 0.10344631007944895
(10, 5, 3), σ: 0.30639232039130126
(10, 5, 4), σ: 0.9126815335404459
(10, 5, 5), σ: 3.6486358668172185
(10, 6, 2), σ: 0.23215725476632176
(10, 6, 3), σ: 0.8627622276023892
(10, 6, 4), σ: 3.547742836543683


In [17]:
# Evaluate the checkpoints stored in this directory; predict_and_evaluate reads
# the same global to locate the model files.
model_save_directory = "model_14"

# Held-out evaluator inputs/outputs saved alongside the checkpoints.
with gzip.open(f"{model_save_directory}/eval_inputs_combined", 'rb') as inputs_file:
    eval_inputs_combined = pickle.load(inputs_file)
with gzip.open(f"{model_save_directory}/eval_outputs_combined", 'rb') as outputs_file:
    eval_outputs_combined = pickle.load(outputs_file)

σ = evaluator.eval(
    inputs=eval_inputs_combined,
    outputs=eval_outputs_combined,
    func=predict_and_evaluate,
)

# Mean over all reported entries (0 when nothing was evaluated).
if σ:
    average_σ = sum(σ.values()) / len(σ)
else:
    average_σ = 0
print(f"Average σ: {average_σ}")
for key, value in σ.items():
    print(f"{key}, σ: {value}")

Average σ: 1.3420547103612788
(9, 4, 2), σ: 0.15783294877864124
(9, 4, 3), σ: 0.21744654207274894
(9, 4, 4), σ: 0.8197664727320247
(9, 4, 5), σ: 3.327273094212319
(9, 5, 2), σ: 0.18842748083876276
(9, 5, 3), σ: 0.7767539430189144
(9, 5, 4), σ: 3.415617086870581
(9, 6, 2), σ: 0.5664792600907335
(9, 6, 3), σ: 3.4397167269377222
(10, 4, 2), σ: 0.8679689114665337
(10, 4, 3), σ: 0.09507866568972716
(10, 4, 4), σ: 0.25411240895024056
(10, 4, 5), σ: 0.9323937398969352
(10, 4, 6), σ: 3.49916901364459
(10, 5, 2), σ: 0.10224265180293701
(10, 5, 3), σ: 0.3045049337522387
(10, 5, 4), σ: 0.9016705340662106
(10, 5, 5), σ: 3.6325295136671096
(10, 6, 2), σ: 0.2364360624962371
(10, 6, 3), σ: 0.8787114986539757
(10, 6, 4), σ: 3.56901742794767


In [10]:
# Evaluate the checkpoints stored in this directory; predict_and_evaluate reads
# the same global to locate the model files.
model_save_directory = "model_13"

# Held-out evaluator inputs/outputs saved alongside the checkpoints.
with gzip.open(f"{model_save_directory}/eval_inputs_combined", 'rb') as inputs_file:
    eval_inputs_combined = pickle.load(inputs_file)
with gzip.open(f"{model_save_directory}/eval_outputs_combined", 'rb') as outputs_file:
    eval_outputs_combined = pickle.load(outputs_file)

σ = evaluator.eval(
    inputs=eval_inputs_combined,
    outputs=eval_outputs_combined,
    func=predict_and_evaluate,
)

# Mean over all reported entries (0 when nothing was evaluated).
if σ:
    average_σ = sum(σ.values()) / len(σ)
else:
    average_σ = 0
print(f"Average σ: {average_σ}")
for key, value in σ.items():
    print(f"{key}, σ: {value}")

Average σ: 1.3377592467696955
(9, 4, 2), σ: 0.17365590323652508
(9, 4, 3), σ: 0.2308594847306596
(9, 4, 4), σ: 0.8252615940863484
(9, 4, 5), σ: 3.43882251037085
(9, 5, 2), σ: 0.21270508490425663
(9, 5, 3), σ: 0.7694383907862355
(9, 5, 4), σ: 3.397880503674985
(9, 6, 2), σ: 0.5237900918288482
(9, 6, 3), σ: 3.18749291171175
(10, 4, 2), σ: 0.868691274230268
(10, 4, 3), σ: 0.10688689304575737
(10, 4, 4), σ: 0.25410796487092574
(10, 4, 5), σ: 0.8722170234569177
(10, 4, 6), σ: 3.521227586111585
(10, 5, 2), σ: 0.12608340335449197
(10, 5, 3), σ: 0.3363254893389921
(10, 5, 4), σ: 0.9393224277845033
(10, 5, 5), σ: 3.521088419941405
(10, 6, 2), σ: 0.26776244883086564
(10, 6, 3), σ: 0.8915959521932466
(10, 6, 4), σ: 3.6277288236741905
