In [1]:
import tensorflow as tf
from keras import regularizers
from keras.callbacks import EarlyStopping
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity
import pandas as pd
import matplotlib.pyplot as plt
import csv
from tqdm import tqdm

In [2]:
def mse_loss(y_pred, y):
    """Return the mean of the element-wise squared difference between two tensors.

    Args:
        y_pred: First tensor (the predictions, by name).
        y: Second tensor (the targets, by name).

    Returns:
        A scalar tensor: mean((y_pred - y) ** 2) over all elements.
    """
    squared_error = tf.square(y_pred - y)
    return tf.reduce_mean(squared_error)
# Fix TensorFlow's global seed before any random op runs.
tf.random.set_seed(42)

allData = r"C:\Users\funkb\DataspellProjects\MethaneDataScience\Output\LowPPMMatrix.csv"
df = pd.read_csv(allData)

# Drop the 150 ppm rows, then remove the columns that are not used for modelling.
df = df.loc[df['Target PPM'] != 150].drop(
    columns=['Unnamed: 0', 'lowInterval', 'highInterval', 'Ratio']
)
print(df.head())

# Leave-one-sensor-out split: every row of the chosen sensor goes to the test set.
test_sensor_id = 0  # Replace with the ID of the sensor you want in the test set

# SensorID is only needed to make the split, so it is dropped from both halves.
train_data = df.loc[df['SensorID'] != test_sensor_id].drop(columns=['SensorID'])
test_data = df.loc[df['SensorID'] == test_sensor_id].drop(columns=['SensorID'])

# Convert to float32 TensorFlow tensors. Later cells slice these by position,
# so the remaining column order matters.
train_data_tf = tf.convert_to_tensor(train_data, dtype=tf.float32)
test_data_tf = tf.convert_to_tensor(test_data, dtype=tf.float32)

   SensorID  Target PPM  Resistance  Temperature  RelativeHumidity
0       0.0         0.0   30.009404     0.795619          6.227832
1       3.0         0.0   28.086765     0.795619          6.227832
2       5.0         0.0   30.475486     0.795619          6.227832
3       7.0         0.0   31.911561     0.795619          6.227832
4       8.0         0.0   26.515605     0.795619          6.227832


In [3]:

# Shuffle the training rows once, with a fixed op-level seed.
train_data_shuffled = tf.random.shuffle(train_data_tf, seed=42)

# Column 0 is used as the label; every remaining column is a model input.
y_train = train_data_shuffled[:, 0]
x_train = train_data_shuffled[:, 1:]
y_test = test_data_tf[:, 0]
x_test = test_data_tf[:, 1:]

In [4]:

# Mean squared error loss function
def mse_loss(y_pred, y_true):
    """Return mean((y_pred - y_true) ** 2) over all elements as a scalar tensor.

    This definition replaces the earlier `mse_loss` from a previous cell.

    NOTE(review): Keras calls a loss as fn(y_true, y_pred), so when this is
    passed as `loss=` the parameter names are swapped relative to what each
    one receives. The result is unaffected because the squared difference is
    symmetric in its arguments.
    """
    error = y_pred - y_true
    return tf.reduce_mean(tf.square(error))
def rmse(y_true, y_pred):
    """Root-mean-square error between targets and predictions.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        A scalar tensor: sqrt(mean((y_pred - y_true) ** 2)).
    """
    mean_squared_error = tf.reduce_mean(tf.square(y_pred - y_true))
    return tf.sqrt(mean_squared_error)
def r_squared(y_true, y_pred):
    """Coefficient of determination (R^2) of `y_pred` against `y_true`.

    Computed as 1 - SS_res / SS_tot over the tensors it is given. When used
    as a Keras metric this is evaluated per batch, so the reported value is
    an average of per-batch R^2 values rather than the R^2 of the whole set.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        A scalar tensor holding R^2. The value is always finite.
    """
    residual = tf.reduce_sum(tf.square(tf.subtract(y_true, y_pred)))
    total = tf.reduce_sum(tf.square(tf.subtract(y_true, tf.reduce_mean(y_true))))
    # If every target in the batch is identical, SS_tot is exactly 0 and the
    # division would yield inf/NaN, which then poisons the averaged metric.
    # Clamping to Keras' epsilon keeps the result finite and leaves any batch
    # with a larger SS_tot bit-for-bit unchanged.
    safe_total = tf.maximum(total, tf.keras.backend.epsilon())
    r2 = tf.subtract(1.0, tf.divide(residual, safe_total))
    return r2

class RMSEHistory(tf.keras.callbacks.Callback):
    """Keras callback that records the per-epoch training and validation RMSE.

    Attributes:
        modelRMSE: One entry per epoch, read from the 'rmse' key of the logs.
        validationRMSE: One entry per epoch, read from the 'val_rmse' key.

    Both lists are emptied at the start of every training run, so one
    instance reused across several fit() calls only keeps the latest run.
    An entry is None when the corresponding key is absent from the logs.
    """

    def on_train_begin(self, logs=None):
        # Start each training run with empty histories.
        self.modelRMSE, self.validationRMSE = [], []

    def on_epoch_end(self, epoch, logs=None):
        epoch_logs = {} if logs is None else logs
        self.modelRMSE.append(epoch_logs.get('rmse'))
        self.validationRMSE.append(epoch_logs.get('val_rmse'))

In [5]:
# Shape of one input sample: a 1-tuple holding the number of feature columns.
input_shape = (x_train.shape[1],)

# Shared callback that collects the per-epoch RMSE curves during fit().
rmse_history = RMSEHistory()

In [None]:
def build_model(input_shape, regularization_factor=0.01, base_num=None):
    """Build the four-hidden-layer regression network used by the sweep.

    Hidden widths are 2x, 4x, 4x and 2x `base_num`, each with ReLU and an L2
    kernel regulariser, followed by a single linear output unit.

    Args:
        input_shape: Shape tuple of one input sample.
        regularization_factor: L2 penalty applied to every hidden kernel.
        base_num: Width multiplier. When None, the notebook-level `baseNum`
            is used, which keeps the original two-argument call working.

    Returns:
        An uncompiled `tf.keras.Sequential` model.
    """
    if base_num is None:
        base_num = baseNum
    return tf.keras.Sequential([
        tf.keras.layers.Dense(base_num * 2, activation='relu', kernel_regularizer=regularizers.l2(regularization_factor), input_shape=input_shape),
        tf.keras.layers.Dense(base_num * 4, activation='relu', kernel_regularizer=regularizers.l2(regularization_factor)),
        tf.keras.layers.Dense(base_num * 4, activation='relu', kernel_regularizer=regularizers.l2(regularization_factor)),
        tf.keras.layers.Dense(base_num * 2, activation='relu', kernel_regularizer=regularizers.l2(regularization_factor)),
        tf.keras.layers.Dense(1)
    ])


# Sweep: 32 network sizes x 100 independent trainings each, one CSV row per run.
# The context manager guarantees the results file is closed even if a run
# raises or the cell is interrupted part-way through.
with open('experiment_results.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)
    # NOTE: the 'Test_Accuracy' column holds the r_squared metric, not a
    # classification accuracy. The header is kept for existing consumers.
    csvwriter.writerow(['BaseNum', 'Run', 'Test_RMSE', 'Test_Accuracy'])

    for baseNum in tqdm(range(1, 33), desc='BaseNum Loop'):  # Wrap outer loop with tqdm
        for run in tqdm(range(1, 101), desc=f'Run Loop for BaseNum {baseNum}', leave=False):  # Wrap inner loop with tqdm
            # Release the Keras state left by the previous run. Without this,
            # thousands of models built in one kernel keep accumulating state.
            tf.keras.backend.clear_session()

            with tf.device('/GPU:0'):
                model = build_model(input_shape, base_num=baseNum)
                model.compile(optimizer='adam', loss=mse_loss, metrics=[r_squared, rmse])
                model.fit(x_train, y_train, epochs=1000, batch_size=50, validation_split=0.2, callbacks=[rmse_history], verbose=0)
                # evaluate() returns [loss, r_squared, rmse], in compile() order.
                test_metrics = model.evaluate(x_test, y_test, verbose=0)
                test_loss, test_accuracy, test_rmse = test_metrics[0], test_metrics[1], test_metrics[2]

            # Save the results; flush per row so partial results survive a crash.
            csvwriter.writerow([baseNum, run, test_rmse, test_accuracy])
            csvfile.flush()

BaseNum Loop:   0%|          | 0/32 [00:00<?, ?it/s]
Run Loop for BaseNum 1:   0%|          | 0/100 [00:00<?, ?it/s][A
Run Loop for BaseNum 1:   1%|          | 1/100 [00:29<48:53, 29.63s/it][A
Run Loop for BaseNum 1:   2%|▏         | 2/100 [00:59<48:15, 29.54s/it][A
Run Loop for BaseNum 1:   3%|▎         | 3/100 [01:28<47:29, 29.38s/it][A
Run Loop for BaseNum 1:   4%|▍         | 4/100 [01:57<47:04, 29.42s/it][A
Run Loop for BaseNum 1:   5%|▌         | 5/100 [02:27<46:33, 29.41s/it][A
Run Loop for BaseNum 1:   6%|▌         | 6/100 [02:56<45:48, 29.24s/it][A
Run Loop for BaseNum 1:   7%|▋         | 7/100 [03:25<45:12, 29.17s/it][A
Run Loop for BaseNum 1:   8%|▊         | 8/100 [03:54<44:38, 29.12s/it][A
Run Loop for BaseNum 1:   9%|▉         | 9/100 [04:23<44:04, 29.06s/it][A
Run Loop for BaseNum 1:  10%|█         | 10/100 [04:52<43:42, 29.14s/it][A
Run Loop for BaseNum 1:  11%|█         | 11/100 [05:21<43:17, 29.19s/it][A
Run Loop for BaseNum 1:  12%|█▏        | 12/100 [05:5

In [None]:
# Width multiplier for the hidden layers of the model built below.
# NOTE: with baseNum = 2 it is hit or miss whether training finds the best
# optimum. Rerun a few times to determine what the best value is.
baseNum = 2


# Define your original model with regularization
def build_model(input_shape, regularization_factor=0.01):
    """Build the regularised regression network sized by the global `baseNum`.

    Args:
        input_shape: Shape tuple of one input sample.
        regularization_factor: L2 penalty applied to every hidden kernel.

    Returns:
        An uncompiled `tf.keras.Sequential` model with hidden widths of
        2x, 4x, 4x and 2x `baseNum` (ReLU) and a single linear output unit.
    """
    def hidden(units, **extra):
        # Every hidden layer shares the same activation and L2 regulariser.
        return tf.keras.layers.Dense(
            units,
            activation='relu',
            kernel_regularizer=regularizers.l2(regularization_factor),
            **extra,
        )

    return tf.keras.Sequential([
        hidden(baseNum * 2, input_shape=input_shape),
        hidden(baseNum * 4),
        hidden(baseNum * 4),
        hidden(baseNum * 2),
        # hidden(baseNum),  # extra narrow layer, currently disabled
        tf.keras.layers.Dense(1),
    ])

# Early stopping callback: stop once val_loss has gone 100 epochs without
# improving, and roll the model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)


def _report_early_stopping(callback):
    """Print whether `callback` ended the most recent fit() early."""
    if callback.stopped_epoch > 0:
        print(f"Early stopping occurred at epoch {callback.stopped_epoch}")
        print(f"Restoring model weights from the end of the best epoch.")
    else:
        print("Early stopping did not occur.")


def _print_test_metrics(metrics):
    """Print an evaluate() result and return it as (loss, r_squared, rmse)."""
    loss, r2, root_mse = metrics[0], metrics[1], metrics[2]
    print(f"Test Loss (MSE): {loss}")
    # The value labelled "Accuracy" is the r_squared metric scaled by 100.
    print(f"Test Accuracy: {r2*100}")
    print(f"Test RMSE: {root_mse}")
    return loss, r2, root_mse


def _plot_rmse_history(history):
    """Plot the train/validation RMSE curves held by an RMSEHistory callback."""
    plt.figure(figsize=(10, 5))
    plt.plot(history.modelRMSE, label='Train RMSE')
    plt.plot(history.validationRMSE, label='Validation RMSE')
    plt.xlabel('Generation')
    plt.ylabel('RMSE')
    plt.title('RMSE During Training')
    plt.legend()
    plt.show()


with tf.device('/GPU:0'):
    # Regular model training.
    model = build_model(input_shape)
    model.compile(optimizer='adam', loss=mse_loss, metrics=[r_squared, rmse])
    # early_stopping has to be attached to this fit(). Otherwise it never runs,
    # and the stopped_epoch check below always reports "did not occur".
    model.fit(x_train, y_train, epochs=1000, batch_size=50, validation_split=0.2,
              callbacks=[early_stopping, rmse_history])

    _report_early_stopping(early_stopping)
    test_metrics = model.evaluate(x_test, y_test)
    test_loss, test_accuracy, test_rmse = _print_test_metrics(test_metrics)
    _plot_rmse_history(rmse_history)

    # Pruning
    # NOTE(review): initial and final sparsity are both 0.0, so this schedule
    # appears to request no pruning at all. Confirm this is intentional.
    # NOTE(review): the step bounds use len(x_train) // 50 as steps per epoch,
    # which ignores the 20% validation split taken by fit(). TODO confirm.
    pruning_params = {
        'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.0,
                                                    final_sparsity=0.0,
                                                    begin_step=len(x_train) // 50 * 10,
                                                    end_step=len(x_train) // 50 * 500)
    }

    model_for_pruning = sparsity.prune_low_magnitude(model, **pruning_params)
    model_for_pruning.compile(optimizer='adam', loss=mse_loss, metrics=[r_squared, rmse])

    callbacks = [
        sparsity.UpdatePruningStep(),
        early_stopping,
        rmse_history
    ]

    # rmse_history empties its lists when training begins, so from here on it
    # holds only the curves of this pruning run.
    model_for_pruning.fit(x_train, y_train, epochs=200, batch_size=50, validation_split=0.2, callbacks=callbacks)

    # Remove the pruning wrappers to finalize the model
    final_model = sparsity.strip_pruning(model_for_pruning)


# Report on the pruning fit.
_report_early_stopping(early_stopping)

# Evaluate on the held-out sensor.
# NOTE(review): this evaluates `model`, not `final_model`. Confirm which one
# is meant to be reported and saved.
with tf.device('/GPU:0'):  # This line is optional
    test_metrics = model.evaluate(x_test, y_test)
    test_loss, test_accuracy, test_rmse = _print_test_metrics(test_metrics)


# Plotting RMSE values
_plot_rmse_history(rmse_history)
# model.save('MethaneModel.keras')

In [None]:
# Save the current `model` to disk under a fixed file name.
model.save(filepath='MethaneModelBest391-2nodes.keras')

In [None]:
import matplotlib.pyplot as plt

# Predict on the held-out test inputs.
with tf.device('/GPU:0'):  # Optional
    y_pred = model.predict(x_test)

# Collapse targets and predictions to 1-D arrays for plotting.
y_test_flat = y_test.numpy().reshape(-1)
y_pred_flat = y_pred.reshape(-1)

# Predicted-versus-true scatter on a square, equal-aspect axes.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(y_test_flat, y_pred_flat, alpha=0.5)
ax.set_xlabel('True Values')
ax.set_ylabel('Predictions')
ax.axis('equal')
ax.axis('square')

# Dashed 45-degree reference line: points on it are perfect predictions.
# It is drawn after the axis calls, in the same order as before.
ax.plot([-100, 1200], [-100, 1200], '--', color='gray')

plt.show()


In [None]:
humidity_values = [0, 15, 30, 45, 60]  # Updated humidity values

fig, axs = plt.subplots(2, 3, figsize=(15, 10))  # Create a 2x3 grid of subplots

# Flatten the axs array so panels can be addressed with a single index.
axs = axs.flatten()

for i, H in enumerate(humidity_values):
    axs[i].set_xlabel('Target PPM')
    axs[i].set_ylabel('Predicted PPM')
    axs[i].set_title(f'Humidity={H}%')

    # Keep the rows whose humidity lies within +/-5 of the panel's value.
    # .copy() gives an independent frame, so adding a column below does not
    # write into a slice of `df` (no SettingWithCopyWarning).
    mask = (df['RelativeHumidity'] >= H - 5) & (df['RelativeHumidity'] <= H + 5)
    df_filtered = df.loc[mask].copy()

    # An empty humidity bin has nothing to predict. Leave the panel labelled
    # but blank instead of passing an empty frame to the model.
    if df_filtered.empty:
        continue

    xDataResistance1 = df_filtered.loc[:, 'Resistance']
    xDataTemp1 = df_filtered.loc[:, 'Temperature']
    xDataRH1 = df_filtered.loc[:, 'RelativeHumidity']
    yDataTargetPPM1 = df_filtered.loc[:, 'Target PPM']

    # Model inputs, in the order Resistance, Temperature, RelativeHumidity.
    combined_df = pd.concat([xDataResistance1, xDataTemp1, xDataRH1], axis=1)

    with tf.device('/GPU:0'):  # Optional
        y_pred = model.predict(combined_df)
    # Store the predictions as a flat 1-D column, one value per row.
    df_filtered['Predicted PPM'] = y_pred.ravel()

    # Scatter of every prediction against its target.
    sc = axs[i].scatter(yDataTargetPPM1, df_filtered['Predicted PPM'], alpha=0.5)

    # Calculate mean prediction for each target PPM and plot it
    mean_pred_per_target_ppm = df_filtered.groupby('Target PPM')['Predicted PPM'].mean()
    axs[i].plot(mean_pred_per_target_ppm.index, mean_pred_per_target_ppm.values, color='r')

    # Dotted identity line (prediction == target) for reference.
    axs[i].plot(yDataTargetPPM1, yDataTargetPPM1, 'k:')

# The grid has more panels than humidity values, so hide the unused ones.
for unused_ax in axs[len(humidity_values):]:
    unused_ax.set_visible(False)

plt.tight_layout()  # Adjust layout to prevent overlapping
plt.show()