In [1]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
from tensorflow import keras
from keras import layers, Model, optimizers, Input

def load_vector(filename, name):
    with h5py.File(filename, 'r') as f:
        data = f[name][:]
        print(f"Loaded dataset '{name}' with shape {data.shape}")
        return data

def save_vector(filename, name, data): # Creates dataset if it does not exist, overwrites if it does
    with h5py.File(filename, 'a') as f:  # open file in append mode
        if name in f:
            del f[name]  # delete old dataset before overwriting
        f.create_dataset(name, data=data, chunks=True, compression='gzip')
        print(f"Saved dataset '{name}' with shape {data.shape}")

file = 'CSTR_Simulation.h5'

C_A = load_vector(file, 'C_A')
T = load_vector(file, 'T')
T_C = load_vector(file, 'T_C')
h = load_vector(file, 'h')
Q = load_vector(file, 'Q_vec')
Q_C = load_vector(file, 'Q_c_vec')

l = load_vector(file, 'l')
Q_SP = load_vector(file, 'Q_SP')
l_C = load_vector(file, 'l_c')
Q_C_SP = load_vector(file, 'Qc_SP')

file = 'TestInputVectorsMinutes.h5'

T_F = load_vector(file, 'F4')
C_AF = load_vector(file, 'F5')
T_CF = load_vector(file, 'F6')
Q_F = load_vector(file, 'F7')

t = load_vector(file, 't')

F1 = load_vector(file, 'F1.plt').reshape(-1)*1
F2 = load_vector(file, 'F2.plt').reshape(-1)*2
F3 = load_vector(file, 'F3.plt').reshape(-1)*3
F4 = load_vector(file, 'F4.plt').reshape(-1)*4
F5 = load_vector(file, 'F5.plt').reshape(-1)*5
F6 = load_vector(file, 'F6.plt').reshape(-1)*6
F7 = load_vector(file, 'F7.plt').reshape(-1)*7
F8 = load_vector(file, 'F8.plt').reshape(-1)*8
F9 = load_vector(file, 'F9.plt').reshape(-1)*9

T_F = T_F[1:]
C_AF = C_AF[1:]
T_CF = T_CF[1:]
Q_F = Q_F[1:]
t = t[1:]
F1 = F1[1:]
F2 = F2[1:]
F3 = F3[1:]
F4 = F4[1:]
F5 = F5[1:]
F6 = F6[1:]
F7 = F7[1:]
F8 = F8[1:]
F9 = F9[1:]




Loaded dataset 'C_A' with shape (1800000, 1)
Loaded dataset 'T' with shape (1800000, 1)
Loaded dataset 'T_C' with shape (1800000, 1)
Loaded dataset 'h' with shape (1800000, 1)
Loaded dataset 'Q_vec' with shape (1800000, 1)
Loaded dataset 'Q_c_vec' with shape (1800000, 1)
Loaded dataset 'l' with shape (1800000, 1)
Loaded dataset 'Q_SP' with shape (1800000, 1)
Loaded dataset 'l_c' with shape (1800000, 1)
Loaded dataset 'Qc_SP' with shape (1800000, 1)
Loaded dataset 'F4' with shape (1800001, 1)
Loaded dataset 'F5' with shape (1800001, 1)
Loaded dataset 'F6' with shape (1800001, 1)
Loaded dataset 'F7' with shape (1800001, 1)
Loaded dataset 't' with shape (1800001,)
Loaded dataset 'F1.plt' with shape (1800001,)
Loaded dataset 'F2.plt' with shape (1800001,)
Loaded dataset 'F3.plt' with shape (1800001,)
Loaded dataset 'F4.plt' with shape (1800001,)
Loaded dataset 'F5.plt' with shape (1800001,)
Loaded dataset 'F6.plt' with shape (1800001,)
Loaded dataset 'F7.plt' with shape (1800001,)
Loaded d

In [2]:
faults = np.vstack([F1,F2,F3,F4,F5,F6,F7,F8,F9])

def combine_fault_vectors(faults):

    # For each time step, count how many faults are active (non-zero)
    active_counts = np.count_nonzero(faults, axis=0)
    
    # Initialize combined vector with zeros
    combined = np.zeros(faults.shape[1], dtype=int)
    
    # Find indices where exactly one fault is active
    single_fault_idx = np.where(active_counts == 1)[0]
    
    # For these indices, set combined to the fault number (non-zero value)
    # Since only one fault active, sum will give the fault number
    combined[single_fault_idx] = faults[:, single_fault_idx].sum(axis=0)
    
    # For indices where multiple faults active, set combined to 10
    multiple_fault_idx = np.where(active_counts > 1)[0]
    combined[multiple_fault_idx] = 10
    
    return combined

Fault_class = combine_fault_vectors(faults).reshape(-1, 1)

print(Fault_class.shape)

values, counts = np.unique(Fault_class, return_counts=True)

for val, count in zip(values, counts):
    print(f"Fault {val}: {count} occurrences")

df_full = np.hstack([Fault_class, C_A, T, T_C, h, Q, Q_C, Q_F, C_AF, T_F, T_CF, l, Q_SP, l_C, Q_C_SP])
df = df_full[::10, :]

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(df[df[:, 0] == 0, 1:])
df_scaled =sc.transform(df[:, 1:])

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_scaled, df[:, 0], test_size=0.3, random_state=41)

(1800000, 1)
Fault 0: 1668600 occurrences
Fault 1: 8880 occurrences
Fault 2: 31680 occurrences
Fault 3: 4200 occurrences
Fault 5: 32520 occurrences
Fault 6: 21600 occurrences
Fault 7: 4320 occurrences
Fault 8: 10200 occurrences
Fault 9: 10200 occurrences
Fault 10: 7800 occurrences


In [3]:
def build_autoencoder(
    input_dim,
    encoding_dim,
    hidden_layers=[64, 32],  # List of units for hidden layers before bottleneck
    activation='relu',
    output_activation='sigmoid',
    optimizer='adam',
    learning_rate=0.001,
    loss='mse'
):
    # Input layer
    input_layer = Input(shape=(input_dim,))

    # Encoder
    x = input_layer
    for units in hidden_layers:
        x = layers.Dense(units, activation=activation)(x)
    encoded = layers.Dense(encoding_dim, activation=activation, name='encoding_layer')(x)

    # Decoder (mirror of encoder)
    x = encoded
    for units in reversed(hidden_layers):
        x = layers.Dense(units, activation=activation)(x)
    output_layer = layers.Dense(input_dim, activation=output_activation)(x)

    # Autoencoder model: input to reconstructed output
    autoencoder = Model(inputs=input_layer, outputs=output_layer)

    # Encoder model: input to bottleneck representation
    encoder = Model(inputs=input_layer, outputs=encoded)

    # Set optimizer
    opt_lower = optimizer.lower()
    if opt_lower == 'adam':
        opt = optimizers.Adam(learning_rate=learning_rate)
    elif opt_lower == 'sgd':
        opt = optimizers.SGD(learning_rate=learning_rate)
    elif opt_lower == 'rmsprop':
        opt = optimizers.RMSprop(learning_rate=learning_rate)
    elif opt_lower == 'adagrad':
        opt = optimizers.Adagrad(learning_rate=learning_rate)
    else:
        raise ValueError("Unsupported optimizer selected.")

    # Compile autoencoder
    autoencoder.compile(optimizer=opt, loss=loss)

    # Return both models
    return autoencoder, encoder

# Build autoencoder model
autoencoder, encoder = build_autoencoder(
    input_dim=14,
    encoding_dim=4,
    hidden_layers=[10,6],  # Example hidden layers
    activation='relu',
    output_activation='sigmoid',
    optimizer='rmsprop',
    learning_rate=0.01,
    loss='mse')
autoencoder.summary()

# Autoencoder training
history = autoencoder.fit(
    X_train,
    X_train,  # Autoencoders use input as output
    epochs=10,
    batch_size=32,
    validation_data=(X_test, X_test),
    verbose=1)

# Evaluate the model
loss = autoencoder.evaluate(X_test, X_test, verbose=0)
print(f"Test loss: {loss}")

Epoch 1/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 593us/step - loss: 5.1721 - val_loss: 5.1440
Epoch 2/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 561us/step - loss: 5.1987 - val_loss: 5.1368
Epoch 3/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 561us/step - loss: 5.1774 - val_loss: 5.1318
Epoch 4/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 552us/step - loss: 4.9446 - val_loss: 5.1287
Epoch 5/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 558us/step - loss: 5.0997 - val_loss: 5.1264
Epoch 6/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 546us/step - loss: 5.1357 - val_loss: 5.1368
Epoch 7/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 533us/step - loss: 5.4242 - val_loss: 5.1297
Epoch 8/10
[1m3938/3938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 536us/step - loss: 5.2380 - val_loss: 5.1326
Epoch 9/

In [4]:
compressed_data = encoder.predict(df_scaled)



[1m5625/5625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 207us/step


In [None]:
X_train, X_test, y_train, y_test = train_test_split(compressed_data, df[:, 0], test_size=0.7)

from sklearn import svm

classifier = svm.SVC(kernel='rbf', C=1.0, gamma= 1)
classifier.fit(X_train, y_train)

from sklearn.metrics import classification_report, confusion_matrix
y_pred = classifier.predict(X_test)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))

file = 'Classification_Results.h5'

save_vector(file, 'y_pred', y_pred)
save_vector(file, 'y_test', y_test)

Confusion Matrix:
[[116533     14     78      0     22    104      1      0      0      0]
 [   553     82      7      0      0      0      0      0      0      0]
 [  1786      0    418      0      0      4      0      0      0      3]
 [    27      0      0    268      0      0      0      0      0      0]
 [  1049      0      0      0   1282      1      0      0      0      0]
 [   983      0      0      0      0    500      0      0      0      0]
 [   177      0      0      0      0      0    130      0      0      0]
 [   233      0      0      0      0      0      0    482      0      0]
 [   720      0      0      0      0      0      0      0      1      0]
 [    42      0      9      0      0      0      0      0      0    491]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.95      1.00      0.98    116752
         1.0       0.85      0.13      0.22       642
         2.0       0.82      0.19      0.31      2211
         3.