In [1]:
# Importing the required libraries
from src.utils import base_model_config, create_and_run_model, plot_graph
from sklearn import preprocessing
import pandas as pd
import tensorflow as tf
from tensorflow.keras.regularizers import l1, l2, l1_l2
from sklearn.model_selection import RandomizedSearchCV
from src.utils import create_model
from scikeras.wrappers import KerasClassifier

In [4]:
## Acquire and process data

def get_rca_data():
    #Load the data file into a Pandas Dataframe
    symptom_data = pd.read_csv("../dataset/root_cause_analysis.csv")

    label_encoder = preprocessing.LabelEncoder()
    symptom_data['ROOT_CAUSE'] = label_encoder.fit_transform(
                                    symptom_data['ROOT_CAUSE'])

    #Convert Pandas DataFrame to a numpy vector
    np_symptom = symptom_data.to_numpy().astype(float)

    #Extract the feature variables (X)
    X_data = np_symptom[:,1:8]

    #Extract the target variable (Y), conver to one-hot-encoding
    Y_data=np_symptom[:,8]
    Y_data = tf.keras.utils.to_categorical(Y_data,3)

    return X_data,Y_data

In [None]:
## Tuning layers in the network

accuracy_measures = {}
layer_list =[]
for layer_count in range(1,6):
    
    #32 nodes in each layer
    layer_list.append(32)
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    model_config["HIDDEN_NODES"] = layer_list
    model_name = "Layers-" + str(layer_count)
    history=create_and_run_model(model_config,X,Y,model_name)
    
    accuracy_measures[model_name] = history.history["accuracy"]
    
plot_graph(accuracy_measures,"Accuracy vs Layers")

In [None]:
# tuning the number of nodes in the network
accuracy_measures = {}

node_increment=8

for node_count in range(1,5):
    
    #have 2 hidden layers in the networks as selected above
    layer_list =[]
    for layer_count in range(2):
        layer_list.append(node_count * node_increment)
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    model_config["HIDDEN_NODES"] = layer_list
    model_name = "Nodes-" + str(node_count * node_increment)
    history=create_and_run_model(model_config,X,Y, model_name)
    
    accuracy_measures[model_name] = history.history["accuracy"]

plot_graph(accuracy_measures,"Accuracy vs Nodes")

## Tuning backpropagation

In [None]:
# use optimizers

accuracy_measures = {}

optimizer_list = ['sgd','rmsprop','adam','adagrad']
for optimizer in optimizer_list:
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    model_config["OPTIMIZER"] = optimizer
    model_name = "Optimizer-" + optimizer
    history=create_and_run_model(model_config,X,Y, model_name)
    
    accuracy_measures[model_name] = history.history["accuracy"]
    
plot_graph(accuracy_measures,"Accuracy vs Optimizers")

In [None]:
# tuning learning rate

accuracy_measures = {}

learning_rate_list = [0.001, 0.005,0.01,0.1,0.5]
for learning_rate in learning_rate_list:
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    #Fix Optimizer to the one chosen above
    model_config["OPTIMIZER"]="rmsprop"
    model_config["LEARNING_RATE"] = learning_rate
    model_name="Learning-Rate-" + str(learning_rate)
    history=create_and_run_model(model_config,X,Y, model_name)
    
    #Using validation accuracy
    accuracy_measures[model_name] = history.history["accuracy"]
    
plot_graph(accuracy_measures,"Accuracy vs Learning Rate")

## Avoiding overfitting

In [None]:
# tuning regularizers
accuracy_measures = {}

regularizer_list = [l1(0.01), l2(0.01), l1_l2(l1=0.01, l2=0.01)]
regularizer_names = ['l1', 'l2', 'l1_l2']
for regularizer, reg_name in zip(regularizer_list, regularizer_names):
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    model_config["REGULARIZER"] = regularizer
    model_name = "Regularizer-" + str(reg_name)
    history=create_and_run_model(model_config,X,Y, model_name)
    
    #Switch to validation accuracy
    accuracy_measures[model_name] = history.history["val_accuracy"]

plot_graph(accuracy_measures,"Validation Accuracy vs Regularizers")

In [None]:
# tuning dropout
accuracy_measures = {}

dropout_list = [0.0, 0.1, 0.2, 0.5]
for dropout in dropout_list:
    
    model_config = base_model_config()
    X,Y = get_rca_data()
    
    #Use the regularizer chosen above
    model_config["REGULARIZER"] = "l2"
    model_config["DROPOUT_RATE"] = dropout
    model_name="Dropout-" + str(dropout)
    history=create_and_run_model(model_config,X,Y, model_name)
    
    #Using validation accuracy
    accuracy_measures[model_name] = history.history["val_accuracy"]
    
plot_graph(accuracy_measures,"Validation Accuracy vs Dropout")

In [None]:
## Building the final model
# TODO: Double check optimal values for the model

accuracy_measures = {}

#Base Minimal Model
model_config = base_model_config()
model_config["HIDDEN_NODES"] = [16]
model_config["NORMALIZATION"] = None
model_config["OPTIMIZER"] = "rmsprop"
model_config["LEARNING_RATE"] = 0.001
model_config["REGULARIZER"]=None
model_config["DROPOUT_RATE"] = 0.0

X,Y = get_rca_data()

model_name = "Base-Model-" + str(layer_count)

history=create_and_run_model(model_config,X,Y,model_name)
    
accuracy_measures[model_name] = history.history["accuracy"]


#Adding all optimizations
model_config = base_model_config()
model_config["HIDDEN_NODES"] = [32,32]
model_config["NORMALIZATION"] = "batch"
model_config["OPTIMIZER"] = "rmsprop"
model_config["LEARNING_RATE"] = 0.001
model_config["REGULARIZER"]="l2"
model_config["DROPOUT_RATE"] = 0.2

X,Y = get_rca_data()

model_name = "Optimized-Model-" + str(layer_count)

history=create_and_run_model(model_config,X,Y,model_name)
    
accuracy_measures[model_name] = history.history["accuracy"]

plot_graph(accuracy_measures,f"Accuracy {model_name}")

In [5]:
# Load data
X, Y = get_rca_data()

# Wrap the model using KerasClassifier
pre_model = create_model(feature_numbers=X.shape[1], number_of_predicted_columns=Y.shape[1])
model = KerasClassifier(model=pre_model, epochs=50, batch_size=32, verbose=0) # TODO: read documentation to understand the parameters

# Define the parameter grid
param_grid = {
    'model__hidden_nodes': [[16], [32, 32], [64, 64]],
    'model__optimizer': ['rmsprop', 'adam', 'sgd', 'adagrad'],
    'model__learning_rate': [0.001, 0.005, 0.01],
    'model__regularizer': [None, l1(0.01), l2(0.01), l1_l2(l1=0.01, l2=0.01)],
    'model__dropout_rate': [0.0, 0.2, 0.5],
    'model__normalization': [None, 'batch']
}



# Use RandomizedSearchCV to find the best parameters
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, verbose=1, n_jobs=-1)
random_search_result = random_search.fit(X, Y)

# Print the best parameters and best score
print("Best: %f using %s" % (random_search_result.best_score_, random_search_result.best_params_))

  saveable.load_own_variables(weights_store.get(inner_path))


Fitting 3 folds for each of 10 candidates, totalling 30 fits


ValueError: 
All the 30 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\scikeras\wrappers.py", line 1501, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\scikeras\wrappers.py", line 770, in fit
    self._fit(
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\scikeras\wrappers.py", line 925, in _fit
    X, y = self._initialize(X, y)
           ^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\scikeras\wrappers.py", line 862, in _initialize
    self.model_ = self._build_keras_model()
                  ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Tymur_Hilfatullin\Projects\ML\.venv\Lib\site-packages\scikeras\wrappers.py", line 433, in _build_keras_model
    model = final_build_fn(**build_params)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: BaseWrapper._check_model_param.<locals>.final_build_fn() got an unexpected keyword argument 'regularizer'


In [None]:
# Train the default model
default_model_config = base_model_config()
X, Y = get_rca_data()
default_model_name = "Default-Model"
default_history = create_and_run_model(default_model_config, X, Y, default_model_name)

# Train the optimized model
optimized_model_config = base_model_config()
optimized_model_config["HIDDEN_NODES"] = [32, 32]
optimized_model_config["NORMALIZATION"] = "batch"
optimized_model_config["OPTIMIZER"] = "rmsprop"
optimized_model_config["LEARNING_RATE"] = 0.001
optimized_model_config["REGULARIZER"] = "l2"
optimized_model_config["DROPOUT_RATE"] = 0.2
optimized_model_name = "Optimized-Model"
optimized_history = random_search_result

# Collect accuracy measures
accuracy_measures = {
    default_model_name: default_history.history["accuracy"],
    optimized_model_name: optimized_history.history["accuracy"]
}

# Plot the accuracy measures
plot_graph(accuracy_measures, "Accuracy Comparison: Default vs Optimized Model")