In [35]:
%cd /kaggle/input/chemcancer-v2/src/
%mkdir /kaggle/working/Machine_Learning_models/
%mkdir /kaggle/working/Machine_Learning_models_results/

/kaggle/input/chemcancer-v2/src
mkdir: cannot create directory ‘/kaggle/working/Machine_Learning_models/’: File exists
mkdir: cannot create directory ‘/kaggle/working/Machine_Learning_models_results/’: File exists


In [36]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from tensorflow.keras.optimizers import Adam
from data import *
from machine_learning_models import *
from deep_learning_models import *
from vision_transformer import *
from utils_dl_model import *
from utils_ml_model import print_ml_results
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [37]:
# Set the seed value.
SEED = 7
np.random.seed(SEED)

# Deep Learning parameters
DL_EPOCH = 500
DL_BATCH_SIZE = 32
DL_CNN_VERSION = 3
DL_TRANSFORMER_VISION_VERSION = 11
DL_BLS_VERSION = 1

DO_DL = False
CV_DL = True
OPT_DL = False

DO_CNN = False
DO_TRANSFORMER_VISION = False
DO_BLS = False
DO_ML = True

# Percentage of test set out of the dataset.
TEST_SET = 0.2

# Percentage of validation set out of the training dataset.
VAL_SET = 0.2

# Folder path associated with machine learning models
ml_models_folder = "/kaggle/working/Machine_Learning_models/"
ml_models_results_folder = "/kaggle/working/Machine_Learning_models_results/"

In [39]:
import matplotlib.pyplot as plt
import seaborn as sns

def plot_confusion_matrices(results):
    """
    Function to plot confusion matrices one by one.
    
    Parameters:
    results (dict): Dictionary containing the results of the machine learning models
    """
    for name, result in results.items():
        # Create a new figure for each model
        plt.figure(figsize=(5, 5))
        
        # Generate a confusion matrix heatmap
        confusion_matrix = np.array(result['Confusion Matrix'])
        sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap="Blues")
        
        # Set the plot labels
        plt.title(f'{name}')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        
        # Display the plot
        plt.show()



In [50]:
def preprocess_with_pca(X, variance_threshold=0.95):
    """
    Preprocesses the data using PCA, selecting the number of components
    such that the specified variance threshold is retained.

    :param X: The already standardized input data.
    :param variance_threshold: The threshold for explained variance.
    :return: Data transformed by PCA and the PCA model.
    """
    pca_temp = PCA()
    pca_temp.fit(X)
    cumulative_variance_ratio = np.cumsum(pca_temp.explained_variance_ratio_)
    n_components = np.argmax(cumulative_variance_ratio >= variance_threshold) + 1
#
    pca = PCA(n_components)
    X_pca = pca.fit_transform(X)

    print(f"PCA with {n_components} components retaining {variance_threshold * 100}% of variance")
    
    return X_pca, pca


In [41]:

def load_extract_data(data_file):
    # Load the data
    print("Load the data")
    data = load_data(data_file)
    print(f"Data shape : {data.shape}")

    # Extract the feature and target data
    print("Extract the feature and target data")
    X, y = extract_data(data)
    print(f"X shaped: {X.shape}")
    print(f"y shaped: {y.shape}")

    return X, y

def extract_data_from_csv(filename="generated_data.csv"):
    # Read the CSV file into a DataFrame
    df_extracted = pd.read_csv(filename)
    
    # Split the DataFrame into features and labels
    X_extracted = df_extracted.drop(columns=["labels"]).values
    y_extracted = df_extracted["labels"].values
    
    return X_extracted, y_extracted

In [64]:
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix

def compute_tp_tn_fp_fn_percentage(y_test, y_pred, class_label):
    """
    Function to compute the True Positives (TP), True Negatives (TN),
    False Positives (FP), and False Negatives (FN) for a specific class as percentages.
    """
    cm = confusion_matrix(y_test, y_pred)
    total_samples = np.sum(cm)

    tp = cm[class_label, class_label] / total_samples
    fp = (sum(cm[:, class_label]) - cm[class_label, class_label]) / total_samples
    fn = (sum(cm[class_label, :]) - cm[class_label, class_label]) / total_samples
    tn = (total_samples - (tp + fp + fn)) / total_samples

    return tp, tn, fp, fn

# Replace compute_tp_tn_fp_fn with compute_tp_tn_fp_fn_percentage in the function train_and_evaluate_ml_models
def train_and_evaluate_ml_models(models, X, y, apply_pca=False, pca_variance_threshold=0.95, apply_filters_bg_subtraction=False, cv=5, standardize_data_func=None):
    results = {}

    # Create a StratifiedKFold object
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    
    for name, model in models.items():
        fold_metrics = {
            'accuracy': [],
            'precision': [],
            'recall': [],
            'f1_score': [],
            'confusion_matrix': [],
            'tp_tn_fp_fn': [],
            'tp': [],
            'tn': [],
            'fp': [],
            'fn': []
        }

        # Perform stratified cross-validation
        for train_index, test_index in skf.split(X, y):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            
            # Apply filters and background subtraction if enabled
            if apply_filters_bg_subtraction:
                print("Apply filters and background subtraction")
                X_train = apply_filters_and_background_subtraction(X_train)
                X_test = apply_filters_and_background_subtraction(X_test)
            
           # Standardize data if a standardization function is provided
            if standardize_data_func is not None:
                print("Standardize the data")
                X_train, X_test = standardize_data_func(X_train, X_test)

            # Apply PCA if enabled
            if apply_pca:
                print("Apply PCA")
                pca = PCA(n_components=pca_variance_threshold)
                X_train = pca.fit_transform(X_train)
                X_test = pca.transform(X_test)
            
            # Train the model
            print(f"Training {name} model...")
            model.fit(X_train, y_train)

            # Evaluate the model
            print(f"Evaluating {name} model...")
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            precision = precision_score(y_test, y_pred, average='macro')
            recall = recall_score(y_test, y_pred, average='macro')
            f1 = f1_score(y_test, y_pred, average='macro')
            confusion = confusion_matrix(y_test, y_pred)

            # Compute TP, TN, FP, FN for each class
            tp_tn_fp_fn = {class_label: compute_tp_tn_fp_fn_percentage(y_test, y_pred, class_label) for class_label in range(len(np.unique(y)))}

            # Append the tp, tn, fp, fn values to the corresponding lists
            for class_label, (tp, tn, fp, fn) in tp_tn_fp_fn.items():
                fold_metrics['tp'].append(tp)
                fold_metrics['tn'].append(tn)
                fold_metrics['fp'].append(fp)
                fold_metrics['fn'].append(fn)

            fold_metrics['accuracy'].append(accuracy)
            fold_metrics['precision'].append(precision)
            fold_metrics['recall'].append(recall)
            fold_metrics['f1_score'].append(f1)
            fold_metrics['confusion_matrix'].append(confusion)
            fold_metrics['tp_tn_fp_fn'].append(tp_tn_fp_fn)

        # Calculate the mean and standard deviation of the metrics from the CV folds
        results[name] = {
            'CV': cv,
            'Accuracy': np.mean(fold_metrics['accuracy']),
            'Precision': np.mean(fold_metrics['precision']),
            'Recall': np.mean(fold_metrics['recall']),
            'F1 Score': np.mean(fold_metrics['f1_score']),
            'Confusion Matrix': np.mean(fold_metrics['confusion_matrix'], axis=0).tolist(),  # average confusion matrix across folds
            'TP_TN_FP_FN': fold_metrics['tp_tn_fp_fn'],  # TP, TN, FP, FN for each fold
            'Avg TP': np.mean(fold_metrics['tp']),
            'Avg TN': np.mean(fold_metrics['tn']),
            'Avg FP': np.mean(fold_metrics['fp']),
            'Avg FN': np.mean(fold_metrics['fn']),
            'Std Accuracy': np.std(fold_metrics['accuracy']),
            'Std Precision': np.std(fold_metrics['precision']),
            'Std Recall': np.std(fold_metrics['recall']),
            'Std F1 Score': np.std(fold_metrics['f1_score'])
        }

        # Print the results for the current model
        print(f"\n{name} Model Results:")
        print(f"CV: {results[name]['CV']}")
        print(f"Accuracy: {results[name]['Accuracy']}")
        print(f"Precision: {results[name]['Precision']}")
        print(f"Recall: {results[name]['Recall']}")
        print(f"F1 Score: {results[name]['F1 Score']}")
        print(f"STD Accuracy: {results[name]['Std Accuracy']}")
        print(f"STD Precision: {results[name]['Std Precision']}")
        print(f"STD Recall: {results[name]['Std Recall']}")
        print(f"STD F1 Score: {results[name]['Std F1 Score']}")
        print(f"Confusion Matrix: \n{np.array(results[name]['Confusion Matrix'])}\n")
        print(f"TP_TN_FP_FN: \n{results[name]['TP_TN_FP_FN']}\n")
        print(f"Avg TP: {results[name]['Avg TP']}")
        print(f"Avg TN: {results[name]['Avg TN']}")
        print(f"Avg FP: {results[name]['Avg FP']}")
        print(f"Avg FN: {results[name]['Avg FN']}\n")

    return results

In [42]:
# With bksb, slope and roll set to true
data_file= "/kaggle/input/chemcancer-v2/Data/HC05_HC07.csv"

In [43]:
print("CV preprocessing")
X_filtered, y = preprocess_cv_raw_data(data_file)

CV preprocessing
Load the data
Data shape : (2373, 274)
Extract the feature and target data
X shaped: (2373, 270)
y shaped: (2373,)
Apply filters and background substraction to the features dataset


In [62]:
# Load the data
print("Load the data")
data = load_data(data_file)
print(f"Data shape : {data.shape}")

# Extract the feature and target data
print("Extract the feature and target data")
X, y = extract_data(data)
print(f"X shaped: {X.shape}")
print(f"y shaped: {y.shape}")

Load the data
Data shape : (2373, 274)
Extract the feature and target data
X shaped: (2373, 270)
y shaped: (2373,)


## (PCA) Not Filtered - Original was used

In [65]:
if DO_ML:
    print("Building machine learning models...")
    ml_models = build_ml_models()
    results = train_and_evaluate_ml_models(
        models=ml_models,
        X=X, 
        y=y, 
        apply_pca=True, 
        pca_variance_threshold=0.95, 
        apply_filters_bg_subtraction=False,
        cv=5, 
        standardize_data_func=standardize_data
    )


Building machine learning models...
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...

SVM Model Results:
CV: 5
Accuracy: 0.4825200977126361
Precision: 0.503630983962215
Recall: 0.46920036047779135
F1 Score: 0.4086809808287743
STD Accuracy: 0.013810824792867816
STD Precision: 0.0426413597614061
STD Recall: 0.013769518932179706
STD F1 Score: 0.016339923686039544
Confusion Matrix: 
[[129.   51.6   2.4]
 [ 45.2  91.    5.4]
 [ 86.   55.    9. ]]

TP_TN_FP_FN: 
[{0: (0.2631578947368421, 0.9985728531855955, 0.2926315789473684, 0.12210526315789473), 1: (0.18105263157894738, 0.9988653739612188, 0.24, 0.11789473684210526), 2: (0.01263157894736842, 0.999313019390581

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training Logistic Regression model...
Evaluating Logistic Regression model...
Standardize the data
Apply PCA
Training Logistic Regression model...
Evaluating Logistic Regression model...
Standardize the data
Apply PCA


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Training Logistic Regression model...
Evaluating Logistic Regression model...

Logistic Regression Model Results:
CV: 5
Accuracy: 0.35145991561181433
Precision: 0.22081076582233394
Recall: 0.3266210447196608
F1 Score: 0.26090031350521314
STD Accuracy: 0.012896213960936783
STD Precision: 0.009035359343707078
STD Recall: 0.012099381889289013
STD F1 Score: 0.009460202044502446
Confusion Matrix: 
[[124.   59.    0. ]
 [ 98.8  42.8   0. ]
 [ 90.4  59.6   0. ]]

TP_TN_FP_FN: 
[{0: (0.2694736842105263, 0.9983202216066481, 0.4126315789473684, 0.11578947368421053), 1: (0.09263157894736843, 0.9988963988919667, 0.22526315789473683, 0.2063157894736842), 2: (0.0, 0.9993351800554017, 0.0, 0.3157894736842105)}, {0: (0.25473684210526315, 0.99837783933518, 0.38526315789473686, 0.13052631578947368), 1: (0.09473684210526316, 0.998812188365651, 0.26526315789473687, 0.20421052631578948), 2: (0.0, 0.9993351800554017, 0.0, 0.3157894736842105)}, {0: (0.2463157894736842, 0.998382271468144, 0.3831578947368421, 

  _warn_prf(average, modifier, msg_start, len(result))


Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...

k-Nearest Neighbors Model Results:
CV: 5
Accuracy: 0.5385671774372641
Precision: 0.5369212017041564
Recall: 0.5280286308012007
F1 Score: 0.5263850203461083
STD Accuracy: 0.019437798761455037
STD Precision: 0.022678899176864446
STD Recall: 0.01753567259996242
STD F1 Score: 0.017884824136602865
Confusion Matrix: 
[[123.6  29.6  29.8]
 [ 46.4  72.6  22.6]
 [ 56.   34.6  59.4]]

TP_TN_FP_FN: 
[{0: (0.25263157894736843, 0.9987501385041552, 0.20842105263157895, 0.13263157894736843), 1: (0.15368421052631578, 0.9990559556786703, 0.14947368421052631, 0.14526315789473684), 2: (0.12, 0.999091412742

## (No PCA) Not filtered - Original data was used

In [66]:
 # Standardize the training and test sets
print("Standardizing the data...")
X_train_std, X_test_std = standardize_data(X_train, X_test)

if DO_ML:
    print("Building machine learning models...")
    ml_models = build_ml_models()
    results = train_and_evaluate_ml_models(
    models=ml_models,
    X=X, 
    y=y, 
    apply_pca=False, 
    pca_variance_threshold=0.95, 
    apply_filters_bg_subtraction=False,
    cv=5, 
    standardize_data_func=standardize_data
)

Standardizing the data...
Building machine learning models...
Standardize the data
Training SVM model...
Evaluating SVM model...
Standardize the data
Training SVM model...
Evaluating SVM model...
Standardize the data
Training SVM model...
Evaluating SVM model...
Standardize the data
Training SVM model...
Evaluating SVM model...
Standardize the data
Training SVM model...
Evaluating SVM model...

SVM Model Results:
CV: 5
Accuracy: 0.5752156340217633
Precision: 0.5883099260223166
Recall: 0.5598970138312337
F1 Score: 0.5385358838003318
STD Accuracy: 0.008240112451791834
STD Precision: 0.012354528661683946
STD Recall: 0.008390911119972354
STD F1 Score: 0.01189004151756246
Confusion Matrix: 
[[146.2  29.2   7.6]
 [ 38.2  89.6  13.8]
 [ 69.4  43.4  37.2]]

TP_TN_FP_FN: 
[{0: (0.3178947368421053, 0.9987235457063712, 0.22105263157894736, 0.06736842105263158), 1: (0.17894736842105263, 0.9990736842105263, 0.14105263157894737, 0.12), 2: (0.08421052631578947, 0.9992155124653739, 0.05684210526315789

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...

Logistic Regression Model Results:
CV: 5
Accuracy: 0.7189171663335554
Precision: 0.7086591927457073
Recall: 0.7058022797873159
F1 Score: 0.7050211938311303
STD Accuracy: 0.018364405777292696
STD Precision: 0.0184667575760235
STD Recall: 0.018583753925613244
STD F1 Score: 0.018367873982402208
Confusion Matrix: 
[[160.2   9.2  13.6]
 [ 21.   89.2  31.4]
 [ 21.2  37.   91.8]]

TP_TN_FP_FN: 
[{0: (0.3368421052631579, 0.9989761772853186, 0.10105263157894737, 0.04842105263157895), 1: (0.16210526315789472, 0.9992155124653739, 0.07368421052631578, 0.1368421052631579), 2: (0.20210526315789473, 0.9990736842105263, 0.12421052631578948, 0.11368421052631579)}, {0: (0.35368421052631577, 0.9990293628808865, 0.07578947368421053, 0.031578947368421054), 1: (0.1957894736842105, 0.9991445983379501, 0.10736842105263159, 0.1031578947368421), 2: (0.19157894736842104, 0.999175623268698, 0.07578947368421053, 0.12421052631578948)}, {0: (0.3410526315789474, 0.998985041551

## (PCA) Filtered - Original Dataset was used

In [67]:
  if DO_ML:
    print("Building machine learning models...")
    ml_models = build_ml_models()
    results = train_and_evaluate_ml_models(
    models=ml_models,
    X=X, 
    y=y, 
    apply_pca=True, 
    pca_variance_threshold=0.95, 
    apply_filters_bg_subtraction=True,
    cv=5, 
    standardize_data_func=standardize_data
)

Building machine learning models...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training SVM model...
Evaluating SVM model...

SVM Model Results:
CV: 5
Accuracy: 0.48375216522318454
Precision: 0.4795665128822059
Recall: 0.4729488644458309
F1 Score: 0.4536848609938365
STD Accuracy: 0.027113690385558373
STD Precision: 0.03973128980079515
STD Recall: 0.0284205141161145
STD F1 Score: 0.027257948457079413
Confusion Matrix: 
[[119.   43.2  20.8]
 [ 47.   79.4  15.2]
 [ 68.4  50.4  31.2]]

TP_TN_FP_FN:

  _warn_prf(average, modifier, msg_start, len(result))


Standardize the data
Apply PCA
Training Logistic Regression model...
Evaluating Logistic Regression model...
Apply filters and background subtraction


  _warn_prf(average, modifier, msg_start, len(result))


Standardize the data
Apply PCA
Training Logistic Regression model...
Evaluating Logistic Regression model...
Apply filters and background subtraction


  _warn_prf(average, modifier, msg_start, len(result))


Standardize the data
Apply PCA
Training Logistic Regression model...
Evaluating Logistic Regression model...
Apply filters and background subtraction


  _warn_prf(average, modifier, msg_start, len(result))


Standardize the data
Apply PCA
Training Logistic Regression model...
Evaluating Logistic Regression model...

Logistic Regression Model Results:
CV: 5
Accuracy: 0.37127159671330223
Precision: 0.22199590233605604
Recall: 0.32573376935644016
F1 Score: 0.21253720732378625
STD Accuracy: 0.019946776549481522
STD Precision: 0.03360081190063136
STD Recall: 0.01652562612485247
STD F1 Score: 0.00820849761666949
Confusion Matrix: 
[[167.2  15.8   0. ]
 [132.6   9.    0. ]
 [140.2   9.8   0. ]]

TP_TN_FP_FN: 
[{0: (0.31157894736842107, 0.9980188365650969, 0.5557894736842105, 0.07368421052631578), 1: (0.02736842105263158, 0.9991490304709141, 0.10526315789473684, 0.27157894736842103), 2: (0.0, 0.9993351800554017, 0.0, 0.3157894736842105)}, {0: (0.3452631578947368, 0.9979612188365651, 0.5831578947368421, 0.04), 1: (0.014736842105263158, 0.9992509695290859, 0.056842105263157895, 0.28421052631578947), 2: (0.0, 0.9993351800554017, 0.0, 0.3157894736842105)}, {0: (0.3684210526315789, 0.997987811634349, 0

  _warn_prf(average, modifier, msg_start, len(result))


Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...
Apply filters and background subtraction
Standardize the data
Apply PCA
Training k-Nearest Neighbors model...
Evaluating k-Nearest Neighbors model...

k-Nearest Neighbors Model Results:
CV: 5
Accuracy: 0.46437308461025983
Precision: 0.4564747177816115
Recall: 0.456181644999244
F1 Score: 0.4526609434942829
STD Accuracy: 0.023212374796079457
STD Precision: 0.02398981797319725
STD Recall: 0.023066159081528932
STD F1 Score: 0.023194282513280

 ## (No PCA) Filtered - Original Dataset was used

In [68]:
  if DO_ML:
    print("Building machine learning models...")
    ml_models = build_ml_models()
    results = train_and_evaluate_ml_models(
    models=ml_models,
    X=X, 
    y=y, 
    apply_pca=False, 
    pca_variance_threshold=0.95, 
    apply_filters_bg_subtraction=True,
    cv=5, 
    standardize_data_func=standardize_data
)

Building machine learning models...
Apply filters and background subtraction
Standardize the data
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Training SVM model...
Evaluating SVM model...
Apply filters and background subtraction
Standardize the data
Training SVM model...
Evaluating SVM model...

SVM Model Results:
CV: 5
Accuracy: 0.514932267377304
Precision: 0.512207467242976
Recall: 0.5048623148416522
F1 Score: 0.48649922677398116
STD Accuracy: 0.033150384905829326
STD Precision: 0.039537470018323674
STD Recall: 0.030623314789248225
STD F1 Score: 0.027927549362637995
Confusion Matrix: 
[[123.2  39.6  20.2]
 [ 40.   84.6  17. ]
 [ 59.8  53.6  36.6]]

TP_TN_FP_FN: 
[{0: (0.31157894736842107, 0.9987058171745152, 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Apply filters and background subtraction
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Apply filters and background subtraction
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Apply filters and background subtraction
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...
Apply filters and background subtraction
Standardize the data
Training Logistic Regression model...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Evaluating Logistic Regression model...

Logistic Regression Model Results:
CV: 5
Accuracy: 0.38264712413946256
Precision: 0.3865077947084226
Recall: 0.3938797792373744
F1 Score: 0.3762282225052475
STD Accuracy: 0.03642916361844391
STD Precision: 0.0418173039979979
STD Recall: 0.03570445659820851
STD F1 Score: 0.03729836054877026
Confusion Matrix: 
[[43.8 64.2 75. ]
 [38.  59.4 44.2]
 [41.4 30.2 78.4]]

TP_TN_FP_FN: 
[{0: (0.14736842105263157, 0.9986437673130193, 0.25894736842105265, 0.23789473684210527), 1: (0.09052631578947369, 0.9991578947368421, 0.10105263157894737, 0.20842105263157895), 2: (0.16210526315789472, 0.9988299168975069, 0.24, 0.15368421052631578)}, {0: (0.12210526315789473, 0.9987457063711912, 0.21052631578947367, 0.2631578947368421), 1: (0.1368421052631579, 0.9989141274238227, 0.2168421052631579, 0.16210526315789472), 2: (0.15578947368421053, 0.9990027700831026, 0.15789473684210525, 0.16)}, {0: (0.061052631578947365, 0.9988254847645429, 0.1726315789473684, 0.3242105263