# MobileNetV2

In [1]:
import tensorflow as tf
from sklearn.svm import OneClassSVM
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
import numpy as np
from pathlib import Path

# Import the necessary Keras modules containing model and layers
# Used this method to avoid import issueds from pylint
MobileNetV2 = tf.keras.applications.mobilenet_v2.MobileNetV2
image = tf.keras.preprocessing.image
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Load MobileNetV2 pre-trained on ImageNet without the top layer
model = MobileNetV2(weights="imagenet", include_top=False)

# Create an instance of ImageDataGenerator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    preprocessing_function=preprocess_input,  # This will apply the same preprocessing as before
)


# Function to extract features using the data generator
def extract_features_with_augmentation(directory, sample_count):
    features = np.zeros(
        shape=(sample_count, 7, 7, 1280)
    )  # This shape is specific to MobileNetV2 with include_top=False
    generator = datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=32,
        class_mode=None,  # Since we're using OneClassSVM, we don't need the labels
        shuffle=False,  # Important for feature extraction
    )
    i = 0
    for inputs_batch in generator:
        features_batch = model.predict(inputs_batch)
        batch_size = inputs_batch.shape[
            0
        ]  # Get the actual batch size of the inputs_batch
        if (
            i * 32 + batch_size > sample_count
        ):  # Check if the next batch exceeds the sample count
            features[i * 32 : sample_count] = features_batch[: (sample_count - i * 32)]
            break
        else:
            features[i * 32 : i * 32 + batch_size] = features_batch
        i += 1
        if i * 32 >= sample_count:
            break
    return features


# Define paths using pathlib
train_dir = Path("../../data/one_class_dataset/train")


# Determine the number of images to generate through augmentation
# This should be a multiple of the batch size for simplicity
# num_images_to_generate = 3200  # For example, to generate 10 batches of size 32
# num_images_to_generate = 3020 # took 16 minutes to run
num_images_to_generate = 400

# Extract features from augmented training images
train_features_augmented = extract_features_with_augmentation(
    train_dir, num_images_to_generate
)

# Flatten the features as you did before
train_features_augmented = train_features_augmented.reshape(
    (train_features_augmented.shape[0], -1)
)


def anomaly_score(estimator, X):
    return -estimator.decision_function(X).ravel()


anomaly_scorer = make_scorer(anomaly_score)

# Define the one-class SVM with a grid search for hyperparameter tuning
params = {"nu": [0.01, 0.05, 0.1, 0.5], "gamma": ["scale", "auto"], "kernel": ["rbf"]}
svm = OneClassSVM()
clf = GridSearchCV(
    svm, params, scoring=anomaly_scorer, cv=5, n_jobs=-1
)  # Use all processors
clf.fit(train_features_augmented)

# The best estimator after the grid search
best_svm = clf.best_estimator_

2023-11-08 21:17:41.893467: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-08 21:17:41.895513: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-08 21:17:41.940849: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-08 21:17:41.940886: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-08 21:17:41.940914: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

Found 302 images belonging to 1 classes.


Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_sel

In [2]:
# Function to predict new images using the trained SVM
def predict_images(img_dir):
    new_features = extract_features_with_augmentation(
        img_dir, len(list((img_dir / "class_1").glob("*.png")))
    )
    new_features = new_features.reshape((new_features.shape[0], -1))
    scores = best_svm.decision_function(new_features)
    predictions = best_svm.predict(new_features)
    return scores, predictions


# Determine the threshold for anomaly detection
threshold = np.quantile(best_svm.decision_function(train_features_augmented), 0.03)


# Function to classify images as inliers or outliers
def classify_images(scores, threshold):
    return ["inlier" if score > threshold else "outlier" for score in scores]


validate_dir = Path("../../data/one_class_dataset/validate")

# Predict and classify new images
validate_scores, validate_predictions = predict_images(validate_dir)
validate_classifications = classify_images(validate_scores, threshold)

validate_dir = validate_dir / "class_1"

# Combine image paths, classifications, and scores into a single list
image_info = list(
    zip(validate_dir.glob("*.png"), validate_classifications, validate_scores)
)

# Split the combined list into inliers and outliers, including the score
inlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "inlier"
]
# outlier_paths = [
#     (img_path, classification, score)
#     for img_path, classification, score in image_info
#     if classification == "outlier"
# ]

Found 301 images belonging to 1 classes.


In [3]:
# Print the inlier paths with scores
# for img_path, classification, score in inlier_paths:
#     print(f"{img_path}: {classification}, Score: {score}")

# for img_path, classification, score in outlier_paths:
#     print(f"{img_path}: {classification}, Score: {score}")

In [4]:
# Print the number of inliers and outliers
# print(threshold)
print(f"Number of inliers: {len(inlier_paths)}")
# print(f"Number of outliers: {len(outlier_paths)}")

Number of inliers: 221


In [5]:
len(inlier_paths) / len(list(validate_dir.glob("*.png")))

0.7342192691029901

# MobileNetV3


In [7]:
import tensorflow as tf
from sklearn.svm import OneClassSVM
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
import numpy as np
from pathlib import Path

# Import the necessary Keras modules containing model and layers
# Used this method to avoid import issueds from pylint
MobileNetV3 = tf.keras.applications.MobileNetV3Large
image = tf.keras.preprocessing.image
preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator

# Load MobileNetV3 pre-trained on ImageNet without the top layer
model = MobileNetV3(weights="imagenet", include_top=False)

# Create an instance of ImageDataGenerator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    preprocessing_function=preprocess_input,  # This will apply the same preprocessing as before
)


# Function to extract features using the data generator
def extract_features_with_augmentation(directory, sample_count):
    features = np.zeros(
        shape=(sample_count, 7, 7, 960)
    )  # This shape is specific to MobileNetV3 with include_top=False
    generator = datagen.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=32,
        class_mode=None,  # Since we're using OneClassSVM, we don't need the labels
        shuffle=False,  # Important for feature extraction
    )
    i = 0
    for inputs_batch in generator:
        features_batch = model.predict(inputs_batch)
        batch_size = inputs_batch.shape[
            0
        ]  # Get the actual batch size of the inputs_batch
        if (
            i * 32 + batch_size > sample_count
        ):  # Check if the next batch exceeds the sample count
            features[i * 32 : sample_count] = features_batch[: (sample_count - i * 32)]
            break
        else:
            features[i * 32 : i * 32 + batch_size] = features_batch
        i += 1
        if i * 32 >= sample_count:
            break
    return features


# Define paths using pathlib
train_dir = Path("../../data/one_class_dataset/train")


# Determine the number of images to generate through augmentation
# This should be a multiple of the batch size for simplicity
# num_images_to_generate = 3200  # For example, to generate 10 batches of size 32
# num_images_to_generate = 3020 # took 16 minutes to run
num_images_to_generate = 400

# Extract features from augmented training images
train_features_augmented = extract_features_with_augmentation(
    train_dir, num_images_to_generate
)

# Flatten the features as you did before
train_features_augmented = train_features_augmented.reshape(
    (train_features_augmented.shape[0], -1)
)


def anomaly_score(estimator, X):
    return -estimator.decision_function(X).ravel()


anomaly_scorer = make_scorer(anomaly_score)

# Define the one-class SVM with a grid search for hyperparameter tuning
params = {"nu": [0.01, 0.05, 0.1, 0.5], "gamma": ["scale", "auto"], "kernel": ["rbf"]}
svm = OneClassSVM()
clf = GridSearchCV(
    svm, params, scoring=anomaly_scorer, cv=5, n_jobs=-1
)  # Use all processors
clf.fit(train_features_augmented)

# The best estimator after the grid search
best_svm = clf.best_estimator_

Found 302 images belonging to 1 classes.


Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_sel

In [8]:
# Function to predict new images using the trained SVM
def predict_images(img_dir):
    new_features = extract_features_with_augmentation(
        img_dir, len(list((img_dir / "class_1").glob("*.png")))
    )
    new_features = new_features.reshape((new_features.shape[0], -1))
    scores = best_svm.decision_function(new_features)
    predictions = best_svm.predict(new_features)
    return scores, predictions


# Determine the threshold for anomaly detection
threshold = np.quantile(best_svm.decision_function(train_features_augmented), 0.03)


# Function to classify images as inliers or outliers
def classify_images(scores, threshold):
    return ["inlier" if score > threshold else "outlier" for score in scores]


validate_dir = Path("../../data/one_class_dataset/validate")

# Predict and classify new images
validate_scores, validate_predictions = predict_images(validate_dir)
validate_classifications = classify_images(validate_scores, threshold)

validate_dir = validate_dir / "class_1"

# Combine image paths, classifications, and scores into a single list
image_info = list(
    zip(validate_dir.glob("*.png"), validate_classifications, validate_scores)
)

# Split the combined list into inliers and outliers, including the score
inlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "inlier"
]

Found 301 images belonging to 1 classes.


In [9]:
# Print the number of inliers and outliers
# print(threshold)
print(f"Number of inliers: {len(inlier_paths)}")
# print(f"Number of outliers: {len(outlier_paths)}")

Number of inliers: 233


In [10]:
len(inlier_paths) / len(list(validate_dir.glob("*.png")))

0.7740863787375415