# MobilenetV2


In [1]:
import tensorflow as tf
from sklearn.svm import OneClassSVM
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
import numpy as np
from pathlib import Path

# Import the necessary Keras modules containing model and layers
# Used this method to avoid import issueds from pylint
MobileNetV2 = tf.keras.applications.mobilenet_v2.MobileNetV2
image = tf.keras.preprocessing.image
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

# Load MobileNetV2 pre-trained on ImageNet without the top layer
model = MobileNetV2(weights="imagenet", include_top=False)


def extract_features(img_dir):
    img_list = []
    for img_path in img_dir.glob("*.png"):
        try:
            # Load image and preprocess it for MobileNetV2
            img = image.load_img(img_path, target_size=(224, 224))
            img_array = image.img_to_array(img)
            img_list.append(img_array)
        except Exception as e:
            print(f"Error processing image {img_path}: {e}")

    if not img_list:
        return np.array([])

    # Ensure that img_batch has a batch dimension
    img_batch = np.stack(img_list, axis=0)
    img_batch = preprocess_input(img_batch)

    # Check if img_batch has the batch dimension
    if img_batch.ndim == 4:
        # Extract features using MobileNetV2 in batches
        features = model.predict(img_batch)
        features = features.reshape(
            (features.shape[0], -1)
        )  # Flatten the features properly
        return features
    else:
        raise ValueError("img_batch does not have a batch dimension")


# Define paths using pathlib
train_dir = Path("../../data/one_class_dataset/train/class_1")

# Extract features from training images
train_features = extract_features(train_dir)


def anomaly_score(estimator, X):
    return -estimator.decision_function(X).ravel()


anomaly_scorer = make_scorer(anomaly_score)

# Define the one-class SVM with a grid search for hyperparameter tuning
params = {"nu": [0.01, 0.05, 0.1, 0.5], "gamma": ["scale", "auto"], "kernel": ["rbf"]}
svm = OneClassSVM()
clf = GridSearchCV(
    svm, params, scoring=anomaly_scorer, cv=5, n_jobs=-1
)  # Use all processors
clf.fit(train_features)

# The best estimator after the grid search
best_svm = clf.best_estimator_

2023-11-08 21:11:18.610440: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-08 21:11:18.612117: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-08 21:11:18.644350: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-08 21:11:18.644399: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-08 21:11:18.644427: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi



Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_sel

In [2]:
# Function to predict new images using the trained SVM
def predict_images(img_dir):
    new_features = extract_features(img_dir)
    scores = best_svm.decision_function(new_features)
    predictions = best_svm.predict(new_features)
    return scores, predictions


# Determine the threshold for anomaly detection
threshold = np.quantile(best_svm.decision_function(train_features), 0.03)


# Function to classify images as inliers or outliers
def classify_images(scores, threshold):
    return ["inlier" if score > threshold else "outlier" for score in scores]


validate_dir = Path("../../data/one_class_dataset/validate/class_1")

# Predict and classify new images
validate_scores, validate_predictions = predict_images(validate_dir)
validate_classifications = classify_images(validate_scores, threshold)

# Combine image paths, classifications, and scores into a single list
image_info = list(
    zip(validate_dir.glob("*.png"), validate_classifications, validate_scores)
)

# Split the combined list into inliers and outliers, including the score
inlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "inlier"
]
outlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "outlier"
]

# Print the inlier paths with scores
for img_path, classification, score in inlier_paths:
    print(f"{img_path}: {classification}, Score: {score}")

for img_path, classification, score in outlier_paths:
    print(f"{img_path}: {classification}, Score: {score}")

# Print the number of inliers and outliers
print(threshold)
print(f"Number of inliers: {len(inlier_paths)}")
print(f"Number of outliers: {len(outlier_paths)}")

../../data/one_class_dataset/validate/class_1/08_043.png: inlier, Score: 0.00474130429317865
../../data/one_class_dataset/validate/class_1/01_046.png: inlier, Score: 0.046129124891546414
../../data/one_class_dataset/validate/class_1/03_016.png: inlier, Score: 0.07125645098395295
../../data/one_class_dataset/validate/class_1/02_016.png: inlier, Score: 0.01888324449328388
../../data/one_class_dataset/validate/class_1/10_003.png: inlier, Score: 0.04381541503530084
../../data/one_class_dataset/validate/class_1/03_027.png: inlier, Score: 0.05076173353871155
../../data/one_class_dataset/validate/class_1/00_039.png: inlier, Score: 0.0376830896948791
../../data/one_class_dataset/validate/class_1/00_027.png: inlier, Score: 0.019481872711360926
../../data/one_class_dataset/validate/class_1/04_005.png: inlier, Score: 0.09823796089348685
../../data/one_class_dataset/validate/class_1/18_018.png: inlier, Score: 0.0021056016444733205
../../data/one_class_dataset/validate/class_1/10_002.png: inlier, S

In [3]:
len(inlier_paths) / len(list(validate_dir.glob("*.png")))

0.7142857142857143

# MobileNetV3


In [4]:
import tensorflow as tf
from sklearn.svm import OneClassSVM
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
import numpy as np
from pathlib import Path

# Import the necessary Keras modules containing model and layers
# Used this method to avoid import issueds from pylint
MobileNetV3 = tf.keras.applications.MobileNetV3Large
image = tf.keras.preprocessing.image
preprocess_input = tf.keras.applications.mobilenet_v3.preprocess_input

# Load MobileNetV3 pre-trained on ImageNet without the top layer
model = MobileNetV3(weights="imagenet", include_top=False)


def extract_features(img_dir):
    img_list = []
    for img_path in img_dir.glob("*.png"):
        try:
            # Load image and preprocess it for MobileNetV3
            img = image.load_img(img_path, target_size=(224, 224))
            img_array = image.img_to_array(img)
            img_list.append(img_array)
        except Exception as e:
            print(f"Error processing image {img_path}: {e}")

    if not img_list:
        return np.array([])

    # Ensure that img_batch has a batch dimension
    img_batch = np.stack(img_list, axis=0)
    img_batch = preprocess_input(img_batch)

    # Check if img_batch has the batch dimension
    if img_batch.ndim == 4:
        # Extract features using MobileNetV3 in batches
        features = model.predict(img_batch)
        features = features.reshape(
            (features.shape[0], -1)
        )  # Flatten the features properly
        return features
    else:
        raise ValueError("img_batch does not have a batch dimension")


# Define paths using pathlib
train_dir = Path("../../data/one_class_dataset/train/class_1")

# Extract features from training images
train_features = extract_features(train_dir)


def anomaly_score(estimator, X):
    return -estimator.decision_function(X).ravel()


anomaly_scorer = make_scorer(anomaly_score)

# Define the one-class SVM with a grid search for hyperparameter tuning
params = {"nu": [0.01, 0.05, 0.1, 0.5], "gamma": ["scale", "auto"], "kernel": ["rbf"]}
svm = OneClassSVM()
clf = GridSearchCV(
    svm, params, scoring=anomaly_scorer, cv=5, n_jobs=-1
)  # Use all processors
clf.fit(train_features)

# The best estimator after the grid search
best_svm = clf.best_estimator_



Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 808, in _score
    scores = scorer(estimator, X_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: _BaseScorer.__call__() missing 1 required positional argument: 'y_true'

Traceback (most recent call last):
  File "/home/spark343/github/capturing_opportunities_capstone/worktree/poc_jupyter_notebook/jupyter_lab/poc/.venv/lib/python3.11/site-packages/sklearn/model_sel

In [5]:
# Function to predict new images using the trained SVM
def predict_images(img_dir):
    new_features = extract_features(img_dir)
    scores = best_svm.decision_function(new_features)
    predictions = best_svm.predict(new_features)
    return scores, predictions


# Determine the threshold for anomaly detection
threshold = np.quantile(best_svm.decision_function(train_features), 0.03)


# Function to classify images as inliers or outliers
def classify_images(scores, threshold):
    return ["inlier" if score > threshold else "outlier" for score in scores]


validate_dir = Path("../../data/one_class_dataset/validate/class_1")

# Predict and classify new images
validate_scores, validate_predictions = predict_images(validate_dir)
validate_classifications = classify_images(validate_scores, threshold)

# Combine image paths, classifications, and scores into a single list
image_info = list(
    zip(validate_dir.glob("*.png"), validate_classifications, validate_scores)
)

# Split the combined list into inliers and outliers, including the score
inlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "inlier"
]
outlier_paths = [
    (img_path, classification, score)
    for img_path, classification, score in image_info
    if classification == "outlier"
]

# Print the inlier paths with scores
for img_path, classification, score in inlier_paths:
    print(f"{img_path}: {classification}, Score: {score}")

for img_path, classification, score in outlier_paths:
    print(f"{img_path}: {classification}, Score: {score}")

# Print the number of inliers and outliers
print(threshold)
print(f"Number of inliers: {len(inlier_paths)}")
print(f"Number of outliers: {len(outlier_paths)}")

../../data/one_class_dataset/validate/class_1/08_043.png: inlier, Score: 0.0671988691170729
../../data/one_class_dataset/validate/class_1/01_046.png: inlier, Score: 0.07730099940132557
../../data/one_class_dataset/validate/class_1/03_016.png: inlier, Score: 0.008626805862225473
../../data/one_class_dataset/validate/class_1/02_016.png: inlier, Score: 0.11887984670558033
../../data/one_class_dataset/validate/class_1/10_003.png: inlier, Score: 0.06275428730633914
../../data/one_class_dataset/validate/class_1/00_039.png: inlier, Score: 0.01766613031562514
../../data/one_class_dataset/validate/class_1/05_034.png: inlier, Score: 0.04663283799855977
../../data/one_class_dataset/validate/class_1/00_027.png: inlier, Score: 0.046718971659369835
../../data/one_class_dataset/validate/class_1/10_020.png: inlier, Score: 0.1345586055054397
../../data/one_class_dataset/validate/class_1/05_016.png: inlier, Score: 0.03545657369278665
../../data/one_class_dataset/validate/class_1/04_005.png: inlier, Scor

In [6]:
len(inlier_paths) / len(list(validate_dir.glob("*.png")))

0.7840531561461794