In [37]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
from glob import glob
import cv2
import pickle
from sklearn.model_selection import train_test_split

## Loading Data

In [38]:
# Load the pre-computed image payload that the later cells index by position.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# a pickle that comes from a trusted source.
with open('Image_Data_256x256.pkl', mode='rb') as pickle_file:
    data = pickle.load(pickle_file)

In [39]:
# Collect the bounding-box annotation files (*.txt) and show how many exist.
bbox_folder = 'archive/Bounding Boxes - YOLO Format - 1/Bounding Boxes - YOLO Format - 1'
bbox_pattern = os.path.join(bbox_folder, '*.txt')
bb_paths = glob(bbox_pattern)
len(bb_paths)

4000

In [40]:
path = "archive/Images - 1/Images - 1/"
image_paths = glob(os.path.join(path,'*.jpg'))

bbox_folder = 'archive/Bounding Boxes - YOLO Format - 1/Bounding Boxes - YOLO Format - 1'
bb_paths = glob(os.path.join(bbox_folder,'*.txt'))


Image_data = []
Label_data = []
Image_path = []

# Label every sample from its annotation file: an empty file -> 0, otherwise 1.
# enumerate() gives each file's position directly, avoiding an O(n) list.index()
# scan per file.
# NOTE(review): images, annotation files and `data` are paired purely by
# position; glob() does not promise any ordering, so confirm that all three
# sequences really line up (e.g. by comparing file stems).
for idx, bb_file in enumerate(bb_paths):
    if not os.path.exists(bb_file):
        continue
    label = 0 if os.path.getsize(bb_file) == 0 else 1
    Image_path.append(image_paths[idx])
    Image_data.append(data[idx])
    Label_data.append(label)

In [41]:
# One row per sample: the image payload and its 0/1 label.
df = pd.DataFrame(dict(Image=Image_data, Label=Label_data))

In [42]:
# Print the frame summary (row count, column dtypes, non-null counts).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Image   4000 non-null   object
 1   Label   4000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 62.6+ KB


In [43]:
# First 80% of the rows train, the remaining 20% test. The split point is
# derived from the frame length (3200 for the 4000-row frame) so the cell keeps
# working if the number of samples changes.
# NOTE(review): this is a positional, unshuffled and unstratified split; the
# imported train_test_split(..., stratify=...) may be what was intended.
n_train = len(df) * 4 // 5
Train_df, Test_df = df.iloc[:n_train, :], df.iloc[n_train:, :]

In [44]:
# Class balance of the training split: (distinct labels, count per label).
np.unique(Train_df.Label, return_counts=True)

(array([0, 1]), array([ 311, 2889]))

In [45]:
# Class balance of the test split: (distinct labels, count per label).
np.unique(Test_df.Label, return_counts=True)

(array([0, 1]), array([ 77, 723]))

## Oversampling the minority class with filtered images in the training data

In [46]:
# Partition the training rows by label so each class can be handled separately.
label_col = Train_df['Label']
class_0 = Train_df.loc[label_col == 0]
class_1 = Train_df.loc[label_col == 1]

In [47]:
# Fixed seed for the augmentation draw and the final shuffle so that re-running
# the cell rebuilds the same training set.
RESAMPLE_SEED = 42
resample_rng = np.random.default_rng(RESAMPLE_SEED)

# Start from every class-1 sample, then add filtered copies of class-0 images.
RS_Image = list(class_1.Image)
RS_Label = list(class_1.Label)

# NOTE(review): the unfiltered class-0 images are never added to the training
# set, and images are drawn with replacement (some may be picked repeatedly,
# others never) - confirm both are intended.
for _ in range(len(class_0)):
    k = int(resample_rng.integers(0, len(class_0)))
    # Look the sample up once instead of once per filter.
    src_image = class_0.Image.iloc[k]
    src_label = class_0.Label.iloc[k]

    # Gradient magnitude from horizontal and vertical Sobel responses.
    # NOTE(review): Sobel is requested as CV_64F, so this copy is float64 and
    # presumably not limited to the source value range, unlike the two
    # smoothed copies - confirm the mixed dtype/range is acceptable downstream.
    sobel_x = cv2.Sobel(src_image, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(src_image, cv2.CV_64F, 0, 1, ksize=3)

    augmented = (
        cv2.GaussianBlur(src_image, (5, 5), 0),
        cv2.bilateralFilter(src_image, 9, 75, 75),
        cv2.magnitude(sobel_x, sobel_y),
    )
    RS_Image.extend(augmented)
    RS_Label.extend([src_label] * len(augmented))

RS_Train_df = pd.DataFrame({'Image': RS_Image, 'Label': RS_Label})
# Shuffle the rows so the two classes are interleaved.
RS_Train_df = RS_Train_df.sample(frac=1, random_state=RESAMPLE_SEED)

In [48]:
# Class balance after resampling: (distinct labels, count per label).
np.unique(RS_Train_df.Label, return_counts=True)

(array([0, 1]), array([ 933, 2889]))

In [49]:
from tensorflow.keras.utils import to_categorical

# Stack the per-row images into one array per split and one-hot encode the
# labels into two columns.
train_images, train_labels = RS_Train_df['Image'], RS_Train_df['Label']
x_train = np.stack(train_images.values)
y_train = to_categorical(np.stack(train_labels.values), num_classes=2)

test_images, test_labels = Test_df['Image'], Test_df['Label']
x_test = np.stack(test_images.values)
y_test = to_categorical(np.stack(test_labels.values), num_classes=2)

In [50]:
# Force both image arrays into (n_samples, 256, 256, 3).
image_dims = (256, 256, 3)
x_train = x_train.reshape((x_train.shape[0],) + image_dims)
x_test = x_test.reshape((x_test.shape[0],) + image_dims)

## VGG19 Feature Representation

In [51]:
from tensorflow.keras.applications import VGG19
from tensorflow.keras.layers import BatchNormalization, Dense, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers.legacy import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

input_shape = (256, 256, 3)

# ImageNet-initialised VGG19 without its classification head.
vgg19_model = VGG19(include_top=False, weights='imagenet', input_shape=input_shape)

# Freeze every layer: the network is only used to produce features.
for frozen_layer in vgg19_model.layers:
    frozen_layer.trainable = False

# Feature extractor = convolutional base followed by a flatten.
model = Sequential([vgg19_model, Flatten()])

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 8, 8, 512)         20024384  
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
Total params: 20024384 (76.39 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 20024384 (76.39 MB)
_________________________________________________________________


In [None]:
# Run the frozen VGG19 + Flatten model over both splits to get one flat
# feature vector per image.
# NOTE(review): the images are passed as-is; confirm whether
# tensorflow.keras.applications.vgg19.preprocess_input should be applied first.
features = model.predict(x_train)
x_test_features = model.predict(x_test)

In [None]:
# Make sure each sample is a single flat row, then plot one training vector.
features = features.reshape(len(features), -1)
x_test_features = x_test_features.reshape(len(x_test_features), -1)
# Row 311 of the shuffled training features, drawn as a line plot.
plt.plot(features[311])

## KNN with VGG19 Features

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# 1-nearest-neighbour classifier on the full VGG19 feature vectors; the targets
# are the one-hot label rows.
knn_model = KNeighborsClassifier(n_neighbors=1).fit(features, y_train)
y_pred = knn_model.predict(x_test_features)

print(accuracy_score(y_test, y_pred))
print(f1_score(y_test, y_pred, average="weighted"))

# Collapse the one-hot rows to class indices for the confusion matrix.
true_classes = np.argmax(y_test, axis=1)
pred_classes = np.argmax(y_pred, axis=1)
cm = confusion_matrix(true_classes, pred_classes)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

## Lasso Feature Selection

In [None]:
from sklearn.linear_model import Lasso

alpha = 0.02  ## Increasing makes more features sparse
# Fit an L1-regularised linear model on the VGG19 features against the one-hot
# targets, then predict on the test features.
lasso_model = Lasso(alpha=alpha).fit(features, y_train)
klp = lasso_model.predict(x_test_features)

In [None]:
# Keep only the feature columns whose Lasso coefficient (first target row of
# coef_) is non-zero. The mask is computed once and applied as a column index,
# replacing a Python loop that rebuilt it for every row.
lasso_mask = lasso_model.coef_[0] != 0
Lasso_features = np.asarray(features)[:, lasso_mask]
Lasso_x_test_features = np.asarray(x_test_features)[:, lasso_mask]

In [None]:
# Feature-vector width before and after the Lasso-based column selection.
print(f'{len(features[0])},{len(Lasso_features[0])}')

## KNN with Lasso Features

In [None]:
# Same 1-nearest-neighbour evaluation, now on the Lasso-selected columns.
knn_model = KNeighborsClassifier(n_neighbors=1).fit(Lasso_features, y_train)
y_pred = knn_model.predict(Lasso_x_test_features)

for score in (accuracy_score(y_test, y_pred),
              f1_score(y_test, y_pred, average="weighted")):
    print(score)

# One-hot rows -> class indices for the confusion matrix.
cm = confusion_matrix(y_test.argmax(axis=1), y_pred.argmax(axis=1))

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

## Proximal Gradient Descent Optimization

In [None]:
from scipy.optimize import minimize

def objective_function(x, A, b, lambd):
    """Evaluate the Lasso objective ``0.5 * ||A x - b||_2^2 + lambd * ||x||_1``.

    Parameters
    ----------
    x : array-like
        Coefficient vector, one entry per column of ``A``.
    A : array-like, 2-D
        Design matrix (``np.ndarray`` or ``np.matrix``).
    b : array-like
        One target value per row of ``A``; flattened before use, so row
        vectors, column vectors and 1-D arrays are all treated alike.
    lambd : float
        Weight of the L1 penalty.

    Returns
    -------
    float
        The objective value at ``x``.
    """
    x = np.asarray(x, dtype=float).ravel()
    # Flatten both sides so the subtraction is always element-wise; a (n, 1)
    # target would otherwise broadcast against a (n,) product into (n, n).
    residual = np.asarray(np.asarray(A) @ x).ravel() - np.asarray(b).ravel()
    return 0.5 * float(residual @ residual) + lambd * float(np.abs(x).sum())

def proximal_gradient_descent_scipy(A, b, lambd, x0, max_iter=10, tol=1e-4):
    """Minimise the Lasso objective with SciPy and return the coefficients.

    Despite the name, no proximal (soft-thresholding) step is performed: the
    objective is handed to ``scipy.optimize.minimize`` with the ``L-BFGS-B``
    method and no analytic gradient.

    Parameters
    ----------
    A : array-like, 2-D
        Design matrix.
    b : array-like
        One target value per row of ``A`` (any shape; flattened).
    lambd : float
        Weight of the L1 penalty.
    x0 : array-like
        Starting coefficients, one per column of ``A``.
    max_iter : int, optional
        Iteration cap passed to the solver as ``maxiter``.
    tol : float, optional
        Termination tolerance, forwarded to ``minimize``.

    Returns
    -------
    numpy.ndarray
        The solver's final coefficient vector (``result.x``).
    """
    # Convert once up front so every objective evaluation works on plain
    # float ndarrays regardless of what the caller passed in.
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float).ravel()

    result = minimize(
        fun=objective_function,
        x0=np.asarray(x0, dtype=float).ravel(),
        args=(A, b, lambd),
        method='L-BFGS-B',
        tol=tol,  # honour the caller-supplied tolerance
        options={'maxiter': max_iter, 'disp': True}
    )

    return result.x

# Plain ndarrays instead of np.matrix (NumPy no longer recommends that class):
# A is (n_samples, n_features) and b is a 1-D vector of class indices, so
# `A @ x - b` is an ordinary element-wise residual.
A = np.asarray(Lasso_features, dtype=float)
b = np.argmax(y_train, axis=1).astype(float)

resulting_coefficients = proximal_gradient_descent_scipy(A, b, lambd=0.1, x0=np.zeros(A.shape[1]))

In [None]:
# Round the coefficients to 3 decimals, then count the ones that are still
# non-zero.
rc = np.round(resulting_coefficients, decimals=3)
(rc != 0).sum()

In [None]:
# Keep only the columns whose rounded coefficient is non-zero. The mask is
# built once and applied as a column index instead of looping over the rows.
pgd_mask = rc != 0
PGD_features = np.asarray(Lasso_features)[:, pgd_mask]
PGD_x_test_features = np.asarray(Lasso_x_test_features)[:, pgd_mask]

## KNN with PGD Features

In [None]:
# 1-nearest-neighbour evaluation on the coefficient-selected columns.
knn_model = KNeighborsClassifier(n_neighbors=1)
knn_model = knn_model.fit(PGD_features, y_train)
y_pred = knn_model.predict(PGD_x_test_features)

acc = accuracy_score(y_test, y_pred)
weighted_f1 = f1_score(y_test, y_pred, average="weighted")
print(acc)
print(weighted_f1)

# Confusion matrix over class indices recovered from the one-hot rows.
true_idx = np.argmax(y_test, axis=1)
pred_idx = np.argmax(y_pred, axis=1)
cm = confusion_matrix(true_idx, pred_idx)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

## Anomaly Detection Methods

In [None]:
from sklearn import svm

svc = svm.OneClassSVM(nu=0.5, kernel='rbf', gamma=0.1)

# Unsupervised fit on the training features (labels are not used).
svc.fit(PGD_features)
test_predictions = svc.predict(PGD_x_test_features)

# Score the SVM's own predictions rather than a y_pred left over from an
# earlier cell. predict() returns +1 for inliers and -1 for outliers; inliers
# are mapped to class 1 and outliers to class 0.
y_pred = (test_predictions == 1).astype(int)
y_tests = np.argmax(y_test, axis=1)

print(accuracy_score(y_tests, y_pred))
print(f1_score(y_tests, y_pred, average="weighted"))

cm = confusion_matrix(y_tests, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

In [None]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report  # used at the end of this cell

# Fixed seed so the forest, and therefore the reported metrics, are
# reproducible across runs.
isolation_forest = IsolationForest(contamination=0.3, random_state=42)
isolation_forest.fit(PGD_features)


outlier_predictions = isolation_forest.predict(PGD_x_test_features)

# predict() returns +1 for inliers and -1 for outliers; inliers -> class 1,
# outliers -> class 0.
anomaly_labels = (outlier_predictions == 1).astype(int)

y_pred = anomaly_labels
y_tests = np.argmax(y_test, axis=1)
print(accuracy_score(y_tests, y_pred))
print(f1_score(y_tests,y_pred,average="weighted"))

cm = confusion_matrix(y_tests, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()
print(classification_report(y_tests, y_pred))

In [None]:
from sklearn.neighbors import LocalOutlierFactor
import numpy as np

# NOTE(review): unlike the other detectors, LOF is fitted on the test features
# themselves (fit_predict) rather than on the training features - confirm this
# transductive setup is intended.
lof = LocalOutlierFactor(n_neighbors=5, contamination=0.1)
# Despite the name, these are +1 (inlier) / -1 (outlier) labels, not scores.
outlier_scores = lof.fit_predict(PGD_x_test_features)

# Inliers -> class 1, outliers -> class 0.
anomaly_labels = (outlier_scores == 1).astype(int)

y_pred = anomaly_labels
# Derive the integer ground truth here so the cell does not depend on a
# variable created by an earlier cell.
y_tests = np.argmax(y_test, axis=1)
print(accuracy_score(y_tests, y_pred))
print(f1_score(y_tests,y_pred,average="weighted"))

cm = confusion_matrix(y_tests, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.show()

In [None]:
yt = np.argmax(y_test, axis=1)
# y_pred is a one-hot matrix after the KNN cells but a 1-D label vector after
# the anomaly-detection cells; argmax over axis 1 is only valid for the former.
y_pred_arr = np.asarray(y_pred)
yp = np.argmax(y_pred_arr, axis=1) if y_pred_arr.ndim > 1 else y_pred_arr

# Ten random test indices (drawn with replacement) to display.
k = np.random.randint(0, len(yt), size=10)

fig, axes = plt.subplots(2, 5, figsize=(15, 6))

for i, ax in zip(k, axes.flatten()):
    # Scale to [0, 1] for imshow; presumably the pixels are 0-255 - TODO confirm.
    ax.imshow(x_test[i] / 255)
    ax.set_title(f'Actual: {yt[i]}\nPredicted: {yp[i]}')
    ax.axis('off')

plt.tight_layout()
plt.show()