In [None]:
!pip install opencv-python numpy pandas matplotlib scikit-learn tensorflow

In [3]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

# Define constants
IMG_SIZE = (128, 128)  # size passed to cv2.resize for every loaded image
N_CLUSTERS = 100  # number of KMeans clusters used as the bag-of-words vocabulary size

# Data collection and preprocessing
def load_images_from_folder(folder, labels_df):
    """Load every labelled, decodable image in *folder*, resized to IMG_SIZE.

    Args:
        folder: Directory whose entries are tried as image files.
        labels_df: DataFrame with an 'image' column (file names) and a
            'labels' column (the class of each file).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays where ``labels[i]`` is
        the label of ``images[i]``.

    Entries that have no row in *labels_df*, or that OpenCV cannot decode,
    are skipped, so both arrays always have the same length.
    """
    # Build the name -> label mapping once instead of filtering the whole
    # frame for every file. drop_duplicates keeps the first row of a repeated
    # file name, matching a "first match wins" lookup.
    label_by_name = (
        labels_df.drop_duplicates(subset='image')
        .set_index('image')['labels']
        .to_dict()
    )

    images = []
    labels = []
    # Sorted so that the sample order (and any seeded split made from it) does
    # not depend on the order in which the file system lists the directory.
    for filename in sorted(os.listdir(folder)):
        if filename not in label_by_name:
            # No label for this entry: skip it before decoding so images and
            # labels cannot get out of step.
            continue
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # cv2.imread returns None for anything it cannot decode.
            continue
        images.append(cv2.resize(img, IMG_SIZE))
        labels.append(label_by_name[filename])
    return np.array(images), np.array(labels)

def preprocess_images(images):
    """Return *images* divided by 255.0 (8-bit intensities map onto 0.0-1.0)."""
    return images / 255.0

# Machine-specific, hard-coded dataset locations: the image folder and the CSV
# that is read into labels_df. Edit both to run the notebook elsewhere.
data_folder = r'C:\Users\Nader Labib\Documents\vs code\indentationCamp\DogaCato\cat_dog1'
labels_csv = r'C:\Users\Nader Labib\Documents\vs code\indentationCamp\DogaCato\cat_dog1.csv'
labels_df = pd.read_csv(labels_csv)

images, labels = load_images_from_folder(data_folder, labels_df)
images = preprocess_images(images)
# Encode each distinct label as an integer index. np.unique returns the
# distinct labels sorted, so indices are assigned in that sorted order.
label_to_idx = {label: idx for idx, label in enumerate(np.unique(labels))}
labels = np.array([label_to_idx[label] for label in labels])

# Split data
# First hold out 20% of all samples as the test set, then 20% of what remains
# as the validation set (about 64% train / 16% validation / 20% test overall).
# The fixed random_state makes both splits repeatable for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Data Augmentation
# Generator configured with random rotation, shift, zoom, shear and horizontal
# flip; it is consumed through datagen.flow(...) when the CNN is trained.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# NOTE(review): per the Keras documentation, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is set above, so this call looks unnecessary —
# confirm before removing.
datagen.fit(X_train)

# SIFT Feature Extraction and Bag-of-Words
def extract_sift_features(images):
    """Compute SIFT descriptors for every image.

    Args:
        images: Iterable of 3-channel BGR images whose values are scaled to
            the 0.0-1.0 range.

    Returns:
        A list with exactly one entry per input image, in input order: the
        descriptor array, or ``None`` when SIFT produced no descriptors.
        Keeping a placeholder for such images keeps the result aligned
        index-for-index with the images (and therefore with their labels).
    """
    sift = cv2.SIFT_create()
    descriptors_list = []
    for img in images:
        # SIFT needs an 8-bit image. Round instead of truncating so that
        # floating-point error from the earlier scaling cannot push a pixel
        # value down by one; clip to stay inside the uint8 range.
        img_u8 = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
        gray = cv2.cvtColor(img_u8, cv2.COLOR_BGR2GRAY)
        _, descriptors = sift.detectAndCompute(gray, None)
        if descriptors is not None and len(descriptors) == 0:
            # Treat an empty descriptor array the same as "no descriptors".
            descriptors = None
        descriptors_list.append(descriptors)
    return descriptors_list


def build_vocabulary(descriptors_list, n_clusters, random_state=None):
    """Cluster all descriptors into a visual vocabulary with KMeans.

    Args:
        descriptors_list: List of descriptor arrays; ``None`` or empty
            entries (images without descriptors) are ignored.
        n_clusters: Number of clusters, i.e. the vocabulary size.
        random_state: Optional seed forwarded to KMeans for reproducible
            vocabularies. The default (``None``) is KMeans' own default.

    Returns:
        The fitted KMeans model.

    Raises:
        ValueError: If no entry of *descriptors_list* contains descriptors.
    """
    usable = [d for d in descriptors_list if d is not None and len(d)]
    if not usable:
        raise ValueError(
            "cannot build a vocabulary: no SIFT descriptors were found"
        )
    all_descriptors = np.vstack(usable)
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(all_descriptors)
    return kmeans


def compute_bow_histograms(descriptors_list, kmeans):
    """Turn per-image descriptors into normalised bag-of-words histograms.

    Args:
        descriptors_list: One entry per image: a descriptor array, or
            ``None``/empty for an image without descriptors.
        kmeans: Fitted clustering model exposing ``predict`` and
            ``n_clusters``.

    Returns:
        Array of shape ``(len(descriptors_list), kmeans.n_clusters)``. Each
        row holds the fraction of the image's descriptors assigned to each
        cluster; images without descriptors get an all-zero row.
    """
    n_words = kmeans.n_clusters
    histograms = []
    for descriptors in descriptors_list:
        if descriptors is None or len(descriptors) == 0:
            # Nothing to count; also avoids the 0/0 that density=True would
            # produce for an empty prediction array.
            hist = np.zeros(n_words)
        else:
            predictions = kmeans.predict(descriptors)
            # Unit-width bins, one per cluster id; density=True then yields
            # count / total, so each row sums to 1.
            hist, _ = np.histogram(predictions, bins=range(n_words + 1), density=True)
        histograms.append(hist)
    # reshape keeps the result two-dimensional even for an empty input list.
    return np.array(histograms, dtype=float).reshape(-1, n_words)

# The vocabulary (KMeans model) is fitted on training descriptors only; the
# validation and test sets are encoded with that same fitted model.
sift_train_descriptors = extract_sift_features(X_train)
kmeans = build_vocabulary(sift_train_descriptors, N_CLUSTERS)
X_train_bow = compute_bow_histograms(sift_train_descriptors, kmeans)

sift_val_descriptors = extract_sift_features(X_val)
X_val_bow = compute_bow_histograms(sift_val_descriptors, kmeans)

sift_test_descriptors = extract_sift_features(X_test)
X_test_bow = compute_bow_histograms(sift_test_descriptors, kmeans)

# Standardize BOW features
# The scaler is fitted on the training histograms and then only applied
# (transform) to the validation and test histograms.
scaler = StandardScaler()
X_train_bow = scaler.fit_transform(X_train_bow)
X_val_bow = scaler.transform(X_val_bow)
X_test_bow = scaler.transform(X_test_bow)

# SVM Classifier
def train_svm(X_train, y_train, X_val, y_val):
    """Grid-search an SVC on the training data and report validation accuracy.

    The search covers C in {0.1, 1, 10} and gamma in {'scale', 'auto'} using
    GridSearchCV's default settings. The validation data is used only to
    print an accuracy figure; it takes no part in the search.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        X_val: Validation feature matrix.
        y_val: Validation labels.

    Returns:
        The best estimator found by the grid search.
    """
    clf = GridSearchCV(SVC(), {'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']})
    clf.fit(X_train, y_train)
    print(f"Best parameters: {clf.best_params_}")

    y_val_pred = clf.best_estimator_.predict(X_val)
    print(f"Validation Accuracy: {accuracy_score(y_val, y_val_pred)}")
    return clf.best_estimator_

# Fit the SVM on the standardized bag-of-words features.
best_svc = train_svm(X_train_bow, y_train, X_val_bow, y_val)

# Evaluate SVM
# Precision, recall and F1 use average='macro' (per-class scores averaged).
y_test_pred = best_svc.predict(X_test_bow)
print(f"SVM Test Accuracy: {accuracy_score(y_test, y_test_pred)}")
print(f"Precision: {precision_score(y_test, y_test_pred, average='macro')}")
print(f"Recall: {recall_score(y_test, y_test_pred, average='macro')}")
print(f"F1-score: {f1_score(y_test, y_test_pred, average='macro')}")
print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_test_pred)}")

# CNN Model
def create_cnn(input_shape, num_classes):
    """Build and compile a small three-stage convolutional classifier.

    The network stacks three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages
    with 32, 64 and 128 filters, followed by Flatten, a 128-unit ReLU dense
    layer and a softmax output with one unit per class.

    Args:
        input_shape: Shape of a single input sample, given to the first
            convolution layer.
        num_classes: Number of output classes.

    Returns:
        The model, compiled with the 'adam' optimizer, sparse categorical
        cross-entropy loss (integer class labels) and the accuracy metric.
    """
    # First stage carries the input shape; later stages only differ in width.
    stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
    ]
    for n_filters in (64, 128):
        stack.append(Conv2D(n_filters, (3, 3), activation='relu'))
        stack.append(MaxPooling2D((2, 2)))

    # Classification head.
    stack.append(Flatten())
    stack.append(Dense(128, activation='relu'))
    stack.append(Dense(num_classes, activation='softmax'))

    model = Sequential(stack)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

input_shape = X_train.shape[1:]  # shape of one sample: everything after the leading axis
num_classes = len(label_to_idx)  # one output unit per distinct label
cnn_model = create_cnn(input_shape, num_classes)

# Train CNN with augmented data

batch_size = 32
# Training batches are drawn from the augmenting generator; validation uses
# the X_val / y_val arrays directly.
cnn_model.fit(datagen.flow(X_train, y_train, batch_size=batch_size), epochs=20, validation_data=(X_val, y_val))

# Evaluate CNN
cnn_model.evaluate(X_test, y_test)

# Summary of results
# argmax over the last axis converts the softmax outputs into class indices.
y_test_pred_cnn = np.argmax(cnn_model.predict(X_test), axis=-1)
print(f"CNN Test Accuracy: {accuracy_score(y_test, y_test_pred_cnn)}")
print(f"Precision: {precision_score(y_test, y_test_pred_cnn, average='macro')}")
print(f"Recall: {recall_score(y_test, y_test_pred_cnn, average='macro')}")
print(f"F1-score: {f1_score(y_test, y_test_pred_cnn, average='macro')}")
print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_test_pred_cnn)}")

# Comparison and Analysis
def compare_models(svm_metrics, cnn_metrics):
    """Print both metric dictionaries and say which model is more accurate.

    Args:
        svm_metrics: Mapping of metric name to value for the SVM; must
            contain an 'accuracy' entry.
        cnn_metrics: Mapping of metric name to value for the CNN; must
            contain an 'accuracy' entry.

    Raises:
        KeyError: If either mapping has no 'accuracy' entry.

    Only 'accuracy' decides the verdict; the other metrics are printed as
    part of the dictionaries but not compared.
    """
    print(f"SVM Metrics: {svm_metrics}")
    print(f"CNN Metrics: {cnn_metrics}")
    svm_accuracy = svm_metrics['accuracy']
    cnn_accuracy = cnn_metrics['accuracy']
    if svm_accuracy > cnn_accuracy:
        print("SVM outperforms CNN in terms of accuracy.")
    elif cnn_accuracy > svm_accuracy:
        print("CNN outperforms SVM in terms of accuracy.")
    else:
        # Equal accuracy must not be reported as a win for either model.
        print("SVM and CNN achieve the same accuracy.")
    # Additional analysis can be added here

# Test-set metrics of the SVM, recomputed from its test predictions.
svm_metrics = {
    'accuracy': accuracy_score(y_test, y_test_pred),
    'precision': precision_score(y_test, y_test_pred, average='macro'),
    'recall': recall_score(y_test, y_test_pred, average='macro'),
    'f1_score': f1_score(y_test, y_test_pred, average='macro')
}

# The same four metrics for the CNN's test predictions.
cnn_metrics = {
    'accuracy': accuracy_score(y_test, y_test_pred_cnn),
    'precision': precision_score(y_test, y_test_pred_cnn, average='macro'),
    'recall': recall_score(y_test, y_test_pred_cnn, average='macro'),
    'f1_score': f1_score(y_test, y_test_pred_cnn, average='macro')
}

compare_models(svm_metrics, cnn_metrics)




Best parameters: {'C': 1, 'gamma': 'scale'}
Validation Accuracy: 0.653125
SVM Test Accuracy: 0.66
Precision: 0.6605156787426878
Recall: 0.6598414960374009
F1-score: 0.6595829891617231
Confusion Matrix: 
[[125  74]
 [ 62 139]]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 229ms/step - accuracy: 0.5179 - loss: 0.8018 - val_accuracy: 0.4500 - val_loss: 0.7050
Epoch 2/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 256ms/step - accuracy: 0.5259 - loss: 0.6860 - val_accuracy: 0.4500 - val_loss: 0.6935
Epoch 3/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 243ms/step - accuracy: 0.5264 - loss: 0.6882 - val_accuracy: 0.4656 - val_loss: 0.6979
Epoch 4/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 215ms/step - accuracy: 0.5787 - loss: 0.6671 - val_accuracy: 0.5750 - val_loss: 0.6534
Epoch 5/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 229ms/step - accuracy: 0.6071 - loss: 0.6488 - val_accuracy: 0.5562 - val_loss: 0.7038
Epoch 6/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 226ms/step - accuracy: 0.6503 - loss: 0.6172 - val_accuracy: 0.5844 - val_loss: 0.6805
Epoch 7/20
[1m40/40[0m [32m━━━━

In [4]:
# Optimizers to compare, each created with its default settings. Every
# instance is used for exactly one model in the loop below.
optimizers = {
    'Adam': tf.keras.optimizers.Adam(),
    'SGD': tf.keras.optimizers.SGD(),
    'RMSprop': tf.keras.optimizers.RMSprop()
}

# Train and evaluate models with different optimizers
# cnn_model is rebound on every iteration, so after the loop it refers to the
# model trained with the last optimizer in the dictionary.
for optimizer_name, optimizer_instance in optimizers.items():
    print(f"Training model with {optimizer_name} optimizer...")
    
    # Create CNN model
    cnn_model = create_cnn(input_shape, num_classes)
    
    # Compile CNN model with the specified optimizer
    # (create_cnn already compiled the model with 'adam'; this call replaces
    # that configuration with the optimizer under test.)
    cnn_model.compile(optimizer=optimizer_instance, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    
    # Train CNN model
    cnn_model.fit(datagen.flow(X_train, y_train, batch_size=batch_size), epochs=20, validation_data=(X_val, y_val))
    
    # Evaluate CNN model
    test_loss, test_accuracy = cnn_model.evaluate(X_test, y_test)
    print(f"Test Accuracy with {optimizer_name} optimizer: {test_accuracy}")

Training model with Adam optimizer...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 229ms/step - accuracy: 0.4945 - loss: 1.0257 - val_accuracy: 0.5500 - val_loss: 0.6921
Epoch 2/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 226ms/step - accuracy: 0.4794 - loss: 0.6938 - val_accuracy: 0.4500 - val_loss: 0.6934
Epoch 3/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 219ms/step - accuracy: 0.5382 - loss: 0.6919 - val_accuracy: 0.5094 - val_loss: 0.6933
Epoch 4/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 218ms/step - accuracy: 0.5660 - loss: 0.6849 - val_accuracy: 0.5938 - val_loss: 0.6765
Epoch 5/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 218ms/step - accuracy: 0.5656 - loss: 0.6804 - val_accuracy: 0.5375 - val_loss: 0.6936
Epoch 6/20
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 225ms/step - accuracy: 0.5958 - loss: 0.6704 - val_accuracy: 0.5906 - val_loss: 0.6692
Epoch 7/20
[1m40/40[0m [32m━━━━━━