In [2]:
import cv2
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
import os, tqdm
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import auc, roc_curve
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay
from collections import Counter
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.image import extract_patches_2d

from PIL import Image

from skimage.feature import fisher_vector, learn_gmm

from typing import *

import wandb
os.environ["WANDB_ENTITY"] = "c3-mcv"
# Never hard-code credentials in a notebook: the file (and its outputs) get shared and
# committed. The API key is read from the WANDB_API_KEY environment variable instead;
# when it is not set, `key` is None and wandb falls back to its own interactive prompt.
# NOTE(review): the key that used to be written on this line is exposed and should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"), relogin=True)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Luis/.netrc


True

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Reshape, Input
def build_mlp(input_size, phase='train'):
    """
    Assemble the patch-level MLP.

    The network takes an (input_size, input_size, 3) tensor, flattens it and feeds it
    through a 2048-unit ReLU layer. Unless `phase` is 'feature_extractor', an 8-unit
    output layer is stacked on top.

    Parameters:
    - input_size: Side length of the square RGB input patch.
    - phase: 'feature_extractor' returns the model truncated after the hidden layer;
      'test' uses a linear activation on the output layer; any other value
      (default 'train') uses softmax.

    Returns:
    - The (uncompiled) keras Sequential model.
    """
    flat_dim = input_size * input_size * 3

    net = Sequential()
    net.add(Input(shape=(input_size, input_size, 3,), name='input'))
    net.add(Reshape((flat_dim,)))
    net.add(Dense(units=2048, activation='relu'))

    # Feature-extractor mode: hand back the network without the output layer.
    if phase == 'feature_extractor':
        return net

    if phase == 'test':
        head_activation = 'linear'
    else:
        head_activation = 'softmax'
    net.add(Dense(units=8, activation=head_activation))
    return net

In [17]:
class MLP_BoVW():
    """
    Patch-based image classification pipeline.

    Constructing an instance crops every image under ``<data_path>/train`` and
    ``<data_path>/test`` into square patches (written to disk), computes one feature
    vector per patch and instantiates -- without fitting -- the classifier, the
    optional PCA and the optional scaler selected by ``config``.
    """

    def __init__(self, config, size_per_class=1e9, data_path='./MIT_split', model_path = './models/mlp.h5'):
        """
        Build the patch datasets, extract features and create the estimators.

        Parameters:
        - config: Dictionary of settings. Keys read by this class:
            'patch_size', 'overlap', optional 'image_size' (default 256),
            'features' ('mlp' or 'dense_sift'),
            'classifier' ('knn', 'svm' or 'logistic'),
            'n_neigh' and 'metric' (knn), 'kernel' and 'degree_pol' (svm),
            'C' (svm and logistic),
            'n_components' (PCA is created when > 0),
            'scaler' (a truthy value creates a StandardScaler).
        - size_per_class: Maximum number of images taken from each class folder, in every split.
        - data_path: Dataset root containing 'train' and 'test' folders with one sub-folder per label.
        - model_path: Path to the MLP weights file, forwarded to the MLP feature extractor.

        Raises:
        - ValueError: unknown 'features' / 'classifier' value, or overlap >= patch_size.
        - NotImplementedError: config['features'] == 'dense_sift' (not implemented).
        """
        # Fail fast on typos, before the slow patch extraction touches the disk.
        if config['features'] not in ('mlp', 'dense_sift'):
            raise ValueError(f"Unknown config['features']: {config['features']!r} (expected 'mlp' or 'dense_sift')")
        if config['classifier'] not in ('knn', 'svm', 'logistic'):
            raise ValueError(f"Unknown config['classifier']: {config['classifier']!r} (expected 'knn', 'svm' or 'logistic')")

        self.config = config
        self.data_path = data_path
        self.size_per_class = size_per_class
        self._initialize_datasets()

        # Compute features for each split
        if self.config['features'] == 'mlp':
            self.train_features, self.test_features = self._compute_features_mlp(model_path)
        elif self.config['features'] == 'dense_sift':
            self.train_features, self.test_features = self._compute_features_dense_sift()

        # Classification (the estimator is only created here, not fitted)
        if self.config['classifier'] == 'knn':
            self.classifier = KNeighborsClassifier(n_neighbors=self.config['n_neigh'], n_jobs=-1, metric=self.config['metric'])
        elif self.config['classifier'] == 'svm':
            self.classifier = SVC(kernel = self.config['kernel'], degree=self.config['degree_pol'], class_weight = 'balanced', gamma = 'auto', C = self.config['C'], probability=True, random_state=123)
        elif self.config['classifier'] == 'logistic':
            self.classifier = LogisticRegression(multi_class = 'auto', penalty='l2', max_iter=300, solver='lbfgs', C = self.config['C'], class_weight = 'balanced', n_jobs=-1, random_state=123)

        # Dimensionality reduction
        self.dim_red = None
        if self.config['n_components'] > 0:
            self.dim_red = PCA(n_components = self.config['n_components'])

        # Standardization
        self.scaler = None
        if self.config['scaler']:
            self.scaler = StandardScaler(with_mean=True, with_std=True)

    def _load_patch_batch(self, image_paths, desc):
        """
        Read every patch in `image_paths` into a single numpy array.

        :param image_paths: Iterable of patch file paths.
        :param desc: Label shown on the progress bar.
        :return: Array stacking the decoded patches in the order of `image_paths`.
        """
        batch = []
        for filename in tqdm.tqdm(image_paths, desc=desc):
            # The context manager closes the file as soon as the pixels are copied out.
            with Image.open(filename) as image:
                batch.append(np.asarray(image))
        return np.array(batch)

    def _compute_features_mlp(self, model_path = './models/mlp.h5'):
        """
        Computes the features for the train and test splits using a MLP.

        :param model_path: Path to the weights file (currently unused, see note below).
        :return: Tuple (train_features, test_features) with the model output for every patch.
        """
        model = build_mlp(input_size=self.config['patch_size'], phase='feature_extractor')
        # NOTE(review): weight loading is disabled, so the features come from a freshly
        # built (untrained) network and `model_path` has no effect. Confirm whether this
        # is intended before relying on these features.
        # model.load_weights(model_path)

        splits = [self.train_dataset_blocks['image_paths'], self.test_dataset_blocks['image_paths']]
        features = []
        for i, image_paths in enumerate(splits):
            batch = self._load_patch_batch(image_paths, 'Extracting features from dataset %d' % i)
            features.append(model.predict(batch))

        train_features, test_features = features
        return train_features, test_features

    def _compute_features_dense_sift(self):
        """
        Computes the features for the train and test splits using dense SIFT.

        Not implemented yet: raises NotImplementedError instead of failing with a
        NameError on undefined return values.
        """
        raise NotImplementedError("Dense SIFT feature extraction is not implemented; use config['features'] = 'mlp'.")

    def _create_directory(self, path):
        """Create `path` (including missing parents); a directory that already exists is left as is."""
        os.makedirs(path, exist_ok=True)

    def _patch_offsets(self):
        """
        Top-left offsets of the sliding window along one image axis.

        The window moves by `patch_size - overlap` pixels over an image whose side is
        config['image_size'] (256 when the key is absent).

        :return: range of pixel offsets, used for both the x and the y axis.
        :raises ValueError: if overlap >= patch_size, which would never advance the window.
        """
        patch_size = self.config['patch_size']
        stride = patch_size - self.config['overlap']
        if stride <= 0:
            raise ValueError(f"config['overlap'] ({self.config['overlap']}) must be smaller than config['patch_size'] ({patch_size})")
        image_size = self.config.get('image_size', 256)
        return range(0, image_size - patch_size + 1, stride)

    def _extract_patches(self, image_path, save_path, dataset_blocks, steps):
        """
        Splits an image into square patches and saves them as JPEG files.

        :param image_path: Path to the input image.
        :param save_path: Directory where the patches are saved; its base name is used as the patch label.
        :param dataset_blocks: Dictionary with 'image_paths' and 'labels' lists; one entry is appended to each per patch.
        :param steps: Iterable of pixel offsets of the sliding window, applied to both axes.
        """
        patch_size = self.config['patch_size']
        stem = os.path.splitext(os.path.basename(image_path))[0]
        label = os.path.basename(save_path)

        # Keep the source file open only while its patches are being cropped.
        with Image.open(image_path) as image:
            i = 0
            # Extract and save patches
            for x in steps:
                for y in steps:
                    patch_path = os.path.join(save_path, f"{stem}_{i}.jpg")
                    box = (x, y, x + patch_size, y + patch_size)
                    image.crop(box).save(patch_path)
                    dataset_blocks['image_paths'].append(patch_path)
                    dataset_blocks['labels'].append(label)
                    i += 1

    def _process_split(self, split, block_path):
        """
        Index one split of the dataset and write the patches of its images to disk.

        :param split: Name of the split folder under `data_path` ('train' or 'test').
        :param block_path: Root directory under which `<split>/<label>/` patch folders are created.
        :return: Tuple (dataset, dataset_blocks). `dataset` lists the source images
                 ('image_paths' list, 'labels' numpy array); `dataset_blocks` lists the
                 generated patches ('image_paths' and 'labels' lists).
        """
        dataset = {'image_paths': [], 'labels': []}
        dataset_blocks = {'image_paths': [], 'labels': []}
        split_path = os.path.join(self.data_path, split)

        # Sliding-window offsets, shared by every image of the split
        steps = self._patch_offsets()

        # os.listdir returns entries in arbitrary order; sorting makes the subset kept by
        # `size_per_class` (and the order of the samples) reproducible across runs.
        for label in tqdm.tqdm(sorted(os.listdir(split_path)), desc=f'Creating {split} patches...'):
            label_path = os.path.join(split_path, label)
            label_block_path = os.path.join(block_path, split, label)
            self._create_directory(label_block_path)

            for i, image_name in enumerate(sorted(os.listdir(label_path))):
                if i >= self.size_per_class:
                    break
                image_path = os.path.join(label_path, image_name)
                dataset['image_paths'].append(image_path)
                dataset['labels'].append(label)

                self._extract_patches(image_path, label_block_path, dataset_blocks, steps)

        dataset['labels'] = np.array(dataset['labels'])
        return dataset, dataset_blocks

    def _initialize_datasets(self):
        """
        Create the patch directory `<data_path>_<patch_size>_blocks` and process the
        'train' and 'test' splits, storing the image-level and patch-level indexes on
        the instance.
        """
        block_path = self.data_path + f'_{self.config["patch_size"]}_blocks'
        self._create_directory(block_path)
        self.train_dataset, self.train_dataset_blocks = self._process_split('train', block_path)
        self.test_dataset, self.test_dataset_blocks = self._process_split('test', block_path)

# Experiment settings consumed by MLP_BoVW.
config = {
    # -- patch extraction --
    'patch_size': 128,      # side of each square patch, in pixels
    'overlap': 64,          # pixels shared by neighbouring patches
    # -- feature extractor --
    'features': 'mlp',
    # -- classifier and pre-processing --
    'classifier': 'logistic',
    'scaler': True,         # create a StandardScaler
    'n_components': 0,      # PCA is only created when this is > 0
    # -- classifier-specific options --
    'n_neigh': 1,           # knn
    'metric': 'euclidean',  # knn
    'kernel': 'linear',     # svm
    'degree_pol': 3,        # svm
    'C': 1,                 # svm / logistic
}

# Building the object writes the patches to disk and extracts their features.
bovw = MLP_BoVW(
    config=config,
    size_per_class=50,
    data_path='./MIT_split',
)

Creating train patches...: 100%|██████████| 8/8 [00:11<00:00,  1.41s/it]
Creating test patches...: 100%|██████████| 8/8 [00:14<00:00,  1.86s/it]
Extracting features from dataset 0: 100%|██████████| 3600/3600 [00:53<00:00, 67.69it/s]




In [22]:
# Sanity check: one 2048-d descriptor per extracted patch, i.e. (n_patches, 2048).
# `train_features` is a single array, so indexing it with [0] would only report the
# shape of the first descriptor, which does not match the output recorded for this cell.
bovw.train_features.shape

(3600, 2048)