# Feature Pre-Extraction for SLAM

This notebook extracts features from a dataset and saves them locally in `.npy` format.

In [None]:
import configparser
import os
import numpy as np
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, interactive
from IPython.display import display

import time
import pandas as pd
from PIL import Image
import glob
from feature_extraction.feature_extractor_holistic import AlexNetConv3Extractor, HDCDELF, SAD
from feature_extraction.feature_extractor_patchnetvlad import PatchNetVLADFeatureExtractor
from feature_extraction.feature_extractor_cosplace import CosPlaceFeatureExtractor
from feature_extraction.feature_extractor_eigenplaces import EigenPlacesFeatureExtractor
from feature_extraction.feature_extractor_boq import BoQFeatureExtractor
from patchnetvlad.tools import PATCHNETVLAD_ROOT_DIR

## Setup Dataset Input Directory

In [None]:
# --- Dataset location input ---
# Free-text field holding the directory to scan for images. Its current
# value is read from `dataset_path_widget.value` by the loading cell.
dataset_path_widget = widgets.Text(
    description='Dataset Path:',
    placeholder='Enter path to image directory',
    value='data',
    disabled=False,
    layout=widgets.Layout(width='80%'),
    style={'description_width': 'initial'},
)
display(dataset_path_widget)

### Load the Data

In [None]:
# Read the directory the user entered in the path widget.
dataset_path = dataset_path_widget.value

# Collect every image below `dataset_path` (recursively), sorted by path so
# the processing order is deterministic.
image_extensions = ['*.jpg', '*.png', '*.jpeg', '*.bmp', '*.tiff']
image_paths = []
if os.path.isdir(dataset_path):
    print(f"Searching for images in: {dataset_path}")

    # File-name suffixes ('.jpg', '.png', ...) derived from the patterns above.
    image_suffixes = tuple(ext.lstrip('*').lower() for ext in image_extensions)

    # Escape the user-supplied directory so glob metacharacters it may contain
    # ('[', ']', '*', '?') are matched literally, and walk the tree only once
    # instead of once per extension.
    pattern = os.path.join(glob.escape(dataset_path), '**', '*')

    # Compare suffixes case-insensitively so files such as 'IMG_0001.JPG' are
    # found on case-sensitive filesystems as well.
    image_paths = sorted(
        path
        for path in glob.glob(pattern, recursive=True)
        if path.lower().endswith(image_suffixes)
    )

    if not image_paths:
        print(f"Warning: No images found with extensions {image_extensions} in {dataset_path}")
    else:
        print(f"Found {len(image_paths)} images.")
else:
    # `image_paths` stays empty, so the later cells have nothing to process.
    print(f"Error: Dataset path not found or is not a directory: {dataset_path}")

# Extract Features

### Setup Extraction Parameters

In [None]:
# --- Extraction parameter widgets ---
# The three controls are created first and then shown in the same order as
# before: descriptor, batch size, output directory.

# Which descriptor / feature extractor to run.
descriptor_widget = widgets.Dropdown(
    description='Descriptor:',
    options=['HDC-DELF', 'AlexNet', 'NetVLAD', 'PatchNetVLAD', 'CosPlace', 'EigenPlaces', 'SAD', 'BoQ-ResNet50', 'BoQ-DinoV2'],
    value='BoQ-DinoV2',
    disabled=False,
)

# Number of images handed to the extractor per call (1 to 64).
batch_size_widget = widgets.IntSlider(
    description='Batch Size:',
    value=8,
    min=1,
    max=64,
    step=1,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    continuous_update=False,
    disabled=False,
)

# Base directory under which the csv/ and npy/ outputs are written.
output_dir_widget = widgets.Text(
    description='Output Dir:',
    placeholder='Enter base directory for output',
    value='feature_extraction_output',
    disabled=False,
    layout=widgets.Layout(width='80%'),
    style={'description_width': 'initial'},
)

display(descriptor_widget)
display(batch_size_widget)
display(output_dir_widget)

### Prepare Feature Extractor

In [None]:
# --- Snapshot the current widget values ---
selected_model = descriptor_widget.value
base_output_dir = output_dir_widget.value
batch_size = batch_size_widget.value

# --- Output layout ---
#   <base>/csv/metadata_<model>.csv     per-image metadata table
#   <base>/npy/<model>/                 one .npy file per image
#   <base>/npy/<model>-stacked/         all features stacked into one array
csv_dir = os.path.join(base_output_dir, 'csv')
npy_dir = os.path.join(base_output_dir, 'npy', selected_model)
combined_npy_dir = os.path.join(base_output_dir, 'npy', selected_model + '-stacked')
for _out_dir in (csv_dir, npy_dir, combined_npy_dir):
    os.makedirs(_out_dir, exist_ok=True)
csv_output_path = os.path.join(csv_dir, f"metadata_{selected_model}.csv")
print(f"CSV will be saved to: {csv_output_path}")
print(f".npy files will be saved to: {npy_dir}")
print(f"Combined .npy feature vector will be saved to: {combined_npy_dir}")


# --- Model Configuration & Initialization ---
# Descriptors built through PatchNetVLADFeatureExtractor, mapped to their
# config file relative to PATCHNETVLAD_ROOT_DIR.
_PATCHNETVLAD_CONFIGS = {
    'NetVLAD': 'configs/netvlad_extract.ini',
    'PatchNetVLAD': 'configs/speed.ini',
}

# Descriptors that only need a zero-argument factory call.
_EXTRACTOR_FACTORIES = {
    'AlexNet': AlexNetConv3Extractor,
    'HDC-DELF': HDCDELF,
    'SAD': SAD,
    'CosPlace': CosPlaceFeatureExtractor,
    'EigenPlaces': EigenPlacesFeatureExtractor,
    'BoQ-ResNet50': lambda: BoQFeatureExtractor(backbone_name="resnet50"),
    'BoQ-DinoV2': lambda: BoQFeatureExtractor(backbone_name="dinov2"),
}

feature_extractor = None
print(f"Initializing feature extractor: {selected_model}...")
if selected_model in _PATCHNETVLAD_CONFIGS:
    config_file = os.path.join(PATCHNETVLAD_ROOT_DIR, _PATCHNETVLAD_CONFIGS[selected_model])

    # Fail early with a clear message instead of parsing a missing file.
    if not os.path.isfile(config_file):
        print(f"Error: Config file not found for {selected_model} at {config_file}")
        raise FileNotFoundError(f"PatchNetVLAD config missing: {config_file}")

    print(f"  Using config: {config_file}")
    config = configparser.ConfigParser()
    config.read(config_file)
    feature_extractor = PatchNetVLADFeatureExtractor(config)
elif selected_model in _EXTRACTOR_FACTORIES:
    feature_extractor = _EXTRACTOR_FACTORIES[selected_model]()
else:
    print(f"Error: Model '{selected_model}' is not recognized for initialization.")
    raise ValueError(f"Unsupported model: {selected_model}")

print("Feature extractor initialized.")

### Execute Feature Extraction

In [None]:
# --- Feature Extraction Loop ---
# Runs the selected extractor over `image_paths` in batches of `batch_size`.
# For every image whose features were computed and saved it:
#   * writes <npy_dir>/<image stem>.npy,
#   * appends the feature to `ordered_features` (same order as the metadata),
#   * appends one metadata row to `metadata_list`.
# Images that cannot be loaded, and batches the extractor fails on, are
# reported and counted in `failed_count`; the loop then moves on.
metadata_list = []
ordered_features = []
total_start_time = time.time()
processed_count = 0
failed_count = 0
# .npy paths written during this run, used to detect base-name collisions.
written_npy_paths = set()

num_images = len(image_paths)
num_batches = (num_images + batch_size - 1) // batch_size  # ceiling division

print(f"\nProcessing {len(image_paths)} images...")

for i in range(0, num_images, batch_size):
    batch_paths = image_paths[i:i + batch_size]
    batch_images_data = []
    # Position within `batch_paths` of each successfully loaded image, so
    # features can be mapped back to their file after failed loads are dropped.
    batch_valid_indices = []

    # Load images for the current batch
    for idx, img_path in enumerate(batch_paths):
        try:
            # The context manager closes the underlying file handle, which
            # Pillow may otherwise keep open (e.g. for multi-frame formats).
            with Image.open(img_path) as img:
                rgb_array = np.array(img.convert('RGB'))
            batch_images_data.append(rgb_array)
            batch_valid_indices.append(idx)
        except Exception as e:
            # Deliberate best-effort: one unreadable image must not stop the run.
            print(f"Warning: Could not load image {os.path.basename(img_path)}. Skipping. Error: {e}")
            failed_count += 1

    if not batch_images_data:
        if batch_paths:
             print(f"Warning: No images could be loaded in batch starting with {os.path.basename(batch_paths[0])}. Skipping batch.")
        continue

    # Number of images of this batch that were fully saved and recorded; used
    # to count failures correctly if an error occurs part-way through.
    saved_in_batch = 0

    # Process the valid images in the batch
    try:
        batch_start_time = time.time()
        feature_vectors_batch = feature_extractor.compute_features(batch_images_data)
        batch_processing_time = time.time() - batch_start_time
        # The batch time is split evenly; per-image timing is not measured.
        time_per_image = batch_processing_time / len(batch_images_data)

        # Check if output length matches input length
        if len(feature_vectors_batch) != len(batch_images_data):
             print(f"Error: Mismatch between input batch size ({len(batch_images_data)}) and output features ({len(feature_vectors_batch)}) for batch starting with {os.path.basename(batch_paths[0])}. Skipping results for this batch.")
             failed_count += len(batch_images_data)
             continue

        # Save features and record metadata for successfully processed images
        for batch_idx, feature_vector in enumerate(feature_vectors_batch):
            img_filepath = batch_paths[batch_valid_indices[batch_idx]]
            frame_id = os.path.basename(img_filepath)
            feature_dim = feature_vector.shape

            # Define .npy output path
            npy_filename = f"{os.path.splitext(frame_id)[0]}.npy"
            feature_path_npy = os.path.join(npy_dir, npy_filename)

            # The search is recursive and the output name is derived from the
            # base name only, so two images can map to the same .npy file.
            # Make the overwrite visible instead of losing data silently.
            if feature_path_npy in written_npy_paths:
                print(f"Warning: {npy_filename} was already written for another image with the same base name; overwriting it with features from {img_filepath}.")
            written_npy_paths.add(feature_path_npy)

            # Save .npy file
            np.save(feature_path_npy, feature_vector)

            # Record the feature only after the save succeeded, so the stacked
            # array stays aligned row-for-row with the metadata.
            ordered_features.append(feature_vector)

            # Store Metadata
            metadata_list.append({
                'frame_id': frame_id,
                'model_name': selected_model,
                'processing_time_sec': time_per_image,
                'feature_dim': str(feature_dim),
                'feature_path_npy': feature_path_npy,
                'original_path': img_filepath
            })
            processed_count += 1
            saved_in_batch += 1

        print(f"  Processed batch {i//batch_size + 1}/{num_batches} ({processed_count}/{len(image_paths)} images done)")

    except Exception as e:
        print(f"Error processing batch starting with {os.path.basename(batch_paths[0])}: {e}")
        # Only the images not yet saved count as failed; the others are
        # already included in `processed_count`.
        failed_count += len(batch_images_data) - saved_in_batch

### Summary & Metadata

In [None]:
# --- Final Summary & Save Metadata ---
# Reports the counters filled by the extraction loop, writes the metadata
# table to CSV and stores all features stacked into a single .npy array.
# NOTE: the elapsed time runs from `total_start_time` (set when the extraction
# cell starts) until this cell is executed.
total_end_time = time.time()
elapsed_seconds = total_end_time - total_start_time

print("\n--- Extraction Summary ---")
print(f"Successfully processed: {processed_count} images")
if failed_count > 0:
    print(f"Failed/Skipped:       {failed_count} images")
print(f"Total processing time: {elapsed_seconds:.2f} seconds.")

if not metadata_list:
    print("\nNo metadata was generated (or all images failed).")
else:
    # Build the table and pin the column order (only columns that exist).
    cols_order = ['frame_id', 'model_name', 'processing_time_sec', 'feature_dim', 'feature_path_npy', 'original_path']
    metadata_df = pd.DataFrame(metadata_list)
    present_cols = [col for col in cols_order if col in metadata_df.columns]
    metadata_df = metadata_df[present_cols]

    # Persist the table
    metadata_df.to_csv(csv_output_path, index=False)
    print(f"\nMetadata saved to: {csv_output_path}")
    print(f".npy features saved in: {npy_dir}")

    # Show the first rows inline
    print("\nMetadata Preview (first 5 rows):")
    preview = metadata_df.head()
    display(preview)

    # Stack the per-image features (kept in metadata order) and save them;
    # `ordered_features` is rebound from a list to the stacked array.
    npy_filename = "stacked_features.npy"
    combined_npy_path = os.path.join(combined_npy_dir, npy_filename)
    ordered_features = np.vstack(ordered_features)
    np.save(combined_npy_path, ordered_features)
    print(f"Stacked features of {ordered_features.shape} shape saved to: {combined_npy_path}")