In [1]:
import pickle
from pathlib import Path

import cv2
import numpy as np
import torch
from torchreid.reid.utils import FeatureExtractor



In [2]:
# Pick the compute backend: Apple's MPS backend when PyTorch reports it as
# available, otherwise the CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

'mps'

In [3]:
# Build the torchreid feature extractor around the OSNet x1.0 backbone.
# No checkpoint path is supplied (model_path=None), and the model is placed
# on the device selected earlier.
extractor = FeatureExtractor(model_name="osnet_x1_0", model_path=None, device=device)

# TODO: add randomness (flip, crop, erase, jitter, etc.) via transformations

Successfully loaded imagenet pretrained weights from "/Users/ooj/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Model: osnet_x1_0
- params: 2,193,616
- flops: 978,878,352


In [4]:
# Load a sample image from disk.
img_path = "media/bus.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise only surface later as an obscure downstream error.
    raise FileNotFoundError(f"Could not read image: {img_path}")

In [5]:
# Run the extractor on a single-image batch and inspect the result:
# the overall tensor shape and the leading 10 values of the first embedding.
features = extractor([img])
embedding_shape = features.shape
leading_dims = features[0][:10]
print("Extracted embedding shape:", embedding_shape)
print("Embedding vector (first 10 dims):", leading_dims)

Extracted embedding shape: torch.Size([1, 512])
Embedding vector (first 10 dims): tensor([0.0000, 2.1946, 1.4717, 0.0000, 2.2121, 1.3011, 2.6642, 1.5508, 0.0000,
        1.7066], device='mps:0')


In [6]:
# Open the camera at device index 0 and reset the enrollment state.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail fast here instead of crashing later on a None frame.
    raise RuntimeError("Could not open camera at index 0")

embeddings = []       # collected embedding vectors (filled by the capture loop)
max_samples = 100     # number of camera frames to capture
captured_frames = 0   # frames captured so far

In [7]:
# Capture loop: for each camera frame, build several augmented variants
# (brightness, saturation, Gaussian noise, histogram equalisation), show them
# side by side, and append one L2-normalised embedding per variant to
# `embeddings`. Press 'q' in the preview window to stop early.
#
# NOTE(review): frames are passed to the extractor in OpenCV's BGR channel
# order — confirm this matches what the extractor (and the matching code that
# consumes these embeddings) expects.
while captured_frames < max_samples:
    ret, frame = cap.read()
    if not ret or frame is None:
        # A failed read yields ret=False / frame=None; the augmentations
        # below would crash on it, so stop capturing cleanly instead.
        print("Failed to read a frame from the camera; stopping capture.")
        break

    # Brightness: scale all pixels by a random gain (result saturates to uint8).
    brightness_factor = np.random.uniform(0.5, 1.5)
    frame_bright = cv2.convertScaleAbs(frame, alpha=brightness_factor, beta=0)

    # Saturation: scale the S channel in HSV space. Clip before writing back
    # into the uint8 array; an unclipped product above 255 would wrap around.
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    saturation_factor = np.random.uniform(0.5, 1.5)
    scaled_saturation = hsv[:, :, 1].astype(np.float32) * saturation_factor
    hsv[:, :, 1] = np.clip(scaled_saturation, 0, 255).astype(np.uint8)
    frame_saturation = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

    # Gaussian noise: add in floating point, then round and clip. Casting the
    # signed noise straight to uint8 would discard or wrap negative values.
    noise = np.random.normal(scale=0.5, size=frame.shape)
    frame_noisy = np.clip(np.rint(frame.astype(np.float32) + noise), 0, 255).astype(np.uint8)

    # Histogram equalisation applied independently to each colour channel;
    # channels are merged back in their original order.
    eq_frame = cv2.merge([cv2.equalizeHist(channel) for channel in cv2.split(frame)])

    variants = [frame, frame_bright, frame_saturation, frame_noisy, eq_frame]

    cv2.imshow("Capturing sample...", np.hstack(variants))

    # Embed all variants in a single batched call, one row per variant.
    batch = extractor(variants).cpu().numpy().reshape(len(variants), -1)
    norms = np.linalg.norm(batch, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # leave all-zero embeddings unchanged
    embeddings.extend(batch / norms)

    captured_frames += 1
    print(f"Captured sample {captured_frames}/{max_samples}")

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

In [8]:
# Tear down: close any OpenCV preview windows and release the camera handle.
cv2.destroyAllWindows()
cap.release()

In [9]:
# Persist the collected embeddings to disk with pickle.
filename = "data/test3_embeddings.pkl"
if not embeddings:
    # Nothing was captured (e.g. the loop stopped immediately); do not write
    # an empty enrollment file and report it as complete.
    raise RuntimeError("No embeddings were captured; nothing to save.")

# Create the output directory if needed so the capture is not lost to a
# missing-folder error at save time.
Path(filename).parent.mkdir(parents=True, exist_ok=True)
with open(filename, "wb") as f:
    pickle.dump(embeddings, f)
print(f"Enrollment complete. Embeddings saved to {filename}")

Enrollment complete. Embeddings saved to data/test3_embeddings.pkl


# TODO: train a model instead of only generating embeddings
