In [None]:
import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms, models
from PIL import Image
import cv2
import numpy as np
from google.colab import drive
import time
from tqdm import tqdm

# 1. Install Libraries
# Colab shell commands; `-q` keeps pip's output quiet.
# NOTE(review): neither package is version-pinned, so a fresh runtime may pull a different release.
print("Installing Ultralytics...")
!pip install ultralytics -q
print("Installing Torchreid...")
!pip install torchreid -q

# 2. Mount Google Drive
# The model checkpoint, enrollment videos and gallery file used by later cells live under this mount point.
print("Mounting Google Drive...")
drive.mount('/content/drive')

# These imports come after the installs above because the packages are installed by this very cell.
from ultralytics import YOLO
import torchreid
import torch.nn.functional as F
print("All libraries imported successfully.")

Installing Ultralytics...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m50.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling Torchreid...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.7/92.7 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for torchreid (setup.py) ... [?25l[?25hdone
Mounting Google Drive...
Mounted at /content/drive
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.




All libraries imported successfully.


In [2]:
class BasicConv2d(nn.Module):
    """Bias-free 2-D convolution followed by batch normalisation and ReLU.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride (default 1).
        padding: Zero-padding applied by the convolution (default 0).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super().__init__()
        # The conv has no bias term; the BatchNorm layer that follows has its own shift.
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply conv -> batch norm -> ReLU and return the activated feature map."""
        return self.relu(self.bn(self.conv(x)))

class GaitCNNLSTM(nn.Module):
    """Per-frame CNN encoder followed by an LSTM over the frame sequence.

    Each frame is reshaped to a 1x64x64 image and passed through six
    ``BasicConv2d`` blocks with two 2x2 max-pools (64 -> 32 -> 16 spatially,
    128 output channels). The flattened per-frame features feed a single-layer
    LSTM, and the final hidden state is projected to the embedding and then to
    per-subject logits.

    Args:
        embedding_dim: Size of the embedding produced by ``fc1``.
        num_subjects: Number of classes produced by ``classifier``.
        lstm_hidden_dim: Hidden size of the LSTM.
    """

    def __init__(self, embedding_dim=256, num_subjects=74, lstm_hidden_dim=512):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_subjects = num_subjects
        self.lstm_hidden_dim = lstm_hidden_dim

        # Submodule names and creation order are kept as-is so that saved
        # state dicts keep loading and random initialisation is unchanged.
        self.conv1 = BasicConv2d(1, 32, kernel_size=5, stride=1, padding=2)
        self.conv2 = BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv3 = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv4 = BasicConv2d(64, 64, kernel_size=3, stride=1, padding=1)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.conv5 = BasicConv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv6 = BasicConv2d(128, 128, kernel_size=3, stride=1, padding=1)

        # 128 channels on a 16x16 map (64x64 input halved by each of the two pools).
        self.cnn_feature_size = 128 * 16 * 16
        self.lstm = nn.LSTM(
            self.cnn_feature_size, self.lstm_hidden_dim, 1, batch_first=True
        )
        self.fc1 = nn.Linear(self.lstm_hidden_dim, self.embedding_dim)
        self.classifier = nn.Linear(self.embedding_dim, self.num_subjects)

    def forward(self, x):
        """Encode a batch of frame sequences.

        Args:
            x: 5-D tensor whose first two dimensions are (batch, sequence); the
                remainder is reshaped to 1x64x64 per frame.

        Returns:
            Tuple ``(logits, embedding)``.
        """
        batch_size, seq_len, _channels, _height, _width = x.shape

        # Fold the time axis into the batch axis so the CNN sees single frames.
        frames = x.view(batch_size * seq_len, 1, 64, 64)
        cnn_stack = (
            self.conv1, self.conv2, self.maxpool1,
            self.conv3, self.conv4, self.maxpool2,
            self.conv5, self.conv6,
        )
        for layer in cnn_stack:
            frames = layer(frames)

        # Flatten each frame's feature map, then restore the (batch, seq) layout.
        per_frame = frames.view(batch_size * seq_len, -1)
        sequence = per_frame.view(batch_size, seq_len, self.cnn_feature_size)

        _, (final_hidden, _) = self.lstm(sequence)
        # The LSTM has one layer, so dropping dim 0 leaves (batch, hidden).
        summary = final_hidden.squeeze(0)

        embedding = self.fc1(summary)
        logits = self.classifier(embedding)
        return logits, embedding

print("Model class (GaitCNNLSTM) defined.")

Model class (GaitCNNLSTM) defined.


In [3]:
# --- 1. SET UP DEVICE AND HYPERPARAMETERS ---
# Every model below is moved to this device; it falls back to CPU when no GPU is present.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

GAIT_EMBEDDING_DIM = 256   # size of the gait embedding produced by GaitCNNLSTM.fc1
NUM_SUBJECTS = 74          # classifier width; must match the saved checkpoint
SEQ_LEN = 30               # number of frames fused into one embedding during enrollment

# --- 2. Load GAIT Model (GaitCNNLSTM) ---
print("Loading trained GaitCNNLSTM model...")
gait_model = GaitCNNLSTM(GAIT_EMBEDDING_DIM, NUM_SUBJECTS).to(device)
gait_model_path = "/content/drive/MyDrive/CAPSTONE/my_gait_cnnlstm.pth"
# map_location remaps the stored tensors onto the active device, so a
# checkpoint saved on a GPU runtime still loads when we fell back to CPU.
gait_model.load_state_dict(torch.load(gait_model_path, map_location=device))
gait_model.eval()
print("GaitCNNLSTM model loaded successfully.")

# --- 3. Load APPEARANCE Model (Torchreid OSNet) ---
print("Loading pre-trained Appearance (OSNet) model from Torchreid...")
appearance_model = torchreid.models.build_model(name='osnet_x1_0', num_classes=751, pretrained=True)
appearance_model = appearance_model.to(device)
# eval() mode is what the enrollment loop relies on when it treats the output as a feature vector.
appearance_model.eval()
print("Appearance (OSNet) model loaded successfully. (Output dim: 512)")

# --- 4. Load TRACKING Model (YOLOv8-Seg) ---
print("Loading YOLOv8-Seg model for tracking...")
yolo_seg_model = YOLO('yolov8n-seg.pt')
yolo_seg_model.to(device)
print("YOLOv8-Seg model loaded successfully.")

# --- 5. Load POSE Model (YOLOv8-Pose) ---
print("Loading YOLOv8-Pose model...")
pose_model = YOLO('yolov8n-pose.pt')
pose_model.to(device)
print("YOLOv8-Pose model loaded successfully.")

# --- 6. Define ALL preprocessing transforms ---
# Gait silhouettes are resized to the 64x64 input that GaitCNNLSTM.forward reshapes to.
gait_transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])
# Appearance crops are resized to 256x128 (H x W) and normalised per channel.
appearance_transform = transforms.Compose([
    transforms.Resize((256, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Helper function to get body ratios
def get_body_ratios(kpts, target_device=None):
    """Compute torso/height and leg/height ratios from pose keypoints.

    Rows 0, 5/6, 11/12 and 15/16 of ``kpts`` are read as nose, shoulders, hips
    and ankles respectively; only the y coordinate (column 1) is used.

    Args:
        kpts: 2-D array-like indexed as ``kpts[row, col]`` with at least 17
            rows and 2 columns.
        target_device: Device for the returned tensor. Defaults to the
            notebook-level ``device`` when omitted.

    Returns:
        A float32 tensor ``[torso_height / total_height, leg_height / total_height]``
        on the chosen device, or ``None`` when the keypoints cannot be indexed
        or the nose-to-ankle / hip-to-ankle spans are not larger than 1.
    """
    try:
        nose_y = kpts[0, 1]
        shoulder_y = (kpts[5, 1] + kpts[6, 1]) / 2.0
        hip_y = (kpts[11, 1] + kpts[12, 1]) / 2.0
        ankle_y = (kpts[15, 1] + kpts[16, 1]) / 2.0
    except (IndexError, TypeError):
        # Missing or malformed keypoints: there is no ratio for this frame.
        # Other errors are no longer swallowed, so real faults stay visible.
        return None

    total_height = ankle_y - nose_y
    torso_height = hip_y - shoulder_y
    leg_height = ankle_y - hip_y

    # Require spans of more than one unit so the ratios are not dominated by noise.
    if total_height > 1 and leg_height > 1:
        out_device = device if target_device is None else target_device
        ratios = torch.tensor(
            [torso_height / total_height, leg_height / total_height],
            dtype=torch.float32,
        )
        return ratios.to(out_device)
    return None

print("\nAll models and transforms are ready for enrollment.")

Using device: cuda
Loading trained GaitCNNLSTM model...
GaitCNNLSTM model loaded successfully.
Loading pre-trained Appearance (OSNet) model from Torchreid...


Downloading...
From: https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY
To: /root/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth
100%|██████████| 10.9M/10.9M [00:00<00:00, 29.7MB/s]


Successfully loaded imagenet pretrained weights from "/root/.cache/torch/checkpoints/osnet_x1_0_imagenet.pth"
** The following layers are discarded due to unmatched keys or layer size: ['classifier.weight', 'classifier.bias']
Appearance (OSNet) model loaded successfully. (Output dim: 512)
Loading YOLOv8-Seg model for tracking...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-seg.pt to 'yolov8n-seg.pt': 100% ━━━━━━━━━━━━ 6.7MB 303.2MB/s 0.0s
YOLOv8-Seg model loaded successfully.
Loading YOLOv8-Pose model...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n-pose.pt to 'yolov8n-pose.pt': 100% ━━━━━━━━━━━━ 6.5MB 266.5MB/s 0.0s
YOLOv8-Pose model loaded successfully.

All models and transforms are ready for enrollment.


In [None]:
# --- 1. ⚠️ DEFINE YOUR PEOPLE AND VIDEOS HERE ---
# Format: ("UNIQUE_PERSON_NAME", "/path/to/their/video.mp4")
people_to_enroll = [
    ("Naman", "/content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Naman.mp4"),
    ("Nishant", "/content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Nishant.mp4"),
    ("Aadishesh", "/content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Aadishesh.mp4"),
]
# ------------------------------------------------

# --- 2. DEFINE YOUR GALLERY SAVE PATH ---
gallery_save_path = "/content/drive/MyDrive/CAPSTONE/my_known_gallery.pth"
# ----------------------------------------


def _extract_frame_features(frame):
    """Track the person in one frame and build its gait, appearance and pose features.

    Returns:
        ``(track_id, silhouette_tensor, appearance_tensor, body_ratio_tensor)``
        where ``body_ratio_tensor`` may be ``None``, or ``None`` for the whole
        frame when there is no tracked, segmented person or the bounding box
        does not overlap the frame.
    """
    # --- a. Run Tracker ---
    # We assume only one person is in the enrollment video
    results = yolo_seg_model.track(frame, persist=True, classes=0, verbose=False)
    detections = results[0]
    if detections.masks is None or detections.boxes.id is None:
        return None

    # --- Find the main person (usually track_id 1, or the first one) ---
    mask_tensor = detections.masks.data[0]
    box = detections.boxes.data[0]
    track_id = detections.boxes.id.int().cpu().tolist()[0]

    # Clamp the box to the frame. A negative coordinate would wrap around under
    # NumPy slicing, and an empty crop makes cv2.cvtColor raise, which used to
    # abort the rest of the video instead of just skipping this frame.
    frame_height, frame_width = frame.shape[:2]
    x1, y1, x2, y2 = [int(i) for i in box[:4]]
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, frame_width), min(y2, frame_height)
    if x2 <= x1 or y2 <= y1:
        return None

    # --- b. Process GAIT ---
    # The binary mask is scaled to 0..255 and converted to an 8-bit grayscale image.
    mask_np = mask_tensor.cpu().numpy() * 255
    silhouette_tensor = gait_transform(Image.fromarray(mask_np).convert('L')).to(device)

    # --- c. Process APPEARANCE ---
    # OpenCV frames are BGR; the appearance transform is fed an RGB PIL image.
    crop_img = frame[y1:y2, x1:x2]
    appearance_tensor = appearance_transform(Image.fromarray(cv2.cvtColor(crop_img, cv2.COLOR_BGR2RGB))).to(device)

    # --- d. Process POSE ---
    pose_results = pose_model(crop_img, verbose=False)
    body_ratio_tensor = None
    if len(pose_results[0].keypoints.data) > 0:
        # Keep only the (x, y) columns of the first detected skeleton.
        kpts = pose_results[0].keypoints.data[0].cpu().numpy()[:, :2]
        body_ratio_tensor = get_body_ratios(kpts)

    return track_id, silhouette_tensor, appearance_tensor, body_ratio_tensor


def _fuse_sequence(gait_frames, appearance_frames, body_ratios):
    """Turn one full buffer of per-frame features into a single fused embedding.

    Each modality is reduced to one row, L2-normalised, and the three rows are
    concatenated along the feature dimension.
    """
    gait_sequence = torch.stack(gait_frames, dim=0).unsqueeze(0)
    appearance_sequence = torch.stack(appearance_frames, dim=0)

    if len(body_ratios) > 0:
        ratio_sequence = torch.stack(body_ratios, dim=0)
    else:
        # No pose ratio was available in this window: fall back to zeros.
        ratio_sequence = torch.zeros((SEQ_LEN, 2)).to(device)

    with torch.no_grad():
        _, gait_embedding = gait_model(gait_sequence)
        appearance_embedding = torch.mean(appearance_model(appearance_sequence), dim=0, keepdim=True)
        body_ratio_embedding = torch.mean(ratio_sequence, dim=0, keepdim=True)

        # --- NORMALIZE & FUSE ---
        gait_embedding = F.normalize(gait_embedding, p=2, dim=1)
        appearance_embedding = F.normalize(appearance_embedding, p=2, dim=1)
        body_ratio_embedding = F.normalize(body_ratio_embedding, p=2, dim=1)

        return torch.cat((gait_embedding, appearance_embedding, body_ratio_embedding), dim=1)


def _collect_embeddings(person_name, video_path):
    """Read one enrollment video and return the fused embedding of every full window.

    Returns:
        A list of fused embeddings (possibly empty), or ``None`` when the video
        could not be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Error: Could not open video file {video_path}. Skipping.")
        cap.release()
        return None

    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Per-track rolling buffers; a window is emitted once a track holds SEQ_LEN frames.
    buffers = {}
    embeddings = []

    try:
        for _ in tqdm(range(total_frames), desc=f"Enrolling {person_name}"):
            ret, frame = cap.read()
            if not ret:
                break

            features = _extract_frame_features(frame)
            if features is None:
                continue
            track_id, silhouette_tensor, appearance_tensor, body_ratio_tensor = features

            # --- e. Store ALL features ---
            track_buffer = buffers.setdefault(track_id, {"gait": [], "appearance": [], "ratios": []})
            track_buffer["gait"].append(silhouette_tensor)
            track_buffer["appearance"].append(appearance_tensor)
            if body_ratio_tensor is not None:
                track_buffer["ratios"].append(body_ratio_tensor)

            # --- f. Check for Full Sequence & Create Embedding ---
            if len(track_buffer["gait"]) == SEQ_LEN:
                embeddings.append(
                    _fuse_sequence(track_buffer["gait"], track_buffer["appearance"], track_buffer["ratios"])
                )
                # Clear sequences
                buffers[track_id] = {"gait": [], "appearance": [], "ratios": []}

    except Exception as e:
        # Best effort: keep whatever windows were collected before the failure.
        print(f"An error occurred: {e}")
    finally:
        cap.release()

    return embeddings


gallery_of_known_people = {}
print(f"Starting enrollment for {len(people_to_enroll)} people...")

for person_name, video_path in people_to_enroll:
    print(f"\n--- Processing video for: {person_name} ---")
    print(f"Video path: {video_path}")

    all_final_embeddings = _collect_embeddings(person_name, video_path)
    if all_final_embeddings is None:
        continue

    # --- g. Create the "Master" Embedding ---
    if len(all_final_embeddings) > 0:
        master_embedding = torch.mean(torch.cat(all_final_embeddings, dim=0), dim=0, keepdim=True)
        gallery_of_known_people[person_name] = master_embedding
        print(f"✅ Enrollment SUCCESS for {person_name}. Generated {len(all_final_embeddings)} embeddings and averaged them.")
        print(f"   Final embedding shape: {master_embedding.shape}")
    else:
        print(f"❌ Enrollment FAILED for {person_name}. No full sequences were captured.")

# --- 3. Save the final gallery ---
# NOTE(review): embeddings are saved on whatever device they were computed on;
# a reader on a CPU-only runtime will need map_location when loading this file.
torch.save(gallery_of_known_people, gallery_save_path)
print(f"\n--- ALL DONE ---")
print(f"Gallery with {len(gallery_of_known_people)} people saved to: {gallery_save_path}")

Starting enrollment for 3 people...

--- Processing video for: Naman ---
Video path: /content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Naman.mp4


Enrolling Naman: 100%|██████████| 1758/1758 [01:31<00:00, 19.22it/s]


✅ Enrollment SUCCESS for Naman. Generated 48 embeddings and averaged them.
   Final embedding shape: torch.Size([1, 770])

--- Processing video for: Nishant ---
Video path: /content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Nishant.mp4


Enrolling Nishant: 100%|██████████| 2288/2288 [01:51<00:00, 20.58it/s]


✅ Enrollment SUCCESS for Nishant. Generated 58 embeddings and averaged them.
   Final embedding shape: torch.Size([1, 770])

--- Processing video for: Aadishesh ---
Video path: /content/drive/MyDrive/CAPSTONE/Custom_Dataset/house-1/Aadishesh.mp4


Enrolling Aadishesh: 100%|██████████| 2030/2030 [01:37<00:00, 20.83it/s]

✅ Enrollment SUCCESS for Aadishesh. Generated 46 embeddings and averaged them.
   Final embedding shape: torch.Size([1, 770])

--- ALL DONE ---
Gallery with 3 people saved to: /content/drive/MyDrive/CAPSTONE/my_known_gallery.pth





In [11]:
import torch

# Report how many identities the in-memory gallery currently holds.
enrolled_count = len(gallery_of_known_people)
print(f"--- Inspecting {enrolled_count} Enrolled People ---")

if enrolled_count == 0:
    print("Gallery is empty. No one is enrolled.")
else:
    for person_name, embedding in gallery_of_known_people.items():
        print(f"\nPerson ID: {person_name}")

        # Work on a host copy so the analysis below is independent of where the tensor lives.
        host_embedding = embedding.cpu()

        print(f"  Embedding Shape:     {host_embedding.shape}")

        # Summary statistics, printed in a fixed order with their original labels.
        summary_stats = (
            ("  Mean:      ", host_embedding.mean()),
            ("  Std Dev:   ", host_embedding.std()),
            ("  Min:       ", host_embedding.min()),
            ("  Max:       ", host_embedding.max()),
        )
        for label, stat in summary_stats:
            print(f"{label}{stat.item():.4f}")

        # Peek at the first few raw feature values; .numpy() gives a cleaner print.
        leading_features = host_embedding[0, :5].numpy()
        print(f"  Slice (first 5 features): {leading_features}")

--- Inspecting 3 Enrolled People ---

Person ID: Naman
  Embedding Shape:     torch.Size([1, 770])
  Mean:      0.0227
  Std Dev:   0.0492
  Min:       -0.0922
  Max:       0.8240
  Slice (first 5 features): [   0.015047   -0.023165   -0.032798   -0.092175    0.023115]

Person ID: Nishant
  Embedding Shape:     torch.Size([1, 770])
  Mean:      0.0233
  Std Dev:   0.0511
  Min:       -0.1315
  Max:       0.7495
  Slice (first 5 features): [   -0.03911   -0.045523    0.017388   -0.094984   -0.012199]

Person ID: Aadishesh
  Embedding Shape:     torch.Size([1, 770])
  Mean:      0.0229
  Std Dev:   0.0489
  Min:       -0.0866
  Max:       0.7856
  Slice (first 5 features): [   0.022283    -0.03213   -0.050698   -0.083325    0.025126]
