In [None]:
#Best Code Below

In [8]:
import cv2
import numpy as np
import time
from deep_sort_realtime.deepsort_tracker import DeepSort
from ultralytics import YOLO
from collections import defaultdict, deque
import torch
import torchvision.transforms as T
from torchvision.models import resnet50
from sklearn.metrics.pairwise import cosine_similarity

# Load ResNet-50 feature extractor with modified architecture for better re-ID
# Improved FeatureExtractor with more discriminative features
class FeatureExtractor(torch.nn.Module):
    """ResNet-50 backbone plus a projection head that emits unit-length embeddings.

    The backbone keeps every child module of ``resnet50`` except the last two
    (presumably the global pooling and the classifier of torchvision's
    ResNet -- confirm against the installed version). Pooling is re-applied
    here and the pooled vector is projected 2048 -> 512 -> 256.

    NOTE(review): the projection head is built with freshly initialised
    weights and no checkpoint load is visible in this file.
    NOTE(review): ``pretrained=True`` is the legacy torchvision argument;
    newer releases expect ``weights=...`` -- confirm before upgrading.
    """

    def __init__(self):
        super().__init__()
        backbone = resnet50(pretrained=True)
        # Keep the convolutional trunk only: drop the trailing two children.
        trunk_layers = list(backbone.children())[:-2]
        self.features = torch.nn.Sequential(*trunk_layers)
        self.avgpool = torch.nn.AdaptiveAvgPool2d((1, 1))
        # Projection head applied to the pooled 2048-d vector.
        head_layers = [
            torch.nn.Linear(2048, 512),
            torch.nn.BatchNorm1d(512),
            torch.nn.ReLU(),
            torch.nn.Dropout(0.5),
            torch.nn.Linear(512, 256),
        ]
        self.fc = torch.nn.Sequential(*head_layers)

    def forward(self, x):
        """Map a batch of images to L2-normalised embeddings (one row per image)."""
        pooled = self.avgpool(self.features(x))
        embedding = self.fc(torch.flatten(pooled, 1))
        # Scale every row to unit L2 norm so dot products equal cosine similarity.
        return torch.nn.functional.normalize(embedding, p=2, dim=1)

# Embedding network: placed on the CPU and switched to evaluation mode.
device = torch.device("cpu")
resnet = FeatureExtractor().to(device)
resnet.eval()

# Preprocessing applied to each person crop before it is fed to the network:
# array -> PIL image -> 256x128 resize -> random flip -> tensor -> normalisation.
# NOTE(review): RandomHorizontalFlip(p=0.5) is stochastic, so the same crop can
# produce different tensors from one call to the next.
transform = T.Compose(
    [
        T.ToPILImage(),
        T.Resize((256, 128)),
        T.RandomHorizontalFlip(p=0.5),
        T.ToTensor(),
        T.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Improved feature extraction with error handling
def extract_deep_features(frame, bbox):
    """Return an L2-normalised appearance embedding for the box *bbox* in *frame*.

    The box is grown by 10 pixels on every side (for extra context) and then
    clamped to the frame before cropping.

    Args:
        frame: Image array indexed as ``frame[y, x]``; ``frame.shape[0]`` is
            the height and ``frame.shape[1]`` the width.
        bbox: ``(x1, y1, x2, y2)`` corner coordinates; each value is
            truncated to ``int``.

    Returns:
        The embedding as a numpy array, or ``None`` when the clamped crop is
        empty or the extraction raises.
    """
    pad = 10
    frame_h, frame_w = frame.shape[:2]
    x1, y1, x2, y2 = map(int, bbox)

    # Clamp both corners into the frame. The far corner is also floored at 0:
    # a negative stop index would wrap around and crop the wrong region.
    x1 = max(0, x1 - pad)
    y1 = max(0, y1 - pad)
    x2 = max(0, min(frame_w, x2 + pad))
    y2 = max(0, min(frame_h, y2 + pad))

    person_roi = frame[y1:y2, x1:x2]
    if person_roi.size == 0:
        return None

    # Best effort: a failed extraction skips this sample instead of aborting
    # the caller, so the broad catch is deliberate.
    try:
        img = transform(person_roi).unsqueeze(0).to(device)
        with torch.no_grad():
            features = resnet(img).squeeze().cpu().numpy()
        # The epsilon belongs in the denominator: it guards against a zero
        # norm without shifting every component of the result.
        features = features / (np.linalg.norm(features) + 1e-12)
        return features
    except Exception as e:
        print(f"Feature extraction error: {e}")
        return None

# Enhanced clothing color histogram with spatial pyramid
def extract_clothing_histogram(frame, bbox):
    """Return a 2x2 spatial-pyramid HSV histogram of the lower part of *bbox*.

    The region starts 40% of the way down the box and runs to its bottom
    edge. It is split into a 2x2 grid; each cell contributes an 8x8
    hue/saturation histogram, normalised with ``cv2.normalize`` and
    flattened, and the four results are concatenated.

    Args:
        frame: BGR image array indexed as ``frame[y, x]``.
        bbox: ``(x1, y1, x2, y2)`` corner coordinates; each value is
            truncated to ``int``.

    Returns:
        The concatenated histogram as a 1-D numpy array, or ``None`` when the
        region is empty or too small to be split into four non-empty cells.
    """
    x1, y1, x2, y2 = map(int, bbox)
    frame_h, frame_w = frame.shape[:2]

    # The 40% offset uses the unclamped box height so the proportion refers
    # to the full box even when part of it lies outside the frame.
    region_top = y1 + int(0.4 * (y2 - y1))

    # Clamp to the frame: negative indices would wrap around and select the
    # wrong pixels (or nothing) for boxes touching the left/top border.
    x1 = max(0, x1)
    region_top = max(0, region_top)
    x2 = max(0, min(frame_w, x2))
    y2 = max(0, min(frame_h, y2))

    torso = frame[region_top:y2, x1:x2]
    if torso.size == 0:
        return None

    h, w = torso.shape[:2]
    if h < 2 or w < 2:
        # At least one of the four grid cells would be empty.
        return None

    hsv = cv2.cvtColor(torso, cv2.COLOR_BGR2HSV)
    histograms = []
    for row in range(2):
        y_start, y_end = row * h // 2, (row + 1) * h // 2
        for col in range(2):
            x_start, x_end = col * w // 2, (col + 1) * w // 2
            cell = hsv[y_start:y_end, x_start:x_end]
            # Channels 0 and 1 (hue, saturation), 8 bins each.
            hist = cv2.calcHist([cell], [0, 1], None, [8, 8], [0, 180, 0, 256])
            histograms.append(cv2.normalize(hist, hist).flatten())

    return np.concatenate(histograms)

# Adaptive similarity thresholding
def compare_features(feat1, feat2, method='cosine', thresholds=(0.8, 0.7)):
    """Decide whether two feature descriptions are similar enough to match.

    Args:
        feat1, feat2: For ``method='cosine'`` an embedding vector, for
            ``method='hist'`` a histogram array, and for ``method='both'`` an
            indexable pair ``(embedding, histogram)`` whose items may be ``None``.
        method: ``'cosine'``, ``'hist'`` or ``'both'``.
        thresholds: ``(deep_threshold, histogram_threshold)``.

    Returns:
        A truthy value when the similarity exceeds the threshold, otherwise a
        falsy one. ``False`` is returned when either argument is ``None`` or
        the method is unknown.
    """
    if feat1 is None or feat2 is None:
        return False

    deep_thresh, hist_thresh = thresholds

    if method == 'both':
        # Both cues must agree, each against a relaxed threshold; a missing
        # cue scores 0 and therefore never matches.
        deep_sim = 0
        if feat1[0] is not None and feat2[0] is not None:
            deep_sim = cosine_similarity([feat1[0]], [feat2[0]])[0][0]

        hist_sim = 0
        if feat1[1] is not None and feat2[1] is not None:
            hist_sim = cv2.compareHist(
                feat1[1].astype(np.float32),
                feat2[1].astype(np.float32),
                cv2.HISTCMP_CORREL,
            )

        return deep_sim > deep_thresh * 0.9 and hist_sim > hist_thresh * 0.8

    if method == 'cosine':
        return cosine_similarity([feat1], [feat2])[0][0] > deep_thresh

    if method == 'hist':
        correlation = cv2.compareHist(
            feat1.astype(np.float32),
            feat2.astype(np.float32),
            cv2.HISTCMP_CORREL,
        )
        return correlation > hist_thresh

    return False

# Person detector (YOLOv8 nano weights, on the CPU) and the DeepSort tracker.
model = YOLO("yolov8n.pt").to("cpu")
tracker = DeepSort(
    max_age=20,
    nn_budget=100,
    max_cosine_distance=0.4,
    max_iou_distance=0.7,
)

# Re-identification state shared with the capture loop.
total_unique_people = 0  # running count; also the source of new person IDs
known_persons = list()  # dicts with id, features, clothing, last_seen, first_seen
pending_detections = defaultdict(int)
# Per-track sliding windows holding the 10 most recent raw feature samples.
pending_features = defaultdict(
    lambda: dict(deep=deque(maxlen=10), clothing=deque(maxlen=10))
)
# Per-track smoothed features; count == 0 means "not initialised yet".
feature_memory = defaultdict(lambda: dict(deep=None, clothing=None, count=0))

# Forgetting mechanism - remove old entries
def forget_old_persons(known_persons, max_age_seconds=300):
    """Return a new list holding only the recently seen person records.

    Args:
        known_persons: Iterable of dicts that each carry a ``'last_seen'``
            timestamp comparable with ``time.time()``.
        max_age_seconds: A record is kept while the time since its
            ``'last_seen'`` is strictly below this value.

    Returns:
        A new list with the surviving records in their original order; the
        input is not modified.
    """
    current_time = time.time()
    survivors = []
    for person in known_persons:
        idle_seconds = current_time - person['last_seen']
        if idle_seconds < max_age_seconds:
            survivors.append(person)
    return survivors

# Main capture loop: detect people, track them, and map every confirmed track
# to a persistent person ID using appearance and clothing features.
cap = cv2.VideoCapture(0)
frame_count = 0

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Periodically clean up old person records
        if frame_count % 100 == 0:
            known_persons = forget_old_persons(known_persons)

        # YOLO detection (class 0 = person)
        results = model(frame, classes=[0], conf=0.7, verbose=False)
        detections = []

        for box in results[0].boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            conf = box.conf.item()
            w, h = x2 - x1, y2 - y1

            # Filter based on reasonable person dimensions. The height test
            # comes first, so h == 0 never reaches the w/h divisions.
            if h < 50 or h > 500 or w / h < 0.3 or w / h > 1.5:
                continue

            # Tracker input format: ([left, top, width, height], confidence)
            detections.append([[x1, y1, w, h], conf])

        # Update tracker (skipped entirely on frames without detections)
        tracks = tracker.update_tracks(detections, frame=frame) if detections else []
        current_persons = set()

        for track in tracks:
            if not track.is_confirmed():
                continue

            x1, y1, x2, y2 = map(int, track.to_ltrb())
            conf = track.get_det_conf()
            if conf is None:
                continue

            deepsort_id = track.track_id

            # Extract features
            deep_feat = extract_deep_features(frame, (x1, y1, x2, y2))
            clothing_feat = extract_clothing_histogram(frame, (x1, y1, x2, y2))

            # Store features in the per-track sample windows
            samples = pending_features[deepsort_id]
            if deep_feat is not None:
                samples['deep'].append(deep_feat)
            if clothing_feat is not None:
                samples['clothing'].append(clothing_feat)

            # Wait until we have enough samples for stable features
            min_samples = 5
            if len(samples['deep']) < min_samples or len(samples['clothing']) < min_samples:
                continue

            # Mean of the current sample windows
            window_deep = np.mean(samples['deep'], axis=0)
            window_clothing = np.mean(samples['clothing'], axis=0)

            memory = feature_memory[deepsort_id]
            if memory['count'] == 0:
                # First time we have enough samples. 'first_seen' is recorded
                # here so the "recently initialised" colouring below has a
                # real timestamp to compare against.
                memory = {
                    'deep': window_deep,
                    'clothing': window_clothing,
                    'count': 1,
                    'first_seen': time.time(),
                }
                feature_memory[deepsort_id] = memory
            else:
                # Update with exponential moving average
                alpha = 0.2  # Smoothing factor
                memory['deep'] = alpha * window_deep + (1 - alpha) * memory['deep']
                memory['clothing'] = alpha * window_clothing + (1 - alpha) * memory['clothing']
                memory['count'] += 1

            # Smoothed features for this track
            current_deep = memory['deep']
            current_clothing = memory['clothing']

            # Check against known persons: keep the best record whose
            # combined score exceeds the threshold.
            match_threshold = 0.75  # Combined score threshold
            best_match = None
            best_match_score = match_threshold

            for known in known_persons:
                deep_sim = cosine_similarity([current_deep], [known["features"]])[0][0]
                hist_sim = cv2.compareHist(
                    current_clothing.astype(np.float32),
                    known["clothing"].astype(np.float32),
                    cv2.HISTCMP_CORREL,
                )

                # Weighted combination of both features
                combined_score = 0.7 * deep_sim + 0.3 * hist_sim

                if combined_score > best_match_score:
                    best_match_score = combined_score
                    best_match = known

            now = time.time()
            if best_match is not None:
                # Found a match - blend the new observation into the record
                person_id = best_match["id"]
                alpha = 0.1  # Learning rate
                best_match["features"] = alpha * current_deep + (1 - alpha) * best_match["features"]
                best_match["clothing"] = alpha * current_clothing + (1 - alpha) * best_match["clothing"]
                best_match["last_seen"] = now
            else:
                # New person
                total_unique_people += 1
                person_id = total_unique_people
                known_persons.append({
                    "id": person_id,
                    "features": current_deep,
                    "clothing": current_clothing,
                    "last_seen": now,
                    "first_seen": now,
                })

            current_persons.add(person_id)

            # Visualization
            color = (0, 255, 0)  # Green for established tracks
            if now - memory.get('first_seen', now) < 2.0:
                color = (0, 165, 255)  # Orange for recently initialized tracks

            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f"ID: {person_id}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Display tracking stats
        cv2.putText(frame, f"Current: {len(current_persons)}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, f"Total Unique: {total_unique_people}", (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

        cv2.imshow("Improved People Tracking", frame)
        # Mask to the low byte so the comparison also works on platforms
        # where waitKey returns extra high bits.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and close the windows even if a frame raised.
    cap.release()
    cv2.destroyAllWindows()




