In [1]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from scipy.spatial.distance import cosine
from filterpy.kalman import KalmanFilter

# Load the frozen inference graph
frozen_graph_path = "frozen_inference_graph.pb"

# Load the frozen TensorFlow model
with tf.io.gfile.GFile(frozen_graph_path, "rb") as f:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

# Convert the frozen graph to a function
def wrap_frozen_graph(graph_def, inputs, outputs):
    def _imports_graph_def():
        tf.compat.v1.import_graph_def(graph_def, name="")
    wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
    return wrapped_import.prune(
        tf.nest.map_structure(wrapped_import.graph.as_graph_element, inputs),
        tf.nest.map_structure(wrapped_import.graph.as_graph_element, outputs))

# Define input and output tensors
inputs = ["image_tensor:0"]
outputs = ["detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0"]

# Get the detection function
detection_fn = wrap_frozen_graph(graph_def, inputs, outputs)

# Initialize the video capture
cap = cv2.VideoCapture(0)

# TensorFlow function for detection outside the loop
@tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.uint8)])
def detect_objects(image):
    return detection_fn(image)

# Modified ResNet50 for pose-invariant feature extraction
def create_pose_invariant_model():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(1024, activation='relu')(x)
    output = Dense(256, activation='relu')(x)
    model = Model(inputs=base_model.input, outputs=output)
    return model

pose_invariant_model = create_pose_invariant_model()

# Initialize variables
person_features = {}
person_colors = {}
kalman_filters = {}
next_person_id = 1
min_detection_count = 3
person_detection_count = {}
similarity_threshold = 0.7  # Increased for stricter matching
max_features_per_person = 20  # Increased to store more features per person
angle_threshold = 0.2  # Threshold for considering a new angle

def extract_features(person_roi):
    person_roi_resized = cv2.resize(person_roi, (224, 224))
    person_roi_preprocessed = preprocess_input(person_roi_resized)
    input_tensor = np.expand_dims(person_roi_preprocessed, axis=0)
    features = pose_invariant_model.predict(input_tensor)
    return features.flatten()

def compute_similarity(feat1, feat2):
    return 1 - cosine(feat1, feat2)

def get_color(identity):
    return tuple(np.random.randint(0, 255, 3).tolist())

def initialize_kalman_filter(bbox):
    kf = KalmanFilter(dim_x=7, dim_z=4)
    kf.F = np.array([[1,0,0,0,1,0,0],
                     [0,1,0,0,0,1,0],
                     [0,0,1,0,0,0,1],
                     [0,0,0,1,0,0,0],
                     [0,0,0,0,1,0,0],
                     [0,0,0,0,0,1,0],
                     [0,0,0,0,0,0,1]])
    kf.H = np.array([[1,0,0,0,0,0,0],
                     [0,1,0,0,0,0,0],
                     [0,0,1,0,0,0,0],
                     [0,0,0,1,0,0,0]])
    kf.R[2:,2:] *= 10.
    kf.P[4:,4:] *= 1000.
    kf.P *= 10.
    kf.Q[-1,-1] *= 0.01
    kf.Q[4:,4:] *= 0.01
    kf.x[:4] = bbox.reshape(4,1)
    return kf

def update_kalman_filter(kf, bbox):
    kf.update(bbox)
    kf.predict()
    return kf

def load_existing_features():
    global next_person_id
    if not os.path.exists("person_library_"):
        os.makedirs("person_library_")
    for person_dir in os.listdir("person_library_"):
        if person_dir.startswith("person_"):
            person_id = int(person_dir.split("_")[1])
            next_person_id = max(next_person_id, person_id + 1)
            person_features[person_id] = []
            for image_file in os.listdir(os.path.join("person_library_", person_dir)):
                if image_file.endswith(".jpg"):
                    img = cv2.imread(os.path.join("person_library_", person_dir, image_file))
                    features = extract_features(img)
                    person_features[person_id].append(features)

def save_person_image(person_id, image, features):
    directory = f"person_library_/person_{person_id}"
    if not os.path.exists(directory):
        os.makedirs(directory)
    
    file_count = len([name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))])
    filename = f"{directory}/image_{file_count+1}.jpg"
    cv2.imwrite(filename, image)
    
    if person_id not in person_features:
        person_features[person_id] = []
    
    person_features[person_id].append(features)
    if len(person_features[person_id]) > max_features_per_person:
        person_features[person_id].pop(0)

def is_new_angle(identity, new_features):
    if identity not in person_features:
        return True
    for existing_features in person_features[identity]:
        if compute_similarity(existing_features, new_features) > 1 - angle_threshold:
            return False
    return True

def compare_with_database(features):
    best_match = None
    max_similarity = 0
    for identity, feat_list in person_features.items():
        similarities = [compute_similarity(feat, features) for feat in feat_list]
        avg_similarity = np.mean(similarities)
        if avg_similarity > max_similarity:
            max_similarity = avg_similarity
            best_match = identity
    return best_match, max_similarity

def match_and_identify(features, bbox, person_roi):
    global next_person_id

    best_match, max_similarity = compare_with_database(features)

    if max_similarity > similarity_threshold and best_match is not None:
        identity = best_match
        person_detection_count[identity] = person_detection_count.get(identity, 0) + 1
        color = person_colors.get(identity, get_color(identity))
        person_colors[identity] = color
        kalman_filters[identity] = update_kalman_filter(kalman_filters.get(identity, initialize_kalman_filter(bbox)), bbox)
        
        if person_detection_count[identity] >= min_detection_count and is_new_angle(identity, features):
            save_person_image(identity, person_roi, features)
    else:
        identity = next_person_id
        color = get_color(identity)
        person_colors[identity] = color
        kalman_filters[identity] = initialize_kalman_filter(bbox)
        person_detection_count[identity] = 1
        save_person_image(identity, person_roi, features)
        next_person_id += 1

    return identity, color, max_similarity

# Main loop
try:
    load_existing_features()
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        image_np = np.array(frame)
        input_tensor = np.expand_dims(image_np, axis=0)

        detections = detect_objects(input_tensor)

        boxes = detections[0].numpy()[0]
        scores = detections[1].numpy()[0]
        classes = detections[2].numpy()[0]
        num_detections = int(detections[3].numpy()[0])

        threshold = 0.5
        h, w, _ = frame.shape

        for i in range(num_detections):
            class_id = int(classes[i])
            score = scores[i]
            box = boxes[i]

            if class_id == 1 and score > threshold:
                ymin, xmin, ymax, xmax = box
                left, right, top, bottom = int(xmin * w), int(xmax * w), int(ymin * h), int(ymax * h)
                
                person_roi = frame[top:bottom, left:right]
                features = extract_features(person_roi)
                predicted_bbox = np.array([xmin, ymin, xmax, ymax])

                identity, color, similarity = match_and_identify(features, predicted_bbox, person_roi)

                label = f'Person {identity}'
                if person_detection_count[identity] >= min_detection_count:
                    label += f' (Confirmed)'
                label += f' ({similarity:.2f})'

                left, top, right, bottom = int(predicted_bbox[0] * w), int(predicted_bbox[1] * h), int(predicted_bbox[2] * w), int(predicted_bbox[3] * h)
                cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        cv2.imshow("Person Re-Identification", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 104ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from scipy.spatial.distance import cosine
from filterpy.kalman import KalmanFilter
from sklearn.metrics import confusion_matrix

# Load the frozen inference graph
frozen_graph_path = "frozen_inference_graph.pb"

# Load the frozen TensorFlow model
with tf.io.gfile.GFile(frozen_graph_path, "rb") as f:
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(f.read())

# Convert the frozen graph to a function
def wrap_frozen_graph(graph_def, inputs, outputs):
    def _imports_graph_def():
        tf.compat.v1.import_graph_def(graph_def, name="")
    wrapped_import = tf.compat.v1.wrap_function(_imports_graph_def, [])
    return wrapped_import.prune(
        tf.nest.map_structure(wrapped_import.graph.as_graph_element, inputs),
        tf.nest.map_structure(wrapped_import.graph.as_graph_element, outputs))

# Define input and output tensors
inputs = ["image_tensor:0"]
outputs = ["detection_boxes:0", "detection_scores:0", "detection_classes:0", "num_detections:0"]

# Get the detection function
detection_fn = wrap_frozen_graph(graph_def, inputs, outputs)

# TensorFlow function for detection outside the loop
@tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.uint8)])
def detect_objects(image):
    return detection_fn(image)

# Modified ResNet50 for pose-invariant feature extraction
def create_pose_invariant_model():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(1024, activation='relu')(x)
    output = Dense(256, activation='relu')(x)
    model = Model(inputs=base_model.input, outputs=output)
    return model

pose_invariant_model = create_pose_invariant_model()

# Initialize variables
person_features = {}
person_colors = {}
kalman_filters = {}
next_person_id = 1
min_detection_count = 3
person_detection_count = {}
similarity_threshold = 0.7
max_features_per_person = 20
angle_threshold = 0.2

def extract_features(person_roi):
    person_roi_resized = cv2.resize(person_roi, (224, 224))
    person_roi_preprocessed = preprocess_input(person_roi_resized)
    input_tensor = np.expand_dims(person_roi_preprocessed, axis=0)
    features = pose_invariant_model.predict(input_tensor)
    return features.flatten()

def compute_similarity(feat1, feat2):
    return 1 - cosine(feat1, feat2)

def get_color(identity):
    return tuple(np.random.randint(0, 255, 3).tolist())

def initialize_kalman_filter(bbox):
    kf = KalmanFilter(dim_x=7, dim_z=4)
    kf.F = np.array([[1,0,0,0,1,0,0],
                     [0,1,0,0,0,1,0],
                     [0,0,1,0,0,0,1],
                     [0,0,0,1,0,0,0],
                     [0,0,0,0,1,0,0],
                     [0,0,0,0,0,1,0],
                     [0,0,0,0,0,0,1]])
    kf.H = np.array([[1,0,0,0,0,0,0],
                     [0,1,0,0,0,0,0],
                     [0,0,1,0,0,0,0],
                     [0,0,0,1,0,0,0]])
    kf.R[2:,2:] *= 10.
    kf.P[4:,4:] *= 1000.
    kf.P *= 10.
    kf.Q[-1,-1] *= 0.01
    kf.Q[4:,4:] *= 0.01
    kf.x[:4] = bbox.reshape(4,1)
    return kf

def update_kalman_filter(kf, bbox):
    kf.update(bbox)
    kf.predict()
    return kf

def load_existing_features():
    global next_person_id
    if not os.path.exists("person_library"):
        os.makedirs("person_library")
    for person_dir in os.listdir("person_library"):
        if person_dir.startswith("person_"):
            person_id = int(person_dir.split("_")[1])
            next_person_id = max(next_person_id, person_id + 1)
            person_features[person_id] = []
            for image_file in os.listdir(os.path.join("person_library", person_dir)):
                if image_file.endswith(".jpg"):
                    img = cv2.imread(os.path.join("person_library", person_dir, image_file))
                    features = extract_features(img)
                    person_features[person_id].append(features)

def save_person_image(person_id, image, features):
    directory = f"person_library/person_{person_id}"
    if not os.path.exists(directory):
        os.makedirs(directory)
    
    file_count = len([name for name in os.listdir(directory) if os.path.isfile(os.path.join(directory, name))])
    filename = f"{directory}/image_{file_count+1}.jpg"
    cv2.imwrite(filename, image)
    
    if person_id not in person_features:
        person_features[person_id] = []
    
    person_features[person_id].append(features)
    if len(person_features[person_id]) > max_features_per_person:
        person_features[person_id].pop(0)

def is_new_angle(identity, new_features):
    if identity not in person_features:
        return True
    for existing_features in person_features[identity]:
        if compute_similarity(existing_features, new_features) > 1 - angle_threshold:
            return False
    return True

def compare_with_database(features):
    best_match = None
    max_similarity = 0
    for identity, feat_list in person_features.items():
        similarities = [compute_similarity(feat, features) for feat in feat_list]
        avg_similarity = np.mean(similarities)
        if avg_similarity > max_similarity:
            max_similarity = avg_similarity
            best_match = identity
    return best_match, max_similarity

def match_and_identify(features, bbox, person_roi):
    global next_person_id

    best_match, max_similarity = compare_with_database(features)

    if max_similarity > similarity_threshold and best_match is not None:
        identity = best_match
        person_detection_count[identity] = person_detection_count.get(identity, 0) + 1
        color = person_colors.get(identity, get_color(identity))
        person_colors[identity] = color
        kalman_filters[identity] = update_kalman_filter(kalman_filters.get(identity, initialize_kalman_filter(bbox)), bbox)
        
        if person_detection_count[identity] >= min_detection_count and is_new_angle(identity, features):
            save_person_image(identity, person_roi, features)
    else:
        identity = next_person_id
        color = get_color(identity)
        person_colors[identity] = color
        kalman_filters[identity] = initialize_kalman_filter(bbox)
        person_detection_count[identity] = 1
        save_person_image(identity, person_roi, features)
        next_person_id += 1

    return identity, color, max_similarity

# Accuracy evaluation functions
def calculate_metrics(true_ids, predicted_ids):
    if not true_ids or not predicted_ids:
        print("Warning: Empty true_ids or predicted_ids list")
        return 0, 0, 0, 0
    
    cm = confusion_matrix(true_ids, predicted_ids)
    accuracy = np.sum(np.diag(cm)) / np.sum(cm) if np.sum(cm) > 0 else 0
    precision = np.diag(cm) / np.sum(cm, axis=0) if np.sum(cm, axis=0).any() else np.zeros_like(np.diag(cm))
    recall = np.diag(cm) / np.sum(cm, axis=1) if np.sum(cm, axis=1).any() else np.zeros_like(np.diag(cm))
    f1_score = 2 * (precision * recall) / (precision + recall)
    mean_precision = np.nanmean(precision) if len(precision) > 0 else 0
    mean_recall = np.nanmean(recall) if len(recall) > 0 else 0
    mean_f1_score = np.nanmean(f1_score) if len(f1_score) > 0 else 0
    return accuracy, mean_precision, mean_recall, mean_f1_score

def calculate_id_switch_rate(true_ids, predicted_ids):
    if not true_ids or not predicted_ids:
        print("Warning: Empty true_ids or predicted_ids list")
        return 0
    
    switches = 0
    for i in range(1, len(true_ids)):
        if true_ids[i] == true_ids[i-1] and predicted_ids[i] != predicted_ids[i-1]:
            switches += 1
    return switches / len(true_ids) if len(true_ids) > 0 else 0

def calculate_mota(true_ids, predicted_ids):
    if not true_ids or not predicted_ids:
        print("Warning: Empty true_ids or predicted_ids list")
        return 0
    
    misses = sum(1 for t, p in zip(true_ids, predicted_ids) if p == -1)
    false_positives = sum(1 for t, p in zip(true_ids, predicted_ids) if t == -1)
    mismatches = sum(1 for t, p in zip(true_ids, predicted_ids) if t != p and t != -1 and p != -1)
    total = len([t for t in true_ids if t != -1])
    mota = 1 - (misses + false_positives + mismatches) / total if total > 0 else 0
    return mota

def get_true_id(frame_count, left, top, right, bottom):
    # This is a placeholder. In a real scenario, you would implement this based on your ground truth data.
    # For testing purposes, we'll return a random ID.
    return np.random.randint(1, 10)

# Main loop
true_ids = []
predicted_ids = []

# Use camera feed
cap = cv2.VideoCapture(0)  # 0 for default camera, you might need to change this

try:
    load_existing_features()
    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to read frame from camera")
            break

        print(f"Frame {frame_count}: Shape = {frame.shape}")

        image_np = np.array(frame)
        input_tensor = np.expand_dims(image_np, axis=0)

        detections = detect_objects(input_tensor)

        boxes = detections[0].numpy()[0]
        scores = detections[1].numpy()[0]
        classes = detections[2].numpy()[0]
        num_detections = int(detections[3].numpy()[0])

        print(f"Frame {frame_count}: {num_detections} detections")

        threshold = 0.5
        h, w, _ = frame.shape

        for i in range(num_detections):
            class_id = int(classes[i])
            score = scores[i]
            box = boxes[i]

            print(f"Detection {i}: class_id={class_id}, score={score}, box={box}")

            if class_id == 1 and score > threshold:
                ymin, xmin, ymax, xmax = box
                left, right, top, bottom = int(xmin * w), int(xmax * w), int(ymin * h), int(ymax * h)
                
                person_roi = frame[top:bottom, left:right]
                features = extract_features(person_roi)
                predicted_bbox = np.array([xmin, ymin, xmax, ymax])

                identity, color, similarity = match_and_identify(features, predicted_bbox, person_roi)

                true_id = get_true_id(frame_count, left, top, right, bottom)
                true_ids.append(true_id)
                predicted_ids.append(identity)

                label = f'Person {identity}'
                if person_detection_count[identity] >= min_detection_count:
                    label += f' (Confirmed)'
                label += f' ({similarity:.2f})'

                cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
                cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

                print(f"Frame {frame_count}: Detected person {identity}, True ID: {true_id}, Similarity: {similarity:.2f}")

        cv2.imshow("Person Re-Identification", frame)
        frame_count += 1

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()

# Calculate and print metrics
if true_ids and predicted_ids:
    accuracy, mean_precision, mean_recall, mean_f1_score = calculate_metrics(true_ids, predicted_ids)
    id_switch_rate = calculate_id_switch_rate(true_ids, predicted_ids)
    mota = calculate_mota(true_ids, predicted_ids)

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Mean Precision: {mean_precision:.4f}")
    print(f"Mean Recall: {mean_recall:.4f}")
    print(f"Mean F1-Score: {mean_f1_score:.4f}")
    print(f"ID Switch Rate: {id_switch_rate:.4f}")
    print(f"MOTA: {mota:.4f}")
else:
    print("No detections were made. Check camera feed and detection threshold.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 102ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step
Frame 0: Shape = (480, 640, 3)
Frame 0: 100 detections
Detecti