In [1]:
import torch
import cv2
import numpy as np
from torchvision import models, transforms
from scipy.spatial.distance import cosine
import pandas as pd
from collections import defaultdict

In [2]:
# Step 2: Use the Ultralytics API to load and use the model
from ultralytics import YOLO

# Load YOLOv8 model
model = YOLO(r'C:/Users/LENOVO-PC/Desktop/Internship/best.pt')

In [3]:
resnet = models.resnet18(pretrained=True)
resnet.eval()
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])



In [None]:

# Initialize global ID counter and feature bank
global_id_counter = 0
global_id_bank = {}


In [None]:

# Initialize global ID counter and feature bank
global_id_counter = 0
global_id_bank = {}

def get_best_match(current_feat, global_id_bank, threshold=0.5):
    best_id = None
    min_dist = float("inf")
    for gid, feats in global_id_bank.items():
        for feat in feats:
            dist = cosine(current_feat, feat)
            if dist < threshold and dist < min_dist:
                min_dist = dist
                best_id = gid
    return best_id


In [None]:

def get_best_match(current_feat, global_id_bank, threshold=0.5):
    best_id = None
    min_dist = float("inf")
    for gid, feats in global_id_bank.items():
        for feat in feats:
            dist = cosine(current_feat, feat)
            if dist < threshold and dist < min_dist:
                min_dist = dist
                best_id = gid
    return best_id


In [None]:

# Inside your per-player detection loop
match_id = get_best_match(current_feat, global_id_bank)

if match_id is None:
    global_id_counter += 1
    assign_id = global_id_counter
    global_id_bank[assign_id] = [current_feat]
else:
    assign_id = match_id
    global_id_bank[assign_id].append(current_feat)

# Now use assign_id for drawing or saving


In [4]:
def extract_visual_features(image, bbox):
    x, y, w, h = bbox
    crop = image[y:y+h, x:x+w]
    if crop.size == 0:
        return None
    crop = cv2.resize(crop, (224, 224))
    crop = transform(crop).unsqueeze(0)
    with torch.no_grad():
        features = resnet(crop).numpy().flatten()
    return features

In [5]:

def detect_players(video_path):
    cap = cv2.VideoCapture(video_path)
    detections = []
    frame_idx = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        results = model(frame)
        frame_detections = []

        boxes = results[0].boxes
        if boxes is not None and boxes.xywh is not None:
            boxes_xywh = boxes.xywh.cpu().numpy()
            confidences = boxes.conf.cpu().numpy()
            classes = boxes.cls.cpu().numpy()

            for box, conf, cls in zip(boxes_xywh, confidences, classes):
                x, y, w, h = box
                print(f"Frame {frame_idx} - Class: {cls}, Conf: {conf}")
                if conf > 0.3:  # Lowered for debugging
                    features = extract_visual_features(frame, (int(x - w / 2), int(y - h / 2), int(w), int(h)))
                    if features is not None:
                        frame_detections.append({
                            'frame': frame_idx,
                            'bbox': (x, y, w, h),
                            'features': features
                        })

        print(f"Frame {frame_idx}: {len(frame_detections)} players detected")
        detections.append(frame_detections)
        frame_idx += 1

    cap.release()
    return detections


In [12]:
def match_players(broadcast_detections, tacticam_detections, feature_threshold=0.7):
    from collections import defaultdict
    from scipy.spatial.distance import cosine
    import numpy as np

    player_tracks = defaultdict(list)
    global_features = {}  # player_id: feature_vector
    player_id_counter = 0

    for frame_idx in range(min(len(broadcast_detections), len(tacticam_detections))):
        broadcast_players = broadcast_detections[frame_idx]
        tacticam_players = tacticam_detections[frame_idx]

        # Match broadcast players to global identities
        for b_player in broadcast_players:
            best_match_id = None
            best_similarity = -1

            for pid, global_feat in global_features.items():
                sim = 1 - cosine(b_player['features'], global_feat)
                if sim > feature_threshold and sim > best_similarity:
                    best_similarity = sim
                    best_match_id = pid

            if best_match_id is not None:
                player_id = best_match_id
                global_features[player_id] = b_player['features']
            else:
                player_id = player_id_counter
                global_features[player_id] = b_player['features']
                player_id_counter += 1

            player_tracks[player_id].append({
                'frame': frame_idx,
                'broadcast_bbox': b_player['bbox'],
                'tacticam_bbox': None
            })

        # Match tacticam players to global identities
        for t_player in tacticam_players:
            best_match_id = None
            best_similarity = -1

            for pid, global_feat in global_features.items():
                sim = 1 - cosine(t_player['features'], global_feat)
                if sim > feature_threshold and sim > best_similarity:
                    best_similarity = sim
                    best_match_id = pid

            if best_match_id is not None:
                player_id = best_match_id
                global_features[player_id] = t_player['features']
            else:
                player_id = player_id_counter
                global_features[player_id] = t_player['features']
                player_id_counter += 1

            found = False
            for i in range(len(player_tracks[player_id])):
                if player_tracks[player_id][i]['frame'] == frame_idx:
                    player_tracks[player_id][i]['tacticam_bbox'] = t_player['bbox']
                    found = True
                    break
            if not found:
                player_tracks[player_id].append({
                    'frame': frame_idx,
                    'broadcast_bbox': None,
                    'tacticam_bbox': t_player['bbox']
                })

    return player_tracks

In [13]:
def save_mapping(player_tracks, output_file='player_mapping.csv'):
    data = []
    for player_id, tracks in player_tracks.items():
        for track in tracks:
            data.append({
                'player_id': player_id,
                'frame': track['frame'],
                'broadcast_x': track['broadcast_bbox'][0],
                'broadcast_y': track['broadcast_bbox'][1],
                'broadcast_w': track['broadcast_bbox'][2],
                'broadcast_h': track['broadcast_bbox'][3],
                'tacticam_x': track['tacticam_bbox'][0],
                'tacticam_y': track['tacticam_bbox'][1],
                'tacticam_w': track['tacticam_bbox'][2],
                'tacticam_h': track['tacticam_bbox'][3]
            })
    df = pd.DataFrame(data)
    df.to_csv(output_file, index=False)

In [14]:

def main():
    broadcast_video = 'broadcast.mp4'
    tacticam_video = 'tacticam.mp4'
    print("Detecting players in broadcast video...")
    broadcast_detections = detect_players(broadcast_video)
    print("Sample broadcast detections:", broadcast_detections[:2])
    print("Detecting players in tacticam video...")
    tacticam_detections = detect_players(tacticam_video)
    print("Sample tacticam detections:", tacticam_detections[:2])
    print("Matching players across videos...")
    player_tracks = match_players(broadcast_detections, tacticam_detections)
    print("Number of tracked players:", len(player_tracks))
    print("Saving player mapping to CSV...")
    save_mapping(player_tracks)


In [16]:
main()

Detecting players in broadcast video...

0: 384x640 3 players, 386.7ms
Speed: 4.7ms preprocess, 386.7ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)
Frame 0 - Class: 2.0, Conf: 0.82635098695755
Frame 0 - Class: 2.0, Conf: 0.7145757079124451
Frame 0 - Class: 2.0, Conf: 0.5605186223983765
Frame 0: 3 players detected

0: 384x640 2 players, 77.3ms
Speed: 2.4ms preprocess, 77.3ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Frame 1 - Class: 2.0, Conf: 0.7765336036682129
Frame 1 - Class: 2.0, Conf: 0.341342568397522
Frame 1: 2 players detected

0: 384x640 4 players, 73.4ms
Speed: 2.5ms preprocess, 73.4ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)
Frame 2 - Class: 2.0, Conf: 0.7054756283760071
Frame 2 - Class: 2.0, Conf: 0.5709528923034668
Frame 2 - Class: 2.0, Conf: 0.46994930505752563
Frame 2 - Class: 2.0, Conf: 0.38390225172042847
Frame 2: 4 players detected

0: 384x640 2 players, 72.7ms
Speed: 2.1ms preprocess, 72.7ms inference

TypeError: 'NoneType' object is not subscriptable

In [None]:

# Process each frame (example for both feeds)
cap1 = cv2.VideoCapture('broadcast.mp4')
cap2 = cv2.VideoCapture('tacticam.mp4')

while True:
    ret1, frame1 = cap1.read()
    ret2, frame2 = cap2.read()
    if not ret1 or not ret2:
        break

    for cam_id, frame in enumerate([frame1, frame2]):
        results = model(frame)
        for r in results:
            boxes = r.boxes
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                crop = frame[y1:y2, x1:x2]
                if crop.size == 0: continue

                # Feature extraction
                input_tensor = transform(crop).unsqueeze(0)
                with torch.no_grad():
                    feat = resnet(input_tensor).squeeze().numpy()

                # Match and assign global ID
                match_id = get_best_match(feat, global_id_bank)
                if match_id is None:
                    global_id_counter += 1
                    assign_id = global_id_counter
                    global_id_bank[assign_id] = [feat]
                else:
                    assign_id = match_id
                    global_id_bank[assign_id].append(feat)

                # Draw ID
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0,255,0), 2)
                cv2.putText(frame, f'ID: {assign_id}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,0), 2)

    # Optional: show or save the frame
    # cv2.imshow("Frame1", frame1)
    # cv2.imshow("Frame2", frame2)
    # if cv2.waitKey(1) & 0xFF == ord('q'):
    #     break

cap1.release()
cap2.release()
cv2.destroyAllWindows()
