## Setting up the dependencies

In [None]:
pip install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.4/index.html

In [None]:
!pip install "mmdet>=3.0.0"  # Latest 3.x version
!pip install "mmpose>=1.2.0"  # Latest 1.x version

In [None]:
!pip index versions mmcv

In [None]:
# Sanity check: import each OpenMMLab package in turn and print its version.
# The imports share one try block, so the first ImportError skips the remaining
# checks and only that first failure is reported.
try:
    import mmcv
    print("MMCV version:", mmcv.__version__)

    import mmengine
    print("MMEngine version:", mmengine.__version__)

    import mmdet
    print("MMDet version:", mmdet.__version__)

    import mmpose
    print("MMPose version:", mmpose.__version__)

    print("\nAll required packages installed successfully!")
except ImportError as e:
    # Report the missing package instead of raising, so the notebook keeps running.
    print("Error:", e)

### Cloning the appropriate repositories

In [None]:
!git clone https://github.com/Mostafa-Nafie/Football-Object-Detection

In [None]:
!git clone https://github.com/mkoshkina/jersey-number-pipeline/

In [None]:
!git clone https://github.com/ViTAE-Transformer/ViTPose

## Now trying ViTPose dependency installations

In [None]:
# Optional CUDA environment overrides (disabled). Uncomment and adjust the paths
# only if the CUDA toolkit is not found automatically.
# import os
# os.environ['CUDA_HOME'] = '/usr/local/cuda-12.3'
# os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda-12.3/lib64'

# Import the MMPose entry points used later in the notebook
from mmpose.apis import init_model, inference_topdown
from mmpose.utils import register_all_modules

# Verify PyTorch CUDA setup
import torch
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)

# Print MM* versions
import mmcv
import mmengine
import mmdet
import mmpose

print("\nInstalled versions:")
print(f"MMCV version: {mmcv.__version__}")
print(f"MMEngine version: {mmengine.__version__}")
print(f"MMDet version: {mmdet.__version__}")
print(f"MMPose version: {mmpose.__version__}")

# Register MMPose modules; an exception here signals a broken installation.
# NOTE(review): presumably this populates the MMEngine registries needed by init_model - confirm against the MMPose docs.
register_all_modules()

### Setting up

#### Extracting frames and using the legibility classifier as a filter

In [None]:
!pip install gdown

In [None]:
!pip install ultralytics

In [None]:
import cv2
import os

def create_folder_structure():
    """Ensure the frame output directory exists and return its relative path.

    Creates ``Jersey_Detection_Project`` and, inside it, ``detected_frames``
    (both relative to the current working directory). Existing directories
    are left untouched.

    Returns:
        The path ``Jersey_Detection_Project/detected_frames`` joined with the
        platform separator.
    """
    project_root = 'Jersey_Detection_Project'
    frames_path = os.path.join(project_root, 'detected_frames')

    # Parent first, then the leaf; both calls are no-ops when the folder exists.
    for directory in (project_root, frames_path):
        os.makedirs(directory, exist_ok=True)

    return frames_path

def extract_frames(video_path, output_folder, frame_interval=1):
    """
    Extract frames from object-detected video

    Every ``frame_interval``-th frame is written to ``output_folder`` as
    ``frame_000000.jpg``, ``frame_000001.jpg``, ... The number in the file name
    is the save order, not the frame's position in the video.

    Args:
        video_path: path to object-detected video
        output_folder: where to save the frames (created if it does not exist)
        frame_interval: extract every nth frame; must be >= 1

    Returns:
        Number of frames successfully written, or None if the video could not
        be opened.

    Raises:
        ValueError: if ``frame_interval`` is smaller than 1.
    """
    if frame_interval < 1:
        # 0 would otherwise surface as a ZeroDivisionError inside the loop.
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    saved_count = 0

    try:
        if not cap.isOpened():
            print(f"Error opening video: {video_path}")
            return None

        os.makedirs(output_folder, exist_ok=True)

        # Get video properties
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))

        print(f"Video Properties:")
        print(f"Total Frames: {total_frames}")
        print(f"FPS: {fps}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Save frame at specified intervals
            if frame_count % frame_interval == 0:
                frame_filename = f'frame_{saved_count:06d}.jpg'
                frame_path = os.path.join(output_folder, frame_filename)

                # imwrite reports failure through its return value, not an exception
                if cv2.imwrite(frame_path, frame):
                    saved_count += 1

                    # Print progress every 100 saved frames
                    if saved_count % 100 == 0:
                        if total_frames > 0:
                            print(f"Saved {saved_count} frames ({(frame_count/total_frames)*100:.2f}% complete)")
                        else:
                            # The reported frame count was 0, so no percentage is available.
                            print(f"Saved {saved_count} frames")
                else:
                    print(f"Warning: failed to write {frame_path}")

            frame_count += 1
    finally:
        # Always free the capture handle, even if reading or writing raised.
        cap.release()

    print(f"\nExtraction Complete!")
    print(f"Saved {saved_count} frames to {output_folder}")

    return saved_count

# Usage
# The guard is always true when this cell runs in a notebook.
if __name__ == "__main__":
    # Replace with your object-detected video path
    detected_video = "/kaggle/input/video-object-detected/vid_object_detected.mp4"

    # Create folders
    output_folder = create_folder_structure()

    # Extract every 5th frame (adjust as needed)
    frame_interval = 5

    # Extract frames; the result is None when the video cannot be opened
    frame_count = extract_frames(detected_video, output_folder, frame_interval)

    print(f"\nTotal frames extracted: {frame_count}")

In [None]:
from ultralytics import YOLO
import cv2
import os

def save_player_crops(video_path, output_dir="player_crops",
                      model_path="/kaggle/working/Football-Object-Detection/weights/last.pt",
                      conf=0.5, padding=5, player_labels=(0, 1)):
    """Process video and save player crops with unique identifiers

    Runs the YOLO detector on every frame and writes one JPEG per detection
    whose class label is in ``player_labels``. Files are named
    ``frame_<frame>_player_<detection index>_<x1>_<y1>.jpg`` where x1/y1 are
    the unpadded box corner.

    Args:
        video_path: path of the video to read
        output_dir: directory for the crops (created if missing)
        model_path: YOLO weights file
        conf: detection confidence threshold passed to the model
        padding: pixels added around each box, clamped to the frame
        player_labels: class ids to keep (defaults to 0 and 1, the labels this
            notebook treats as players and goalkeepers)
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    # Load model
    model = YOLO(model_path)

    # Open video
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    total_crops = 0

    try:
        if not cap.isOpened():
            # Without this check an unreadable video would be reported as "Complete".
            print(f"Error opening video: {video_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            # Run detection
            results = model(frame, conf=conf, verbose=False)[0]

            # Process each detection
            for i, box in enumerate(results.boxes):
                label = int(box.cls.cpu().numpy()[0])
                if label not in player_labels:
                    continue

                # Get box coordinates
                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())

                # Add padding, clamped to the frame
                x1_pad = max(0, x1 - padding)
                y1_pad = max(0, y1 - padding)
                x2_pad = min(frame_w, x2 + padding)
                y2_pad = min(frame_h, y2 + padding)

                # Crop player; a degenerate box yields an empty array that cannot be encoded
                player_crop = frame[y1_pad:y2_pad, x1_pad:x2_pad]
                if player_crop.size == 0:
                    continue

                # Save crop with unique identifier
                filename = f"frame_{frame_count}_player_{i}_{x1}_{y1}.jpg"
                if cv2.imwrite(os.path.join(output_dir, filename), player_crop):
                    total_crops += 1

            # Progress update
            if frame_count % 100 == 0:
                print(f"Processed {frame_count} frames, saved {total_crops} crops")
    finally:
        # Release the capture even when detection or writing raises.
        cap.release()

    print(f"Complete: Processed {frame_count} frames, saved {total_crops} crops")
    print(f"Crops saved to {output_dir}")

# Usage
# NOTE(review): this cell and the later process_detected_video cell both write into
# "player_crops" using different file-name patterns, so running both stores every
# player twice - run only one of them.
video_path = "/kaggle/input/video-object-detected/vid_object_detected.mp4"
save_player_crops(video_path)

In [None]:
from ultralytics import YOLO
import cv2
import os

def process_detected_video(video_path, output_dir="player_crops",
                           model_path="/kaggle/working/Football-Object-Detection/weights/last.pt",
                           conf=0.5, padding=5, player_labels=(0, 1)):
    """
    Run the YOLO detector on every frame of a video and save player crops.

    Detections are recomputed here; nothing is read back from boxes drawn into
    the video. Tensors are moved to the CPU before conversion, so the function
    works with both CPU and GPU inference.

    Crops are named ``frame_<frame>_player_<x1>_<y1>.jpg`` where x1/y1 are the
    padded top-left corner. Two detections in one frame that share that corner
    therefore write to the same file.

    Args:
        video_path: path of the video to read
        output_dir: directory for the crops (created if missing)
        model_path: YOLO weights file
        conf: detection confidence threshold passed to the model
        padding: pixels added around each box, clamped to the frame
        player_labels: class ids to keep (defaults to 0 and 1, the labels this
            notebook treats as players and goalkeepers)
    """
    # Create output directory if doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Load the model
    model = YOLO(model_path)

    # Open the video
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    try:
        if not cap.isOpened():
            # Make an unreadable video visible instead of reporting success.
            print(f"Error opening video: {video_path}")
            return

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1
            frame_h, frame_w = frame.shape[:2]

            # Run detection
            results = model(frame, conf=conf, verbose=False)[0]

            for box in results.boxes:
                # Get class label - move to CPU first
                label = int(box.cls.cpu().numpy()[0])
                if label not in player_labels:
                    continue

                # Get box coordinates - move to CPU first
                x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())

                # Add padding, clamped to the frame
                x1 = max(0, x1 - padding)
                y1 = max(0, y1 - padding)
                x2 = min(frame_w, x2 + padding)
                y2 = min(frame_h, y2 + padding)

                # Crop player; skip degenerate boxes whose crop is empty
                player_crop = frame[y1:y2, x1:x2]
                if player_crop.size == 0:
                    continue

                # Save crop with frame number and position
                crop_filename = f"frame_{frame_count}_player_{x1}_{y1}.jpg"
                cv2.imwrite(os.path.join(output_dir, crop_filename), player_crop)

            # Optional: Show progress
            if frame_count % 100 == 0:
                print(f"Processed {frame_count} frames")
    finally:
        # Release the capture even when detection or writing raises.
        cap.release()

    print(f"Processing complete. Crops saved to {output_dir}")

# Use it
# NOTE(review): writes into the same "player_crops" folder as the save_player_crops
# cell above, with a different file-name pattern - running both duplicates the crops.
video_path = "/kaggle/input/video-object-detected/vid_object_detected.mp4"
process_detected_video(video_path, "player_crops")

In [None]:
# Save the content as setup.py
import os
import gdown

# Create models directory
os.makedirs('models', exist_ok=True)

# Download soccer legibility model (skipped when the file is already present)
soccer_model_url = "https://drive.google.com/uc?id=18HAuZbge3z8TSfRiX_FzsnKgiBs-RRNw"
soccer_model_path = "models/legibility_resnet34_soccer_20240215.pth"
if os.path.exists(soccer_model_path):
    print(f"Legibility model already present at {soccer_model_path}")
else:
    gdown.download(soccer_model_url, soccer_model_path, fuzzy=True)
    # Check the file itself rather than trusting the call: only report success
    # when the checkpoint really exists on disk.
    if not os.path.exists(soccer_model_path):
        raise RuntimeError(f"Download failed: {soccer_model_path} was not created")
    print("Downloaded pre-trained legibility model!")

In [None]:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np

def load_legibility_model(model_path):
    """Load pretrained ResNet34 model for legibility classification

    Builds a ResNet34 whose final layer has a single output (one logit for
    the binary legible / illegible decision), loads the checkpoint onto the
    CPU and switches the model to evaluation mode.

    Args:
        model_path: path to the checkpoint; its keys may carry a
            ``model_ft.`` prefix, which is stripped before loading.

    Returns:
        The model in eval mode, on the CPU.
    """
    # weights=None builds the bare architecture; it replaces the deprecated
    # pretrained=False argument.
    model = models.resnet34(weights=None)
    model.fc = torch.nn.Linear(model.fc.in_features, 1)

    # NOTE(review): torch.load unpickles the file, and this checkpoint comes from
    # a shared drive link. If it is a plain tensor state dict, weights_only=True
    # would be the safer call - confirm before changing.
    state_dict = torch.load(model_path, map_location='cpu')

    # Strip the wrapper prefix only where it leads the key.
    prefix = "model_ft."
    new_state_dict = {
        (key[len(prefix):] if key.startswith(prefix) else key): value
        for key, value in state_dict.items()
    }
    model.load_state_dict(new_state_dict)
    model.eval()
    return model

def process_single_image(model, image_path, transform):
    """Classify one crop and report how confident the model is.

    Args:
        model: callable returning a single logit for a batch of one image
        image_path: file to open (converted to RGB)
        transform: callable turning the RGB image into a tensor

    Returns:
        (prediction, confidence, image): ``confidence`` is the sigmoid of the
        logit, ``prediction`` is True when it exceeds 0.5, and ``image`` is
        the RGB image that was fed to ``transform``.
    """
    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0)  # add the batch dimension

    with torch.no_grad():
        score = torch.sigmoid(model(batch)).item()

    return score > 0.5, score, rgb_image

def analyze_and_visualize_crops(model, crops_dir, num_samples=10):
    """Analyze and visualize the legibility classification process

    Classifies every ``.jpg`` in ``crops_dir``, copies the legible ones into
    ``legible_crops/``, prints summary statistics and saves two figures:
    ``legibility_analysis.png`` (sample crops) and
    ``confidence_distribution.png`` (histogram of confidences).

    Args:
        model: legibility classifier passed through to process_single_image
        crops_dir: directory containing the player crops
        num_samples: total number of sample images in the figure; half are
            taken from each class

    Returns:
        Dict with keys 'legible' and 'illegible', each a list of dicts with
        'path', 'confidence' and 'image'. Every opened image is kept in this
        structure, so memory grows with the number of crops.
    """
    import shutil  # local import: used only to copy legible crops

    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])

    results = {'legible': [], 'illegible': []}
    confidences = []
    legible_dir = 'legible_crops'

    # Sorted so the sample figure is the same on every run.
    for crop_file in sorted(os.listdir(crops_dir)):
        if not crop_file.endswith('.jpg'):
            continue

        crop_path = os.path.join(crops_dir, crop_file)
        prediction, confidence, image = process_single_image(model, crop_path, transform)

        result = {
            'path': crop_path,
            'confidence': confidence,
            'image': image
        }

        if prediction:
            results['legible'].append(result)
            # Copy (not move) into legible_crops; shutil avoids building a shell command
            os.makedirs(legible_dir, exist_ok=True)
            shutil.copy(crop_path, os.path.join(legible_dir, crop_file))
        else:
            results['illegible'].append(result)

        confidences.append(confidence)

    total_images = len(confidences)
    if total_images == 0:
        # The percentages below would divide by zero.
        print(f"No .jpg crops found in {crops_dir}; nothing to analyze.")
        return results

    # Print statistics
    legible_count = len(results['legible'])

    print("\nLegibility Classification Results:")
    print(f"Total images processed: {total_images}")
    print(f"Legible images: {legible_count} ({legible_count/total_images*100:.1f}%)")
    print(f"Illegible images: {total_images - legible_count} ({(total_images-legible_count)/total_images*100:.1f}%)")

    print("\nConfidence Statistics:")
    print(f"Average confidence: {np.mean(confidences):.3f}")
    print(f"Median confidence: {np.median(confidences):.3f}")
    print(f"Standard deviation: {np.std(confidences):.3f}")

    # Visualize sample results: top row legible, bottom row illegible
    per_row = num_samples // 2
    plt.figure(figsize=(15, 10))
    for row, (key, label) in enumerate((('legible', 'Legible'), ('illegible', 'Illegible'))):
        for col, result in enumerate(results[key][:per_row]):
            plt.subplot(2, per_row, row * per_row + col + 1)
            plt.imshow(result['image'])
            plt.title(f'{label}\nConf: {result["confidence"]:.2f}')
            plt.axis('off')

    plt.tight_layout()
    plt.savefig('legibility_analysis.png')
    plt.close()

    # Plot confidence distribution
    plt.figure(figsize=(10, 6))
    plt.hist(confidences, bins=50, edgecolor='black')
    plt.title('Distribution of Confidence Scores')
    plt.xlabel('Confidence Score')
    plt.ylabel('Number of Images')
    plt.axvline(x=0.5, color='r', linestyle='--', label='Decision Threshold')
    plt.legend()
    plt.savefig('confidence_distribution.png')
    plt.close()

    return results

# Usage
# NOTE(review): `model` and `results` are rebound to the pose model and pose output
# in later cells, so re-run this cell before reusing them for legibility work.
model_path = "models/legibility_resnet34_soccer_20240215.pth"
model = load_legibility_model(model_path)
results = analyze_and_visualize_crops(model, "player_crops")

### After setting up the legibility classifier, using it for classification

In [None]:
import torch
import torchvision.models as models
from torchvision import transforms
from PIL import Image
import os

def load_legibility_model(model_path):
    """Build the ResNet34 legibility classifier and load its checkpoint.

    Args:
        model_path: path to the checkpoint; its keys may carry a
            ``model_ft.`` prefix, which is stripped before loading.

    Returns:
        The model on the CPU, in evaluation mode, with a single-logit head.
    """
    # Create ResNet34 model; weights=None replaces the deprecated pretrained=False
    model = models.resnet34(weights=None)
    num_features = model.fc.in_features
    # Single output for binary classification (matches the checkpoint's head)
    model.fc = torch.nn.Linear(num_features, 1)

    # NOTE(review): torch.load unpickles the file, and this checkpoint comes from
    # a shared drive link. If it is a plain tensor state dict, weights_only=True
    # would be the safer call - confirm before changing.
    state_dict = torch.load(model_path, map_location='cpu')

    # Remove the 'model_ft.' prefix, but only where it starts the key
    prefix = "model_ft."
    new_state_dict = {}
    for k, v in state_dict.items():
        name = k[len(prefix):] if k.startswith(prefix) else k
        new_state_dict[name] = v

    # Load the modified state dict
    model.load_state_dict(new_state_dict)
    model.eval()
    return model

def classify_crops(model, crops_dir):
    """Classify every ``.jpg`` in ``crops_dir`` and copy the legible ones.

    Legible crops are copied (the originals stay in place) into
    ``legible_crops/`` under the current working directory.

    Args:
        model: legibility classifier returning one logit per image
        crops_dir: directory containing the player crops

    Returns:
        Sorted list of paths (inside ``crops_dir``) classified as legible.
    """
    import shutil  # local import: used only to copy legible crops

    # Image transforms
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])

    legible_dir = 'legible_crops'
    # Only .jpg files count as crops; sorting makes the result order stable.
    crop_files = sorted(f for f in os.listdir(crops_dir) if f.endswith('.jpg'))
    legible_crops = []

    for crop_file in crop_files:
        crop_path = os.path.join(crops_dir, crop_file)

        # Load and transform image
        image = Image.open(crop_path).convert('RGB')
        input_tensor = transform(image).unsqueeze(0)

        # Sigmoid turns the single logit into a probability
        with torch.no_grad():
            is_legible = (torch.sigmoid(model(input_tensor)) > 0.5).item()

        if is_legible:
            legible_crops.append(crop_path)

            # Copy legible images to a separate directory
            os.makedirs(legible_dir, exist_ok=True)
            shutil.copy(crop_path, os.path.join(legible_dir, crop_file))

    # Report against the number of crops examined, not every directory entry
    print(f"Found {len(legible_crops)} legible crops out of {len(crop_files)} total crops")
    return legible_crops

# Use the models
# Classifies the same "player_crops" folder as the analysis cell above; legible
# crops are copied into legible_crops/ again (same file names).
model_path = "models/legibility_resnet34_soccer_20240215.pth"
model = load_legibility_model(model_path)
legible_crops = classify_crops(model, "player_crops")

#### ViTPose Algorithm for Torso detection

In [None]:
import os
import gdown

# Create checkpoints directory (inside the cloned ViTPose repository)
os.makedirs('./ViTPose/checkpoints', exist_ok=True)

# Download ViTPose-H model; skipped when the file already exists.
# NOTE(review): this rebinds the global `model_path` that the legibility cells use,
# and the pose cells visible in this notebook load RTMPose weights rather than this
# checkpoint - confirm the download is still needed.
model_url = 'https://drive.google.com/uc?id=1A3ftF118IcxMn_QONndR-8dPWpf7XzdV'
model_path = './ViTPose/checkpoints/vitpose-h.pth'
if not os.path.exists(model_path):
    print("Downloading ViTPose model...")
    gdown.download(model_url, model_path, fuzzy=True)

In [None]:
# Print versions
import mmcv
import mmengine
import mmdet
import mmpose
import torch

# Report the installed framework versions
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("MMCV version:", mmcv.__version__)
print("MMEngine version:", mmengine.__version__)
print("MMDet version:", mmdet.__version__)
print("MMPose version:", mmpose.__version__)

# Check if model and config exist
# NOTE: `os` is not imported in this cell; it relies on an earlier cell's import.
print("\nChecking files:")
print("Model exists:", os.path.exists('./ViTPose/checkpoints/vitpose-h.pth'))
print("\nAvailable configs:")
config_dir = './ViTPose/configs'
if os.path.exists(config_dir):
    # List every Python config whose file name contains "vitpose_huge" (case-insensitive)
    for root, dirs, files in os.walk(config_dir):
        for file in files:
            if 'vitpose_huge' in file.lower() and file.endswith('.py'):
                print(os.path.join(root, file))

In [None]:
!pip install -U openmim

In [None]:
!mim download mmpose --config rtmpose-l_8xb256-420e_coco-256x192 --dest .

In [None]:
import os
# Check current directory for downloaded files
print("Files in current directory:")
!ls *.py  # Look for config files
!ls *.pth  # Look for model weights

In [None]:
# Print MMPose version and path
import mmpose
print("MMPose version:", mmpose.__version__)
print("MMPose installation path:", mmpose.__path__)

# Check for RTMPose configs
def find_rtmpose_configs(base_path):
    """Print the path of every RTMPose Python config under a package directory.

    Args:
        base_path: sequence whose first element is the directory to search
            (for example a package's ``__path__``).
    """
    print("\nSearching for RTMPose configs...")
    search_root = base_path[0]
    for folder, _subdirs, filenames in os.walk(search_root):
        # A match is a .py file with "rtmpose" anywhere in its (case-folded) name.
        matches = (fname for fname in filenames
                   if fname.endswith('.py') and 'rtmpose' in fname.lower())
        for fname in matches:
            print(os.path.join(folder, fname))

find_rtmpose_configs(mmpose.__path__)

In [None]:
import cv2
import numpy as np
from mmpose.apis import inference_topdown, init_model
from mmpose.utils import register_all_modules
import matplotlib.pyplot as plt

def visualize_vitpose_steps(image_path, model):
    """Visualize each step of the pose-based torso extraction for one image.

    The whole image is used as the person box, keypoints are predicted with
    the given MMPose model, and four panels are shown: original image,
    keypoints, torso box and torso crop.

    Args:
        image_path: path of the player crop to analyse
        model: MMPose model as returned by init_model

    Returns:
        Dict with 'original' (RGB image), 'keypoints' (one row per keypoint:
        x, y, score), 'keypoint_viz', 'boxed' and 'torso'.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # 1. Load and preprocess image
    image = cv2.imread(image_path)
    if image is None:
        # imread signals failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    print("Input Image Shape:", image.shape)

    # 2. Get pose estimation results, treating the full image as the person box
    bboxes = [[0, 0, image.shape[1], image.shape[0]]]
    results = inference_topdown(model, image, bboxes)

    # Predictions live in pred_instances; take the first (only) instance
    keypoints = results[0].pred_instances.keypoints[0]
    scores = results[0].pred_instances.keypoint_scores[0]

    # Combine keypoints and scores into rows of (x, y, score)
    keypoints_with_scores = np.concatenate([keypoints, scores[:, None]], axis=1)
    print("\nKeypoints Shape:", keypoints_with_scores.shape)
    print("\nSample Keypoint (Left Shoulder):", keypoints_with_scores[5])

    # 3. Visualize keypoints on image
    def draw_keypoints(img, keypoints_with_scores):
        img_copy = img.copy()
        for i, (x, y, conf) in enumerate(keypoints_with_scores):
            if conf > 0.3:  # Only draw high-confidence points
                cv2.circle(img_copy, (int(x), int(y)), 3, (0, 255, 0), -1)
                cv2.putText(img_copy, str(i), (int(x), int(y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        return img_copy

    keypoint_viz = draw_keypoints(image_rgb, keypoints_with_scores)

    # 4. Extract and visualize torso
    def get_torso_crop(img, keypoints_with_scores, padding=5):
        # Shoulders (5, 6) and hips (11, 12). "Left"/"right" are the player's own
        # sides, so they swap in image space when the player faces the camera;
        # taking min/max over all four points works for either orientation.
        torso_pts = keypoints_with_scores[[5, 6, 11, 12], :2]
        height, width = img.shape[:2]

        # Padded bounds, with both ends clamped into the image
        left = min(max(int(torso_pts[:, 0].min()) - padding, 0), width)
        right = min(max(int(torso_pts[:, 0].max()) + padding, 0), width)
        top = min(max(int(torso_pts[:, 1].min()) - padding, 0), height)
        bottom = min(max(int(torso_pts[:, 1].max()) + padding, 0), height)

        # Draw torso box
        img_with_box = img.copy()
        cv2.rectangle(img_with_box, (left, top), (right, bottom), (255, 0, 0), 2)

        # Crop torso
        torso = img[top:bottom, left:right]

        return img_with_box, torso

    boxed_image, torso_crop = get_torso_crop(image_rgb, keypoints_with_scores)

    # Print keypoint coordinates and confidence
    print("\nKeypoint Coordinates and Confidence:")
    for i, (x, y, conf) in enumerate(keypoints_with_scores):
        if conf > 0.3:  # Only show high-confidence points
            print(f"Keypoint {i}: x={x:.1f}, y={y:.1f}, confidence={conf:.2f}")

    # Visualize results
    plt.figure(figsize=(15, 10))
    panels = [
        ("Original Image", image_rgb),
        ("Detected Keypoints", keypoint_viz),
        ("Torso Box", boxed_image),
        ("Torso Crop", torso_crop),
    ]
    for position, (title, panel) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        plt.title(title)
        if panel.size:  # an empty torso crop cannot be displayed
            plt.imshow(panel)

    plt.tight_layout()
    plt.show()

    return {
        'original': image_rgb,
        'keypoints': keypoints_with_scores,
        'keypoint_viz': keypoint_viz,
        'boxed': boxed_image,
        'torso': torso_crop
    }

# Usage
register_all_modules()
# Config and checkpoint are read from the working directory.
# NOTE(review): presumably these are the files fetched by the `mim download` cell - confirm the names match.
config = 'rtmpose-l_8xb256-420e_coco-256x192.py'
checkpoint = 'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth'
model = init_model(config, checkpoint, device='cpu')

# Process a single image (hard-coded crop; it must exist from an earlier run)
image_path = '/kaggle/working/legible_crops/frame_10_player_0_785_475.jpg'
results = visualize_vitpose_steps(image_path, model)

In [None]:
from mmpose.apis import inference_topdown
from mmpose.apis import init_model
from mmpose.utils import register_all_modules
import cv2
import numpy as np

# Register all modules (This is important!)
register_all_modules()

def test_mmpose():
    # Model loading
    config = 'rtmpose-l_8xb256-420e_coco-256x192.py'  # Use the config we downloaded
    checkpoint = 'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth'  # Use the checkpoint we downloaded
    model = init_model(config, checkpoint, device='cpu')
    
    # Load an image
    img = cv2.imread('legible_crops/frame_703_player_460_251.jpg')
    
    # Single instance detection
    bboxes = [[0, 0, img.shape[1], img.shape[0]]]  # Full image bbox
    
    print("Model:", type(model))
    print("Image shape:", img.shape)
    print("Bboxes:", bboxes)
    
    # Try inference
    try:
        results = inference_topdown(model, img, bboxes)
        print("Results:", results)
    except Exception as e:
        print("Error:", e)
        print("Model config:", model.cfg)

test_mmpose()

In [None]:
import cv2
import numpy as np
from mmpose.apis import inference_topdown, init_model
from mmpose.utils import register_all_modules
import matplotlib.pyplot as plt

def visualize_vitpose_steps(image_path, model):
    """Visualize each step of the pose-based torso extraction for one image.

    Args:
        image_path: path of the player crop to analyse
        model: MMPose model as returned by init_model

    Returns:
        Dict with 'original' (RGB image), 'keypoints' (one row per keypoint:
        x, y, score), 'keypoint_viz', 'boxed' and 'torso'.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # 1. Load and preprocess image
    image = cv2.imread(image_path)
    if image is None:
        # imread signals failure by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    print("Input Image Shape:", image.shape)

    # 2. Get pose estimation results (full image used as the person box)
    bboxes = [[0, 0, image.shape[1], image.shape[0]]]
    results = inference_topdown(model, image, bboxes)

    # Coordinates and scores are stored separately under pred_instances;
    # join them into rows of (x, y, score) for the first instance.
    instance = results[0].pred_instances
    keypoints = np.concatenate(
        [instance.keypoints[0], instance.keypoint_scores[0][:, None]], axis=1
    )
    print("\nKeypoints Shape:", keypoints.shape)
    print("\nSample Keypoint (Left Shoulder):", keypoints[5])

    # 3. Visualize keypoints on image
    def draw_keypoints(img, keypoints):
        img_copy = img.copy()
        for i, (x, y, conf) in enumerate(keypoints):
            if conf > 0.3:  # Only draw high-confidence points
                cv2.circle(img_copy, (int(x), int(y)), 3, (0, 255, 0), -1)
                cv2.putText(img_copy, str(i), (int(x), int(y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
        return img_copy

    keypoint_viz = draw_keypoints(image_rgb, keypoints)

    # 4. Extract and visualize torso
    def get_torso_crop(img, keypoints, padding=5):
        # Shoulders (5, 6) and hips (11, 12). The player's left/right swap in
        # image space depending on which way they face, so the box spans the
        # min/max of all four points.
        xs = keypoints[[5, 6, 11, 12], 0]
        ys = keypoints[[5, 6, 11, 12], 1]
        height, width = img.shape[:2]

        def clamp(value, upper):
            return min(max(value, 0), upper)

        left = clamp(int(xs.min()) - padding, width)
        right = clamp(int(xs.max()) + padding, width)
        top = clamp(int(ys.min()) - padding, height)
        bottom = clamp(int(ys.max()) + padding, height)

        # Draw torso box
        img_with_box = img.copy()
        cv2.rectangle(img_with_box, (left, top), (right, bottom), (255, 0, 0), 2)

        # Crop torso
        torso = img[top:bottom, left:right]

        return img_with_box, torso

    boxed_image, torso_crop = get_torso_crop(image_rgb, keypoints)

    # Print keypoint coordinates and confidence
    print("\nKeypoint Coordinates and Confidence:")
    for i, (x, y, conf) in enumerate(keypoints):
        if conf > 0.3:  # Only show high-confidence points
            print(f"Keypoint {i}: x={x:.1f}, y={y:.1f}, confidence={conf:.2f}")

    # Visualize results
    plt.figure(figsize=(15, 10))

    plt.subplot(2, 2, 1)
    plt.title("Original Image")
    plt.imshow(image_rgb)

    plt.subplot(2, 2, 2)
    plt.title("Detected Keypoints")
    plt.imshow(keypoint_viz)

    plt.subplot(2, 2, 3)
    plt.title("Torso Box")
    plt.imshow(boxed_image)

    plt.subplot(2, 2, 4)
    plt.title("Torso Crop")
    if torso_crop.size:  # an empty crop cannot be displayed
        plt.imshow(torso_crop)

    plt.tight_layout()
    plt.show()

    return {
        'original': image_rgb,
        'keypoints': keypoints,
        'keypoint_viz': keypoint_viz,
        'boxed': boxed_image,
        'torso': torso_crop
    }

# Usage
config = 'rtmpose-l_8xb256-420e_coco-256x192.py'
checkpoint = 'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth'
model = init_model(config, checkpoint, device='cpu')

# Process a single image
# NOTE: the path below is a placeholder - replace it with a real crop before running this cell.
image_path = 'path_to_your_player_crop.jpg'
results = visualize_vitpose_steps(image_path, model)

In [None]:
import matplotlib.pyplot as plt

In [None]:
def visualize_pose_results(image_path, results):
    """Visualize the pose estimation results

    Draws the four torso keypoints (shoulders and hips) and the quadrilateral
    joining them on a copy of the image, then shows the original and the
    annotated image side by side.

    Args:
        image_path: path of the image the pose was estimated on (read again here)
        results: pose results; only results[0].pred_instances is read, first instance

    Returns:
        None. The figure is displayed with plt.show().
    """
    # Read image (BGR, as loaded by OpenCV)
    img = cv2.imread(image_path)
    vis_img = img.copy()

    # Get keypoints and their scores for the first instance
    keypoints = results[0].pred_instances.keypoints[0]
    scores = results[0].pred_instances.keypoint_scores[0]

    # Points we're interested in: keypoint index -> (label, BGR colour)
    points_of_interest = {
        5: ('Left Shoulder', (0, 0, 255)),    # Red
        6: ('Right Shoulder', (0, 255, 0)),   # Green
        11: ('Left Hip', (255, 0, 0)),        # Blue
        12: ('Right Hip', (255, 255, 0))      # Cyan
    }

    # Draw keypoints
    for idx, (name, color) in points_of_interest.items():
        x, y = map(int, keypoints[idx])
        conf = scores[idx]

        # Draw point and label (label is offset up and to the left of the point)
        cv2.circle(vis_img, (x, y), 3, color, -1)
        cv2.putText(vis_img, f"{name}: {conf:.2f}", 
                   (x-10, y-10), cv2.FONT_HERSHEY_SIMPLEX, 
                   0.3, color, 1)

    # Draw torso region; point order traces the outline without crossing
    pts = np.array([
        keypoints[5][:2],  # Left shoulder
        keypoints[6][:2],  # Right shoulder
        keypoints[12][:2], # Right hip
        keypoints[11][:2]  # Left hip
    ], np.int32)

    cv2.polylines(vis_img, [pts], True, (255, 255, 255), 1)

    # Display results (converted to RGB for matplotlib)
    plt.figure(figsize=(15, 5))

    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.title('Original Image')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(cv2.cvtColor(vis_img, cv2.COLOR_BGR2RGB))
    plt.title('Detected Pose')
    plt.axis('off')

    plt.show()

# Run the visualization
image_path = 'legible_crops/frame_703_player_460_251.jpg'
model = init_model('rtmpose-l_8xb256-420e_coco-256x192.py', 
                  'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth', 
                  device='cpu')
pose_input = cv2.imread(image_path)
if pose_input is None:
    # imread returns None instead of raising when the file is missing or unreadable
    raise FileNotFoundError(f"Could not read image: {image_path}")
# Use the whole crop as the person box (x1, y1, x2, y2) instead of a hard-coded size
results = inference_topdown(model, pose_input,
                            [[0, 0, pose_input.shape[1], pose_input.shape[0]]])
visualize_pose_results(image_path, results)

In [None]:
def extract_torso_region(img, results):
    """
    Extract torso region using pose keypoints

    The box spans the two shoulders (keypoints 5, 6) and the two hips
    (keypoints 11, 12) plus 5 pixels of padding, clamped to the image.
    Bounds are the min/max over all four points, so the result does not
    depend on which way the player faces (the player's left shoulder is on
    the image right when they face the camera).

    Args:
        img: Original image (array indexed as [row, column, ...])
        results: Pose estimation results from MMPose; only
            results[0].pred_instances.keypoints[0] is read
    Returns:
        (torso_crop, (x1, y1, x2, y2)): the cropped torso image and the box
        it was cut from. The crop is empty when the torso lies outside the
        image.
    """
    # Get keypoints
    keypoints = results[0].pred_instances.keypoints[0]

    # Shoulder and hip coordinates, truncated to whole pixels
    torso_pts = keypoints[[5, 6, 11, 12]][:, :2].astype(int)
    xs, ys = torso_pts[:, 0], torso_pts[:, 1]

    # Calculate bounding box with padding; both ends are clamped into the image
    padding = 5
    height, width = img.shape[:2]
    x1 = min(max(int(xs.min()) - padding, 0), width)
    y1 = min(max(int(ys.min()) - padding, 0), height)
    x2 = min(max(int(xs.max()) + padding, 0), width)
    y2 = min(max(int(ys.max()) + padding, 0), height)

    # Crop torso region
    torso_crop = img[y1:y2, x1:x2]

    return torso_crop, (x1, y1, x2, y2)

# Test the torso extraction
def test_torso_extraction():
    """Run pose estimation on one saved crop and show the extracted torso.

    Displays two panels: the image with the four torso keypoints and the
    torso box drawn on it, and the cropped torso.

    Returns:
        The cropped torso image (BGR).

    Raises:
        FileNotFoundError: if the sample image cannot be read.
    """
    image_path = 'legible_crops/frame_703_player_460_251.jpg'
    img = cv2.imread(image_path)
    if img is None:
        # imread returns None instead of raising for a missing/unreadable file
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # Initialize model and get pose results, using the full image as the person box
    model = init_model('rtmpose-l_8xb256-420e_coco-256x192.py',
                       'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth',
                       device='cpu')
    results = inference_topdown(model, img, [[0, 0, img.shape[1], img.shape[0]]])

    # Extract torso region
    torso_crop, bbox = extract_torso_region(img, results)

    # Original image with keypoints and torso region
    vis_img = img.copy()
    x1, y1, x2, y2 = bbox

    # Draw keypoints: index -> (label, BGR colour)
    keypoints = results[0].pred_instances.keypoints[0]
    keypoint_names = {
        5: ('Left Shoulder', (0, 0, 255)),    # Red
        6: ('Right Shoulder', (0, 255, 0)),   # Green
        11: ('Left Hip', (255, 0, 0)),        # Blue
        12: ('Right Hip', (255, 255, 0))      # Cyan
    }

    for idx, (name, color) in keypoint_names.items():
        x, y = map(int, keypoints[idx][:2])
        cv2.circle(vis_img, (x, y), 3, color, -1)

    # Draw torso bounding box
    cv2.rectangle(vis_img, (x1, y1), (x2, y2), (255, 255, 255), 2)

    # Two panels are drawn, so use a 1x2 grid
    plt.figure(figsize=(15, 5))

    # Show original with detected region
    plt.subplot(1, 2, 1)
    plt.imshow(cv2.cvtColor(vis_img, cv2.COLOR_BGR2RGB))
    plt.title('Detected Torso Region')
    plt.axis('off')

    # Show cropped torso; an empty crop cannot be colour-converted or displayed
    plt.subplot(1, 2, 2)
    if torso_crop.size:
        plt.imshow(cv2.cvtColor(torso_crop, cv2.COLOR_BGR2RGB))
        plt.title('Cropped Torso')
    else:
        plt.title('Cropped Torso (empty)')
    plt.axis('off')

    plt.show()

    return torso_crop

# Run the test
torso_crop = test_torso_extraction()

In [None]:
def extract_torso_region(img, results, padding=5, apply_preprocessing=True):
    """
    Crop the torso (shoulder-to-hip box) from an image using pose keypoints.

    Args:
        img: Image array indexed as ``img[y, x]`` (e.g. from ``cv2.imread``).
        results: Pose results; ``results[0].pred_instances.keypoints[0]`` is
            indexed by keypoint id, with ids 5/6 read as the left/right
            shoulder and 11/12 as the left/right hip.
        padding: Pixels added on every side of the keypoint box before it is
            clamped to the image bounds.
        apply_preprocessing: When True the crop is upscaled 2x, sharpened and
            contrast-enhanced (CLAHE on the L channel in LAB space).

    Returns:
        ``(torso_crop, (x1, y1, x2, y2))``. When the clamped box is empty
        (for instance when every keypoint lies outside the image)
        ``torso_crop`` is an empty array and preprocessing is skipped, so
        callers should check ``torso_crop.size`` before using it.
    """
    # Get keypoints
    keypoints = results[0].pred_instances.keypoints[0]

    # Shoulders (5, 6) and hips (11, 12) as integer pixel coordinates
    torso_points = [keypoints[i][:2].astype(int) for i in (5, 6, 11, 12)]
    xs = [int(point[0]) for point in torso_points]
    ys = [int(point[1]) for point in torso_points]

    # Take the extremes over all four points: the "left" keypoints are not
    # guaranteed to have the smaller x, and pairing left->x1 / right->x2
    # produces an inverted (empty) box whenever they do not.
    x1 = max(0, min(xs) - padding)
    y1 = max(0, min(ys) - padding)
    # Never let the far edge fall before the near edge; a negative stop index
    # would otherwise wrap around and silently select the wrong region.
    x2 = min(img.shape[1], max(x1, max(xs) + padding))
    y2 = min(img.shape[0], max(y1, max(ys) + padding))

    # Crop torso region
    torso_crop = img[y1:y2, x1:x2]

    # cv2.resize rejects empty inputs, so only enhance a non-empty crop.
    if apply_preprocessing and torso_crop.size > 0:
        # 1. Resize to larger size
        torso_crop = cv2.resize(torso_crop, (0, 0), fx=2, fy=2)

        # 2. Sharpen the image
        kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
        torso_crop = cv2.filter2D(torso_crop, -1, kernel)

        # 3. Enhance contrast on the lightness channel only
        lab = cv2.cvtColor(torso_crop, cv2.COLOR_BGR2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
        cl = clahe.apply(l)
        limg = cv2.merge((cl, a, b))
        torso_crop = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)

    return torso_crop, (x1, y1, x2, y2)

# Test with different parameters
def test_torso_extraction_with_preprocessing():
    """Show the raw and the preprocessed torso crop of one sample side by side.

    Runs the RTMPose-L model on CPU with the whole image as the person box,
    extracts the torso twice (without and with preprocessing, padding 5) and
    plots both crops.

    Returns:
        ``(torso_original, torso_processed)``.
    """
    sample_path = 'legible_crops/frame_703_player_460_251.jpg'
    pose_model = init_model(
        'rtmpose-l_8xb256-420e_coco-256x192.py',
        'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth',
        device='cpu',
    )
    img = cv2.imread(sample_path)
    full_frame_box = [0, 0, img.shape[1], img.shape[0]]
    results = inference_topdown(pose_model, img, [full_frame_box])

    # Same keypoints, two variants of the crop; only the first bbox is kept.
    torso_original, bbox = extract_torso_region(
        img, results, padding=5, apply_preprocessing=False)
    torso_processed, _ = extract_torso_region(
        img, results, padding=5, apply_preprocessing=True)

    plt.figure(figsize=(15, 5))

    panels = (
        ('Original Crop', torso_original),
        ('Processed Crop', torso_processed),
    )
    for slot, (caption, bgr_crop) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        # Crops are BGR (OpenCV order); matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(bgr_crop, cv2.COLOR_BGR2RGB))
        plt.title(caption)
        plt.axis('off')

    plt.show()

    return torso_original, torso_processed

# Run the test
# Both crops are kept at notebook scope for inspection in later cells.
original_crop, processed_crop = test_torso_extraction_with_preprocessing()

In [None]:
import os
import cv2
import numpy as np
from mmpose.apis import inference_topdown, init_model
from mmpose.utils import register_all_modules
import matplotlib.pyplot as plt

def create_output_directories():
    """Make sure the ``output`` folder tree used by the crop pipeline exists.

    Creates ``output`` and its three sub-folders relative to the current
    working directory (folders that already exist are left alone) and prints
    one line per folder.
    """
    root = 'output'
    subfolders = (
        'original_with_keypoints',
        'torso_crops_original',
        'torso_crops_processed',
    )

    for dir_path in [root] + [f'{root}/{leaf}' for leaf in subfolders]:
        os.makedirs(dir_path, exist_ok=True)
        print(f"Created directory: {dir_path}")

def process_single_image(model, image_path):
    """Run pose estimation on one player crop and write the derived images.

    Three files are written under ``output/``: the input annotated with the
    four torso keypoints, the raw torso crop and the preprocessed torso crop.
    Output names are derived from the input file name without its extension.

    Args:
        model: Pose model handed to ``inference_topdown``.
        image_path: Path of the image to process.

    Returns:
        True when processing finished, False when any step raised, and None
        when the image could not be read.
    """
    # Read image
    img = cv2.imread(image_path)
    if img is None:
        print(f"Could not read image: {image_path}")
        return None

    # Strip only the final extension. Splitting on the first '.' would cut
    # names that contain dots short and let different inputs overwrite each
    # other's outputs.
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    try:
        # Run pose estimation with the whole image as the person box
        results = inference_topdown(model, img, [[0, 0, img.shape[1], img.shape[0]]])

        # Visualize keypoints on a copy so the crops come from the clean image
        vis_img = img.copy()
        keypoints = results[0].pred_instances.keypoints[0]

        # Keypoint id -> (label, BGR colour)
        keypoint_names = {
            5: ('Left Shoulder', (0, 0, 255)),    # Red
            6: ('Right Shoulder', (0, 255, 0)),   # Green
            11: ('Left Hip', (255, 0, 0)),        # Blue
            12: ('Right Hip', (255, 255, 0))      # Cyan
        }

        for idx, (name, color) in keypoint_names.items():
            x, y = map(int, keypoints[idx][:2])
            cv2.circle(vis_img, (x, y), 3, color, -1)
            cv2.putText(vis_img, name, (x - 10, y - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, color, 1)

        # Save keypoint visualization
        keypoint_path = f'output/original_with_keypoints/{base_name}_keypoints.jpg'
        cv2.imwrite(keypoint_path, vis_img)

        # Extract and save torso crops
        torso_original, torso_processed = extract_torso_crops(img, results)

        if torso_original is not None:
            # Save original crop
            crop_path = f'output/torso_crops_original/{base_name}_torso.jpg'
            cv2.imwrite(crop_path, torso_original)

            # Save processed crop
            processed_path = f'output/torso_crops_processed/{base_name}_torso_processed.jpg'
            cv2.imwrite(processed_path, torso_processed)

        return True

    except Exception as e:
        # Best effort: report the failure and let the caller move on.
        print(f"Error processing {image_path}: {e}")
        return False

def extract_torso_crops(img, results):
    """Extract both the raw and the preprocessed torso crop.

    Args:
        img: Image array indexed as ``img[y, x]``.
        results: Pose results; ``results[0].pred_instances.keypoints[0]`` is
            indexed by keypoint id, with ids 5/6 read as the left/right
            shoulder and 11/12 as the left/right hip.

    Returns:
        ``(torso_original, torso_processed)``: the shoulder-to-hip crop
        (5 px padding, clamped to the image) and a copy that was upscaled 2x,
        sharpened and contrast-enhanced.

    Raises:
        ValueError: If the clamped torso box is empty, e.g. because the
            keypoints fall outside the image.
    """
    # Get keypoints
    keypoints = results[0].pred_instances.keypoints[0]

    # Shoulders (5, 6) and hips (11, 12) as integer pixel coordinates
    torso_points = [keypoints[i][:2].astype(int) for i in (5, 6, 11, 12)]
    xs = [int(point[0]) for point in torso_points]
    ys = [int(point[1]) for point in torso_points]

    # Take the extremes over all four points: the "left" keypoints are not
    # guaranteed to have the smaller x, and pairing left->x1 / right->x2
    # produces an inverted (empty) box whenever they do not.
    padding = 5
    x1 = max(0, min(xs) - padding)
    y1 = max(0, min(ys) - padding)
    # Keep the far edge at or after the near edge so a negative stop index
    # can never wrap around and select the wrong region.
    x2 = min(img.shape[1], max(x1, max(xs) + padding))
    y2 = min(img.shape[0], max(y1, max(ys) + padding))

    # Get original crop
    torso_original = img[y1:y2, x1:x2]
    if torso_original.size == 0:
        # Fail with a readable message instead of an opaque resize error.
        raise ValueError(
            f"Empty torso region (x1={x1}, y1={y1}, x2={x2}, y2={y2}) "
            f"for image of shape {img.shape}"
        )

    # 1. Resize (returns a new array, so torso_original is left untouched)
    torso_processed = cv2.resize(torso_original, (0, 0), fx=2, fy=2)

    # 2. Sharpen
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    torso_processed = cv2.filter2D(torso_processed, -1, kernel)

    # 3. Enhance contrast on the lightness channel only
    lab = cv2.cvtColor(torso_processed, cv2.COLOR_BGR2LAB)
    l, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    cl = clahe.apply(l)
    limg = cv2.merge((cl, a, b))
    torso_processed = cv2.cvtColor(limg, cv2.COLOR_LAB2BGR)

    return torso_original, torso_processed

def process_all_legible_crops():
    """Run ``process_single_image`` on every image in ``legible_crops``.

    Creates the output folders, loads the pose model on CPU, processes each
    ``.jpg``/``.jpeg``/``.png`` file (extension matched case-insensitively,
    files visited in sorted order) and prints a success/failure summary.
    """
    # Create output directories
    create_output_directories()

    # Initialize model
    # NOTE: the config and checkpoint loaded below are RTMPose-L files, even
    # though the log line says ViTPose.
    print("Initializing ViTPose model...")
    model = init_model('rtmpose-l_8xb256-420e_coco-256x192.py',
                       'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth',
                       device='cpu')

    legible_crops_dir = 'legible_crops'
    image_extensions = ('.jpg', '.jpeg', '.png')
    # lower() keeps files such as 'x.JPG' from being skipped; sorting makes
    # the run order (and therefore the log) reproducible, which os.listdir
    # alone does not guarantee.
    filenames = sorted(
        name for name in os.listdir(legible_crops_dir)
        if name.lower().endswith(image_extensions)
    )

    # Process each image
    successful_processes = 0
    for filename in filenames:
        image_path = os.path.join(legible_crops_dir, filename)
        print(f"\nProcessing {filename}...")

        # None (unreadable) and False (error) both count as failures.
        if process_single_image(model, image_path):
            successful_processes += 1

    total_images = len(filenames)
    print("\nProcessing complete!")
    print(f"Total images processed: {total_images}")
    print(f"Successful processes: {successful_processes}")
    print(f"Failed processes: {total_images - successful_processes}")

# Run the processing
# register_all_modules() is called before the model is built inside
# process_all_legible_crops().
register_all_modules()
process_all_legible_crops()

## Post-processing and attempting to identify the jersey number
(Still a work in progress)

In [None]:
def analyze_failed_image(model, image_name):
    """Print the structure of the pose results for one crop in ``legible_crops``.

    Reads the image, runs ``inference_topdown`` with the whole image as the
    person box, dumps the results object (length, attributes of the first
    entry, keypoint array shape) and stores a copy of the input under
    ``debug_output/``. Any exception raised after the image was read is
    reported instead of propagated.

    Args:
        model: Pose model handed to ``inference_topdown``.
        image_name: File name inside ``legible_crops``.
    """
    print(f"\nAnalyzing: {image_name}")
    source_path = os.path.join('legible_crops', image_name)

    img = cv2.imread(source_path)
    if img is None:
        print(f"Could not read image: {source_path}")
        return

    print(f"Image shape: {img.shape}")

    try:
        full_frame_box = [0, 0, img.shape[1], img.shape[0]]
        results = inference_topdown(model, img, [full_frame_box])

        # Dump what came back before assuming anything about its layout.
        print("\nResults structure:")
        print(f"Results: {results}")
        print(f"Length of results: {len(results)}")
        if len(results) > 0:
            first = results[0]
            print(f"First result attributes: {dir(first)}")
            if hasattr(first, 'pred_instances'):
                print(f"Keypoints shape: {first.pred_instances.keypoints.shape}")

        # Keep the untouched input next to the other debug artefacts.
        os.makedirs('debug_output', exist_ok=True)
        cv2.imwrite(f'debug_output/original_{image_name}', img)

    except Exception as err:
        print("Error during analysis:")
        print(f"Error type: {type(err)}")
        print(f"Error message: {str(err)}")

# Initialize model and run analysis
# `torch` is not imported in this cell; it relies on the import made in an
# earlier cell. Unlike the batch run (device='cpu'), the model is placed on
# the GPU here when one is available.
register_all_modules()
model = init_model('rtmpose-l_8xb256-420e_coco-256x192.py', 
                  'rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth', 
                  device='cuda' if torch.cuda.is_available() else 'cpu')

# Now pass both model and image_name
analyze_failed_image(model, 'frame_528_player_458_292.jpg')

In [None]:
def analyze_failed_image(model, image_name):
    """Diagnose one crop: print torso keypoints with confidences and save an overlay.

    This definition replaces the ``analyze_failed_image`` from the previous
    cell. It runs pose estimation with the whole image as the person box,
    prints position and confidence of both shoulders and both hips, prints
    the crop box the pipeline's left/right rule would produce (the width is
    negative when that box is inverted) and writes the annotated image to
    ``debug_output/debug_<image_name>``. Exceptions raised after the image
    was read are reported instead of propagated.

    Args:
        model: Pose model handed to ``inference_topdown``.
        image_name: File name inside ``legible_crops``.
    """
    print(f"\nAnalyzing: {image_name}")
    source_path = os.path.join('legible_crops', image_name)

    img = cv2.imread(source_path)
    if img is None:
        print(f"Could not read image: {source_path}")
        return

    print(f"Image shape: {img.shape}")

    try:
        full_frame_box = [0, 0, img.shape[1], img.shape[0]]
        results = inference_topdown(model, img, [full_frame_box])

        # First detected instance: per-keypoint (x, y) and per-keypoint score.
        keypoints = results[0].pred_instances.keypoints[0]
        scores = results[0].pred_instances.keypoint_scores[0]

        debug_img = img.copy()

        # (keypoint id, label, BGR colour) for the four torso corners
        torso_markers = (
            (5, 'Left Shoulder', (0, 0, 255)),     # red
            (6, 'Right Shoulder', (0, 255, 0)),    # green
            (11, 'Left Hip', (255, 0, 0)),         # blue
            (12, 'Right Hip', (255, 255, 0)),      # cyan
        )

        print("\nKeypoint Coordinates and Confidence:")
        for kp_index, label, bgr in torso_markers:
            kx, ky = keypoints[kp_index]
            conf = scores[kp_index]
            print(f"{label}: ({kx:.2f}, {ky:.2f}), confidence: {conf:.2f}")

            px, py = int(kx), int(ky)
            cv2.circle(debug_img, (px, py), 3, bgr, -1)
            cv2.putText(debug_img, f"{label}: {conf:.2f}",
                        (px - 10, py - 10), cv2.FONT_HERSHEY_SIMPLEX,
                        0.3, bgr, 1)

        # Reproduce the pipeline's box rule: left side -> x1, right side -> x2.
        padding = 5
        l_shoulder, r_shoulder = keypoints[5], keypoints[6]
        l_hip, r_hip = keypoints[11], keypoints[12]
        x1 = max(0, int(min(l_shoulder[0], l_hip[0])) - padding)
        y1 = max(0, int(min(l_shoulder[1], r_shoulder[1])) - padding)
        x2 = min(img.shape[1], int(max(r_shoulder[0], r_hip[0])) + padding)
        y2 = min(img.shape[0], int(max(l_hip[1], r_hip[1])) + padding)

        print("\nProposed crop dimensions:")
        print(f"x1: {x1}, x2: {x2}, width: {x2-x1}")
        print(f"y1: {y1}, y2: {y2}, height: {y2-y1}")

        cv2.rectangle(debug_img, (x1, y1), (x2, y2), (255, 255, 255), 1)

        os.makedirs('debug_output', exist_ok=True)
        cv2.imwrite(f'debug_output/debug_{image_name}', debug_img)
        print(f"\nSaved debug visualization to debug_output/debug_{image_name}")

    except Exception as err:
        print("Error during analysis:")
        print(f"Error type: {type(err)}")
        print(f"Error message: {str(err)}")

# Run the analysis
# Reuses the `model` created in the previous cell.
analyze_failed_image(model, 'frame_528_player_458_292.jpg')

In [None]:
# Archive the pipeline outputs for download.
# NOTE(review): the absolute /kaggle/working path ties this cell to Kaggle,
# and `folder_name.zip` looks like a placeholder archive name.
!zip -r folder_name.zip /kaggle/working/output

### Using PARSeq to identify the jersey number from the ViTPose output

In [None]:
# Fetch the PARSeq sources; the next cell installs them from ./parseq.
!git clone https://github.com/baudm/parseq.git

In [None]:
# `os` is imported here and used by the following cells (e.g. os.makedirs).
import os

# Change to parseq directory
%cd parseq

# Install dependencies
# The second command installs the cloned package itself in editable mode.
!pip install -r requirements/core.txt
!pip install -e .

# Return to original directory
%cd ..

In [None]:
import gdown

# `os` comes from the import in an earlier cell.
# Create models directory
os.makedirs('models', exist_ok=True)

# Download PARSeq model for jersey numbers
# `model_path` is read again by the verification cell that follows.
model_url = "https://drive.google.com/uc?id=1uRln22tlhneVt3P6MePmVxBWSLMsL3bm"
model_path = "models/parseq_epoch=24-step=2575-val_accuracy=95.6044-val_NED=96.3255.ckpt"
gdown.download(model_url, model_path)

In [None]:
# Try importing required modules
# Smoke test: checks that strhub imports and that the downloaded checkpoint
# loads. Depends on `torch` and `model_path` from earlier cells.
# NOTE(review): this rebinds the notebook-level name `model`, which earlier
# held the pose model; re-create the pose model before re-running the pose
# analysis cells after this one.
try:
    from strhub.models.parseq.model import PARSeq
    from strhub.models.utils import load_from_checkpoint
    print("PARSeq successfully installed!")
    
    # Try loading the model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = load_from_checkpoint(model_path, device=device)
    print("Model loaded successfully!")
    
except Exception as e:
    # Import errors and checkpoint-loading errors are both reported here.
    print(f"Error during verification: {str(e)}")

In [None]:
# Extra packages for PARSeq. Only timm is pinned; einops and
# pytorch-lightning resolve to whatever version is current at install time.
!pip install timm==0.4.9
!pip install einops
!pip install pytorch-lightning

In [None]:
# NOTE(review): this changes the kernel's working directory for every cell
# that follows and is never undone. Relative paths used later (e.g.
# "models/..." and 'torso_crops') will resolve against this directory.
cd jersey-number-pipeline/str/parseq

In [None]:
# Install the requirements of the PARSeq copy inside jersey-number-pipeline;
# the relative path depends on the `cd` in the previous cell.
!pip install -r requirements/core.txt

In [None]:
# Debugging aid: list the available conda environments.
conda env list

In [None]:
# Debugging aid: search below the current directory for paths whose name
# contains "torch".
!sudo find . -name "*torch*"

In [None]:
import torch

In [None]:
# import strhub.models.parseq.system

In [None]:
# # Uninstall current PyTorch
# !pip uninstall -y torch torchvision torchaudio

# # Install compatible PyTorch version
# !pip install torch==2.0.0 torchvision==0.15.0 torchaudio==2.0.0

# # Verify PyTorch version
# import torch
# print("PyTorch version:", torch.__version__)

In [None]:
import cv2
import torch
from strhub.models.parseq.model import PARSeq
from strhub.models.utils import load_from_checkpoint
from torchvision import transforms
from PIL import Image

# Loaded PARSeq models keyed by (checkpoint path, device), so the checkpoint
# is read once per kernel session instead of once per image.
_PARSEQ_MODELS = {}


def _get_parseq_model(checkpoint_path, device):
    """Return the cached eval-mode PARSeq model, loading it on first use."""
    key = (checkpoint_path, device)
    if key not in _PARSEQ_MODELS:
        model = load_from_checkpoint(checkpoint_path, device=device)
        # Move explicitly so the weights are on the same device as the input
        # tensor built in recognize_jersey_number.
        _PARSEQ_MODELS[key] = model.eval().to(device)
    return _PARSEQ_MODELS[key]


def recognize_jersey_number(torso_image):
    """
    Recognize jersey number from torso crop using PARSeq
    Args:
        torso_image: CV2 image (numpy array, BGR) of cropped torso region
    Returns:
        predicted number as string
    Raises:
        ValueError: if ``torso_image`` is None or empty
    """
    if torso_image is None or getattr(torso_image, 'size', 0) == 0:
        raise ValueError("torso_image is empty or could not be read")

    # Load PARSeq model (cached after the first call)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = _get_parseq_model(
        "models/parseq_epoch=24-step=2575-val_accuracy=95.6044-val_NED=96.3255.ckpt",
        device
    )

    # Prepare image the way PARSeq's own data module does: bicubic resize to
    # 32x128 and normalisation with mean/std 0.5 (not the ImageNet
    # statistics), so inference matches the training-time preprocessing.
    transform = transforms.Compose([
        transforms.Resize((32, 128), transforms.InterpolationMode.BICUBIC),
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5)
    ])

    # Convert CV2 image (BGR) to PIL (RGB)
    pil_image = Image.fromarray(cv2.cvtColor(torso_image, cv2.COLOR_BGR2RGB))

    # Process image
    with torch.no_grad():
        img = transform(pil_image).unsqueeze(0).to(device)
        logits = model(img)
        # tokenizer.decode takes per-token probability distributions and
        # returns (labels, confidences); keep the label of the single image.
        labels, _ = model.tokenizer.decode(logits.softmax(-1))

    return labels[0]

# Test function to process a directory of torso crops
def process_torso_crops(crops_dir='torso_crops'):
    """Process all torso crops and recognize jersey numbers

    Every ``.jpg``/``.jpeg``/``.png`` file in ``crops_dir`` (extension matched
    case-insensitively, files visited in sorted order) is passed to
    ``recognize_jersey_number``. A copy with the prediction drawn on it is
    written to ``number_recognition_results/``.

    Args:
        crops_dir: Directory containing the torso crops.
            NOTE(review): the earlier pipeline cells write their crops to
            ``output/torso_crops_original`` and ``output/torso_crops_processed``;
            confirm that the default ``'torso_crops'`` directory exists.

    Returns:
        Dict mapping file name to recognized number, containing only the
        files for which recognition succeeded.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    filenames = sorted(
        name for name in os.listdir(crops_dir)
        if name.lower().endswith(image_extensions)
    )

    # Create the output folder once, not on every iteration.
    output_dir = 'number_recognition_results'
    if filenames:
        os.makedirs(output_dir, exist_ok=True)

    results = {}
    for filename in filenames:
        image_path = os.path.join(crops_dir, filename)

        # Read image; cv2.imread signals failure by returning None.
        torso_image = cv2.imread(image_path)
        if torso_image is None:
            print(f"Error processing {filename}: could not read image")
            continue

        try:
            # Recognize number
            number = recognize_jersey_number(torso_image)
            results[filename] = number
            print(f"{filename}: Detected number {number}")

            # Save visualization
            vis_img = torso_image.copy()
            cv2.putText(vis_img, f"Number: {number}",
                        (10, 20), cv2.FONT_HERSHEY_SIMPLEX,
                        0.5, (0, 255, 0), 1)
            cv2.imwrite(f'{output_dir}/numbered_{filename}', vis_img)

        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(f"Error processing {filename}: {e}")

    return results

# Run the processing
# `results` only holds entries for crops whose recognition succeeded, so the
# total printed below counts successes rather than files visited.
results = process_torso_crops()
print("\nProcessing complete!")
print(f"Total images processed: {len(results)}")