# ... modello pre-trainato su Colab

In [1]:
import cv2
import time
from ultralytics import YOLO
import numpy as np
import os
import re
import pytesseract
import json
import pandas as pd

In [2]:
import torch

# Report whether PyTorch can see a CUDA device. The device name is queried
# only when one is available, because get_device_name(0) raises otherwise.
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

True
NVIDIA Graphics Device


In [3]:
import torch

# Print the PyTorch build and the CUDA toolkit version it was built against.
print(torch.__version__)
print(torch.version.cuda)

# Device name and compute capability can only be queried when a CUDA device
# is available; both calls raise otherwise.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

    print(torch.cuda.get_device_capability(0))

2.7.0+cu128
12.8
NVIDIA Graphics Device
(12, 0)


In [4]:
def preprocess_timer_image(timer_region):
    """Turn a BGR crop of the game clock into a binary image for OCR.

    The crop is converted to grayscale, upscaled 3x with bicubic
    interpolation, smoothed with a 5x5 Gaussian blur and binarized with
    Otsu's threshold. When white pixels are the minority the result is
    inverted, so white pixels are never the minority in the returned image.
    """
    grayscale = cv2.cvtColor(timer_region, cv2.COLOR_BGR2GRAY)

    # Upscale before blurring/thresholding to give the OCR larger glyphs.
    enlarged = cv2.resize(grayscale, None, fx=3, fy=3, interpolation=cv2.INTER_CUBIC)

    # Light blur to suppress noise ahead of the threshold.
    smoothed = cv2.GaussianBlur(enlarged, (5, 5), 0)

    # Otsu picks the threshold itself, so the 0 passed here is ignored.
    binary = cv2.threshold(smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Flip polarity when black dominates.
    n_white = np.count_nonzero(binary == 255)
    n_black = np.count_nonzero(binary == 0)
    return cv2.bitwise_not(binary) if n_white < n_black else binary


def read_timer_text(timer_region, timestamp):
    """OCR the game clock in ``timer_region`` and return it as ``(mins, secs)``.

    ``timestamp`` is the ``"MM:SS"`` value being searched for; it only selects
    the parsing strategy. When its minutes are 0, all non-digits are dropped
    and the one or two digits before the last digit are taken as seconds
    (the last digit is discarded). Otherwise the text is parsed as ``M:SS`` /
    ``MM:SS`` or, failing that, as a run of 3-4 digits split into minutes and
    a two-digit seconds suffix.

    Returns ``None`` when nothing usable could be read.
    """
    binarized = preprocess_timer_image(timer_region)

    tess_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789:'
    raw_text = pytesseract.image_to_string(binarized, config=tess_config).strip()
    print(f"[OCR] Testo grezzo: '{raw_text}'")

    # Split the searched timestamp; both fields must be valid integers.
    fields = timestamp.split(":")
    searched_mins = int(fields[0])
    searched_secs = int(fields[1])  # parsed for validation only

    if searched_mins == 0:
        # Targets of the form 00:XX: only the seconds matter.
        digits_only = ''.join(filter(str.isdigit, raw_text))
        if len(digits_only) < 2:
            return None
        try:
            # Drop the final digit and keep up to two digits before it.
            secs = int(digits_only[-3:-1])
        except ValueError:
            return None
        print(f"[OCR] Testo riconosciuto :ssd... '00:{secs:02d}'")
        return 0, secs

    # First choice: an explicit MM:SS reading.
    colon_match = re.search(r'(\d{1,2}):(\d{2})', raw_text)
    if colon_match:
        mins, secs = (int(group) for group in colon_match.groups())
        print(f"[OCR] Testo riconosciuto mm:ss... {mins}:{secs:02d}")
        return mins, secs

    # Fallback: the colon was lost, so split a 3-4 digit run as M(M)SS.
    run_match = re.search(r'(\d{3,4})', raw_text)
    if run_match is None:
        return None
    digit_run = run_match.group(1)
    mins, secs = int(digit_run[:-2]), int(digit_run[-2:])
    print(f"[OCR] Testo riconosciuto mmss... {mins}:{secs:02d}")
    return mins, secs


def extract_frames_by_timestamp(video_path, output_dir="/home/diego/Documents/GitHub/NBA/Computer_vision/Frames_of_shot"):
    """Save the sampled frames whose on-screen timer matches the file name's timestamp.

    The target clock time is read from the end of the video's base name,
    which must finish with ``MM:SS`` (for example ``game_1_11:48.mp4``).
    About two frames per second of video are sampled. Each one is passed to
    the module-level ``model`` detector, every detection above 0.5 confidence
    is cropped and read with ``read_timer_text``, and frames whose reading
    equals the target are written to ``output_dir`` as JPEG files.

    Args:
        video_path: Path of the video file to scan.
        output_dir: Directory receiving the matching frames; created if
            missing.

    Returns:
        A list with one ``"MM:SS"`` string per matching detection (empty
        when nothing matched).

    Raises:
        ValueError: If the file name does not end with a ``MM:SS`` timestamp
            or the video cannot be opened.
    """
    video_filename = os.path.basename(video_path)
    name_file = os.path.splitext(video_filename)[0]

    # Take the timestamp from the end of the base name instead of slicing at
    # fixed offsets, so "1:48" and extensions of any length also work.
    stamp = re.search(r'(?<!\d)(\d{1,2}):(\d{2})$', name_file)
    if stamp is None:
        raise ValueError(f"Timestamp MM:SS non trovato nel nome del file: {video_filename}")
    searched_mins, searched_secs = int(stamp.group(1)), int(stamp.group(2))
    standardized_target = f"{searched_mins:02d}:{searched_secs:02d}"

    found_timestamps = []
    os.makedirs(output_dir, exist_ok=True)

    print(f"Cercando il timestamp: {standardized_target}")

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Impossibile aprire il video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Guard against a reported FPS of 0, which would divide by zero.
        video_duration = total_frames / fps if fps > 0 else 0.0

        print(f"Video: {video_filename}")
        print(f"Durata: {video_duration:.2f} secondi ({total_frames} frames)")
        print(f"FPS: {fps}")

        # Sample roughly two frames per second (at least every frame).
        check_interval = max(1, int(fps / 2))
        start_time = time.time()

        for frame_idx in range(0, total_frames, check_interval):
            # Progress report every tenth sampled frame.
            if frame_idx % (check_interval * 10) == 0:
                progress = (frame_idx / total_frames) * 100
                elapsed = time.time() - start_time
                remaining = (elapsed / (frame_idx + 1)) * (total_frames - frame_idx) if frame_idx > 0 else 0
                print(f"Progresso: {progress:.1f}% (tempo rimanente stimato: {remaining:.1f}s)")

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ret, frame = cap.read()
            if not ret:
                break

            # `model` is the module-level detector defined outside this function.
            results = model(frame, conf=0.5)

            for r in results:
                for box in r.boxes:
                    conf = float(box.conf[0])
                    if conf <= 0.5:
                        continue

                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    timer_region = frame[y1:y2, x1:x2].copy()
                    if timer_region.size == 0:
                        # An empty crop would make the OCR preprocessing fail.
                        continue

                    timer_text = read_timer_text(timer_region, standardized_target)
                    if not timer_text:
                        continue

                    found_mins, found_secs = timer_text
                    if found_mins != searched_mins or found_secs != searched_secs:
                        continue

                    out_filename = f"{name_file}_{found_mins:02d}_{found_secs:02d}_frame{frame_idx}.jpg"
                    output_path = os.path.join(output_dir, out_filename)
                    # imwrite signals failure through its return value only.
                    if not cv2.imwrite(output_path, frame):
                        print(f"Impossibile salvare il frame: {output_path}")
                    found_timestamp = f"{found_mins:02d}:{found_secs:02d}"
                    found_timestamps.append(found_timestamp)
                    print(f"Trovato timestamp {found_mins:02d}:{found_secs:02d} nel frame {frame_idx}")
    finally:
        # Always free the capture, even on errors or a manual interrupt.
        cap.release()

    if not found_timestamps:
        print(f"Il timestamp {standardized_target} non è stato trovato")

    return found_timestamps

In [5]:
# Load the trained timer detector; extract_frames_by_timestamp reads this
# module-level name.
model = YOLO('/home/diego/Documents/GitHub/NBA/Computer_vision/timer_detector/weights/best.pt')

# Folder containing the video clips
cartella_video = "/home/diego/Documents/GitHub/NBA/Video_bos"

# Supported video extensions
estensioni_video = ['.mp4', '.avi', '.mov', '.mkv']

# Collect every video file in the folder
tutti_i_video = [
    os.path.join(cartella_video, f) for f in os.listdir(cartella_video)
    if os.path.isfile(os.path.join(cartella_video, f)) and f.lower().endswith(tuple(estensioni_video))
]
# Set copy of the list for constant-time membership tests in the loop below
video_disponibili = set(tutti_i_video)

# The database is read as JSON Lines (one object per line); blank lines are skipped
with open('/home/diego/Documents/GitHub/NBA/database_bos.json', 'r', encoding='utf-8') as f:
    lines = [json.loads(line) for line in f if line.strip()]

df_bos = pd.DataFrame(lines)

# Stop after this many videos have been processed
max_video = 5
count = 0

for index, row in df_bos.iterrows():
    print('\n-----------------------------------------------------------\n')
    print(index)

    video_id = row["videoID"]
    if not video_id:
        continue

    video_path = os.path.join(cartella_video, str(video_id))

    if video_path in video_disponibili:
        found = extract_frames_by_timestamp(video_path)
        print(f"Timestamp trovati: {found}")
        count += 1
        if count == max_video:
            break
    else:
        # Reported only when the file is really missing from the folder.
        print("VIDEO NOT FOUND")


-----------------------------------------------------------

0
Cercando il timestamp: 11:48
Video: bos-vs-nyk-0022300065_1_11:48.mp4
Durata: 9.57 secondi (574 frames)
FPS: 60.0
Progresso: 0.0% (tempo rimanente stimato: 0.0s)

0: 384x640 1 timer, 51.0ms
Speed: 4.1ms preprocess, 51.0ms inference, 97.2ms postprocess per image at shape (1, 3, 384, 640)
[OCR] Testo grezzo: '11:54'
[OCR] Testo riconosciuto mm:ss... 11:54

0: 384x640 1 timer, 6.4ms
Speed: 3.1ms preprocess, 6.4ms inference, 3.2ms postprocess per image at shape (1, 3, 384, 640)
[OCR] Testo grezzo: '11:53'
[OCR] Testo riconosciuto mm:ss... 11:53

0: 384x640 1 timer, 6.8ms
Speed: 1.9ms preprocess, 6.8ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)
[OCR] Testo grezzo: '11:53'
[OCR] Testo riconosciuto mm:ss... 11:53

0: 384x640 1 timer, 4.7ms
Speed: 1.2ms preprocess, 4.7ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
[OCR] Testo grezzo: '1:52'
[OCR] Testo riconosciuto mm:ss... 1:52

0: 384x6

KeyboardInterrupt: 