# Experiment 1

Extracting the score of the match from a single frame

## Imports

In [3]:
# %env HSA_OVERRIDE_GFX_VERSION=10.3.0

import cv2
import numpy as np
from util import vid_to_frames
import torch
from PIL import Image

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [4]:
# Pick the first CUDA device when torch can see one, otherwise fall back to the CPU,
# and announce which of the two was selected.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print({'cuda': 'Using the GPU 😎', 'cpu': 'Using the CPU ‼️'}[device.type])

# device = torch.device('cpu')

Using the CPU ‼️


In [5]:
test_vid = './test/test_vid.mp4'

# Size, in pixels, of the region cropped around each score.
PATCH_HEIGHT = 14
PATCH_WIDTH = 19

# Row span shared by both score patches.
PATCH_TOP = 310
PATCH_BOTTOM = PATCH_TOP + PATCH_HEIGHT
# Column spans of the left (L) and right (R) score patches.
# *_MID is the column where a patch is split into two halves:
# the floor-centre of the patch shifted 2 px to the left.
L_PATCH_LEFT = 268
L_PATCH_RIGHT = L_PATCH_LEFT + PATCH_WIDTH
L_PATCH_MID = (L_PATCH_LEFT + L_PATCH_RIGHT) // 2 - 2
R_PATCH_LEFT = 357
R_PATCH_RIGHT = R_PATCH_LEFT + PATCH_WIDTH
R_PATCH_MID = (R_PATCH_LEFT + R_PATCH_RIGHT) // 2 - 2
# Added to 255 to form the binarisation threshold (255 + ADJ = 223).
ADJ = -32

# Seek to frame 16,000 and read it as a grayscale image.
cap = cv2.VideoCapture(test_vid)
cap.set(cv2.CAP_PROP_POS_FRAMES, 16_000)
flags, arb_frame = cap.read()
if not flags:
    # cap.read() returns (False, None) when the file is missing or the seek
    # is out of range; fail here with a clear message instead of letting
    # cvtColor choke on None.
    raise RuntimeError(f'could not read frame 16000 from {test_vid!r}')
arb_frame = cv2.cvtColor(arb_frame, cv2.COLOR_BGR2GRAY)

# --- left score -----------------------------------------------------------
# Crop the patch and binarise it: pixels brighter than 255 + ADJ become 255,
# everything else becomes 0.
lwhole_patch = arb_frame[PATCH_TOP:PATCH_BOTTOM, L_PATCH_LEFT:L_PATCH_RIGHT]
_, lwhole_patch = cv2.threshold(lwhole_patch, 255 + ADJ, 255, cv2.THRESH_BINARY)

# First half: keep only the columns left of the split.
lhalf_patch1 = lwhole_patch.copy()
lhalf_patch1[:, (L_PATCH_MID-L_PATCH_LEFT):] = 0

# Second half: keep only the columns from the split onwards.
lhalf_patch2 = lwhole_patch.copy()
lhalf_patch2[:, :(L_PATCH_MID-L_PATCH_LEFT)] = 0
# The data is already 0/255, so this second threshold leaves the values unchanged.
_, lhalf_patch2 = cv2.threshold(lhalf_patch2, 255 + ADJ, 255, cv2.THRESH_BINARY)
# Right-pad with zero columns up to a width of 28. np.zeros defaults to
# float64, so the stacked result is float64 as well.
hpad = np.zeros((lhalf_patch2.shape[0], 28 - lhalf_patch2.shape[1]))
lhalf_patch2 = np.hstack((lhalf_patch2, hpad))

# Show the three variants; waitKey() blocks until a key is pressed.
cv2.imshow('whole',lwhole_patch)
cv2.imshow('half1',lhalf_patch1)
cv2.imshow('half2',lhalf_patch2)
cv2.waitKey()
cv2.destroyAllWindows()

# Heuristic: lit pixels in the patch's first column mean the score has two digits.
if np.any(lwhole_patch[:, 0] > 100):
    print("L's score is 2 digits")
else:
    print("L's score is 1 digit")

# --- right score ----------------------------------------------------------
# Same crop / binarise / split steps as for the left score (no padding here).
rwhole_patch = arb_frame[PATCH_TOP:PATCH_BOTTOM, R_PATCH_LEFT:R_PATCH_RIGHT]
_, rwhole_patch = cv2.threshold(rwhole_patch, 255 + ADJ, 255, cv2.THRESH_BINARY)

rhalf_patch1 = rwhole_patch.copy()
rhalf_patch1[:, (R_PATCH_MID-R_PATCH_LEFT):] = 0

rhalf_patch2 = rwhole_patch.copy()
rhalf_patch2[:, :(R_PATCH_MID-R_PATCH_LEFT)] = 0
_, rhalf_patch2 = cv2.threshold(rhalf_patch2, 255 + ADJ, 255, cv2.THRESH_BINARY)

cv2.imshow('whole',rwhole_patch)
cv2.imshow('half1',rhalf_patch1)
cv2.imshow('half2',rhalf_patch2)
cv2.waitKey()
cv2.destroyAllWindows()

: 

bounding boxes (tlx, tly, brx, bry):

y range (rows): 310-330
x range (columns): 265-290 (left score), 355-375 (right score)

lscore: (265, 310, 290, 330)
rscore: (355, 310, 370, 330)

In [53]:
from transformers import AutoImageProcessor, SiglipForImageClassification

# Pretrained checkpoint name, handed to both from_pretrained() calls so the
# classifier and its image preprocessor come from the same source.
model_name = "prithivMLmods/Mnist-Digits-SigLIP2"
mnist_model = SiglipForImageClassification.from_pretrained(model_name)
input_processor = AutoImageProcessor.from_pretrained(model_name, use_fast=True)

# Size, in pixels, of the region cropped around each score.
PATCH_HEIGHT, PATCH_WIDTH = 14, 19

# Row span shared by both score patches.
PATCH_TOP = 310
PATCH_BOTTOM = PATCH_TOP + PATCH_HEIGHT

# Column span of the left score patch; *_MID is the patch's floor-centre
# column shifted 2 px to the left.
L_PATCH_LEFT = 268
L_PATCH_RIGHT = L_PATCH_LEFT + PATCH_WIDTH
L_PATCH_MID = L_PATCH_LEFT + PATCH_WIDTH // 2 - 2

# Column span of the right score patch, laid out the same way.
R_PATCH_LEFT = 358
R_PATCH_RIGHT = R_PATCH_LEFT + PATCH_WIDTH
R_PATCH_MID = R_PATCH_LEFT + PATCH_WIDTH // 2 - 2

# Added to 255 to form the binarisation threshold used on the patches.
ADJ = -32

def predict_score_from_frame(frame, view_patches=False):
    """Predict the left and right scores shown in one video frame.

    For each side, the score patch is cropped from ``frame`` using the
    module-level ``PATCH_*`` constants, binarised at ``255 + ADJ`` and
    right-padded with zeros to a width of 28. If any pixel in the first column
    of the binarised patch is lit, the score is treated as two digits ("1X"):
    the columns left of the split are blanked and the remaining digit is
    classified, giving ``10 + digit``. Otherwise the whole patch is classified
    as a single digit.

    Parameters
    ----------
    frame : numpy.ndarray
        2-D (single-channel) image indexed as ``frame[row, col]``.
    view_patches : bool, default False
        When True, show the whole and masked patches of each side in OpenCV
        windows and block on a key press before continuing.

    Returns
    -------
    tuple
        ``((lscore, lprob), (rscore, rprob))`` where each score is an int and
        each prob is the softmax probability of the predicted digit class.
    """
    threshold = 255 + ADJ

    def horiz_pad(patch):
        """Right-pad ``patch`` with zero columns up to a width of 28."""
        # np.zeros defaults to float64, so the padded patch is float64 too.
        hpad = np.zeros((patch.shape[0], 28 - patch.shape[1]))
        return np.hstack((patch, hpad))

    def extract(left, right, mid):
        """Return (whole patch, masked right-half patch, double-digit flag)."""
        whole = frame[PATCH_TOP:PATCH_BOTTOM, left:right]
        _, whole = cv2.threshold(whole, threshold, 255, cv2.THRESH_BINARY)
        # if any pixels on the left border are lit, it's double digits
        # (1X where 0 <= X <= 5)
        double_digit = np.any(whole[:, 0] > 100)
        whole = horiz_pad(whole)

        # Blank everything left of the split so only the second digit remains.
        # The patch is already binary and already 28 wide, so no further
        # thresholding or padding is needed.
        half2 = whole.copy()
        half2[:, :(mid - left)] = 0
        return whole, half2, double_digit

    def show(side, whole, half2):
        """Display one side's patches until a key is pressed."""
        cv2.imshow(f'{side} patch whole', whole)
        cv2.imshow(f'{side} patch half2', half2)
        cv2.waitKey()
        cv2.destroyAllWindows()

    def classify(patch):
        """Run the digit classifier on ``patch`` and return class probabilities."""
        inputs = input_processor(images=Image.fromarray(patch), return_tensors='pt')
        with torch.no_grad():
            logits = mnist_model(**inputs).logits
            return torch.nn.functional.softmax(logits, dim=1).squeeze()

    def most_likely(whole, half2, double_digit):
        """Classify only the patch that matters and return (score, probability)."""
        if double_digit:
            probs = classify(half2)
            num = 10 + probs.argmax().item()

            # 17 not a possible answer, but 1's get classified as 7's too often so this is most likely
            if num == 17:
                num = 11
        else:
            # the whole patch is most accurate when the score is 1 digit
            probs = classify(whole)
            num = probs.argmax().item()

        return num, probs.max().item()

    # left score
    lwhole_patch, lhalf_patch2, l_double = extract(L_PATCH_LEFT, L_PATCH_RIGHT, L_PATCH_MID)
    if view_patches:
        show('left', lwhole_patch, lhalf_patch2)

    # right score (masked with its own R_* constants)
    rwhole_patch, rhalf_patch2, r_double = extract(R_PATCH_LEFT, R_PATCH_RIGHT, R_PATCH_MID)
    if view_patches:
        show('right', rwhole_patch, rhalf_patch2)

    return most_likely(lwhole_patch, lhalf_patch2, l_double),\
            most_likely(rwhole_patch, rhalf_patch2, r_double)


In [55]:
cv2.destroyAllWindows()

# cap.set(cv2.CAP_PROP_POS_FRAMES, 16_500)
# flags, arbitrary_frame = cap.read()
# arbitrary_frame = cv2.cvtColor(arbitrary_frame, cv2.COLOR_BGR2GRAY)
# print(predict_score_from_frame(arbitrary_frame, view_patches=True))

# cv2.imshow('arbitrary frame', arbitrary_frame)
# cv2.waitKey()
# cv2.destroyAllWindows()

# Step through the video every 250 frames, predict the score for each sampled
# frame and show the frame with the prediction in the window title.
for i in range(1, 84):
    frame_no = i * 250
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
    flags, arbitrary_frame = cap.read()
    if not flags:
        # Seeking past the end of the video (or a decode failure) yields no
        # frame; stop instead of passing None to cvtColor.
        print(f'could not read frame {frame_no}; stopping')
        break
    arbitrary_frame = cv2.cvtColor(arbitrary_frame, cv2.COLOR_BGR2GRAY)

    (lscore, lconf), (rscore, rconf) = predict_score_from_frame(arbitrary_frame)

    # The title reports the frame index that was actually sought.
    cv2.imshow(f'frame {frame_no}: {lscore} to {rscore} ({lconf*100:.2f}%, {rconf*100:.2f}%)', arbitrary_frame)
    cv2.waitKey()
    cv2.destroyAllWindows()

# Verifying labeling

In [11]:
import os
import pandas as pd

score_dir = './Data/ScoreInfo/Sabre/'

# os.listdir returns every entry in the directory (for example Jupyter's
# .ipynb_checkpoints folder), so keep only CSV files; sort for a stable order.
csvs = sorted(name for name in os.listdir(score_dir) if name.lower().endswith('.csv'))

# A row is "nominal" when, relative to the previous row of the same file,
# neither score went down and neither score rose by more than one point.
# Every other row is "exceptional".
nominal = 0
exceptional = 0

for csv_name in csvs:
    info = pd.read_csv(os.path.join(score_dir, csv_name), header=0)

    # Each file is compared against a 0-0 starting score.
    prev_lscore, prev_rscore = 0, 0
    for lscore, rscore in zip(info['lscore'], info['rscore']):
        went_down = prev_lscore > lscore or prev_rscore > rscore
        jumped = lscore > prev_lscore + 1 or rscore > prev_rscore + 1
        if went_down or jumped:
            exceptional += 1
        else:
            nominal += 1

        prev_lscore, prev_rscore = lscore, rscore

total = exceptional + nominal
if total:
    print(f'{exceptional = }, {nominal = }, %exceptional = {exceptional / total * 100 :.2f}%, %nominal = {nominal / total * 100 :.2f}%')
else:
    # Nothing to summarise; avoid dividing by zero.
    print(f'no score rows found in {score_dir}')

exceptional = 977, nominal = 15506, %exceptional = 5.93%, %nominal = 94.07%


In [None]:
import os
import pandas as pd

score_dir = './Data/ScoreInfo/Sabre/'

# Only rewrite actual CSV files; os.listdir also returns any other entry that
# happens to live in the directory.
csvs = sorted(name for name in os.listdir(score_dir) if name.lower().endswith('.csv'))

# Rewrite every score CSV in place without the leftover 'Unnamed: 0*' columns
# (these appear when a frame was saved with its index and read back again).
for csv_name in csvs:
    csv_path = os.path.join(score_dir, csv_name)
    info = pd.read_csv(csv_path, header=0)
    # info.set_index('frame_no')
    info = info.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore')

    # Refuse to overwrite a file that does not have the expected column.
    if 'frame_no' not in info.columns:
        raise KeyError(f"{csv_path}: missing 'frame_no' column")

    # Let pandas open the file itself so line endings are written exactly once
    # on every platform.
    info.to_csv(csv_path, index=False)

# Cut clips based on labels

In [None]:
import os
import pandas as pd

# Load the score detections for a single video and derive a clip window
# (start/end, same units as the `ms` column) for each nominal detection.
info = pd.read_csv('./Data/ScoreInfo/Sabre/0000sabre.csv', header=0)

for i, row in info.iterrows():
    # skip the "first detection" which is really just the 0-0 score in the first frame
    if i == 0:
        continue

    # Positional unpack: assumes the CSV has exactly these seven columns in
    # this order - TODO confirm against the file that writes them.
    # NOTE(review): `nominal` here rebinds any notebook-level variable of the same name.
    frame_no, ms, lscore, rscore, lconf, rconf, nominal = row

    # skip any data marked as not nominal
    if not nominal:
        continue

    # on "average" (not rigorously tested), referees update the scoreboard ~2-3 seconds after the point is scored,
    # so we can ignore the last second of video before a scoreboard update
    # (`ms` is presumably the detection timestamp in milliseconds - confirm)
    clip_end = ms - 1000

    # using clip length of 5 seconds for now (maybe variable in the future?)
    # NOTE(review): this goes negative whenever ms < 6000.
    clip_start = clip_end - 5000
