In [None]:
import cv2
import pytesseract
import datetime
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import tesserocr
import pandas as pd

In [None]:
# Path of the video file that is handed to cv2.VideoCapture in the next cell.
cap_path = "Starlink Mission.mp4"

In [None]:
# Open the video once; the frame helpers defined below all read from this
# shared capture object.
cap = cv2.VideoCapture(cap_path)
# VideoCapture does not raise when the file is missing or unreadable. Without
# this check the problem only surfaces later, as reads that yield no frame.
if not cap.isOpened():
    raise IOError(f"Could not open video file: {cap_path!r}")

In [None]:
def ms(delta):
    """Convert a ``datetime.timedelta`` into milliseconds.

    Args:
        delta: the ``datetime.timedelta`` to convert.

    Returns:
        The full duration in milliseconds as a float.
    """
    # A timedelta is stored as (days, seconds, microseconds). All three parts
    # must be summed: `seconds` alone leaves out whole days, and negative
    # durations are stored with a negative `days` part.
    whole_seconds = delta.days * 86400 + delta.seconds
    return whole_seconds * 1000 + delta.microseconds / 1000

In [None]:
def frame_to_pil_image(frame):
    """Turn an OpenCV frame into a PIL image.

    The frame is converted with ``cv2.COLOR_BGR2RGB`` before being wrapped
    by ``Image.fromarray``.
    """
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))


def get_frame_at_current_position():
    """Read one frame from the notebook-level ``cap`` and return it.

    Only the second element of ``cap.read()`` is returned; the first element
    (the status value) is discarded, so a failed read is not reported here.
    """
    return cap.read()[1]


def get_image_at_current_position():
    """Read one frame from ``cap`` and return it as a PIL image."""
    return frame_to_pil_image(get_frame_at_current_position())


def get_frame_at_position(ts):
    """Seek ``cap`` to the offset ``ts`` and return the frame read there.

    Args:
        ts: offset into the video; converted to milliseconds with ``ms``.
    """
    position_ms = ms(ts)
    cap.set(cv2.CAP_PROP_POS_MSEC, position_ms)
    return get_frame_at_current_position()


def get_image_at_position(ts):
    """Seek ``cap`` to the offset ``ts`` and return that frame as a PIL image.

    Args:
        ts: offset into the video; converted to milliseconds with ``ms``.
    """
    position_ms = ms(ts)
    cap.set(cv2.CAP_PROP_POS_MSEC, position_ms)
    return frame_to_pil_image(get_frame_at_current_position())

In [None]:
def resize_img(cv2_img, scale):
  """Return ``cv2_img`` scaled by ``scale`` along both axes.

  The target size is the source width and height multiplied by ``scale`` and
  truncated to integers; resampling uses ``cv2.INTER_AREA``.
  """
  src_height, src_width = cv2_img.shape[:2]
  target_size = (int(src_width * scale), int(src_height * scale))
  return cv2.resize(cv2_img, target_size, interpolation=cv2.INTER_AREA)

In [None]:
# Offsets into the video, used by later cells to seek to specific frames.
launch_ts = datetime.timedelta(minutes=12, seconds=55)  # 12:55
# presumably MECO = main engine cut-off -- confirm
meco_ts = datetime.timedelta(minutes=15, seconds=29)  # 15:29
s2_first_telemetry_ts = datetime.timedelta(minutes=15, seconds=46)  # 15:46


In [None]:
# Raw frame at the launch offset; `img` keeps the channel order delivered by
# the capture (BGR, which is why frame_to_pil_image converts with BGR2RGB).
img = get_frame_at_position(launch_ts)
# Quarter-size preview. Going through frame_to_pil_image shows true colours;
# Image.fromarray on the raw frame would read it as RGB and swap red and blue.
frame_to_pil_image(resize_img(img, 0.25))

In [None]:
# Show the Python type of the frame returned by get_frame_at_position.
type(img)

In [None]:
# Crop boxes as ((width, height), (left, top), label), measured in pixels.
rects = [
  ((118, 34), (108, 967), "stage1-speed"),
  ((118, 34), (265, 967), "stage1-altitude"),
  ((118, 34), (1529, 967), "stage2-speed"),
  ((118, 34), (1687, 967), "stage2-altitude"),
]

# The pixel values above refer to a 1920x1080 frame. Dividing by those
# dimensions gives fractions that can be applied to a frame of any size.
rects_relative = [
  ((box_w / 1920, box_h / 1080), (left / 1920, top / 1080), label)
  for (box_w, box_h), (left, top), label in rects
]

print(rects)
print(rects_relative)

## preview of crop boxes on top of image

In [None]:
# Read the frame at the launch offset and convert it to RGB for display.
frame = get_frame_at_position(launch_ts)
cv2_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# shape[1] is used as the width and shape[0] as the height.
width = int(cv2_img.shape[1])
height = int(cv2_img.shape[0])

# Scale every relative crop box to this frame's size and outline it with
# colour (255,0,0), which is red on the RGB image.
for ((w_rel,h_rel), (x_rel,y_rel), name) in rects_relative:
  x = int(x_rel * width)
  y = int(y_rel * height)
  w = int(w_rel * width)
  h = int(h_rel * height)
  # NOTE(review): cv2.rectangle presumably draws onto cv2_img itself, so
  # cv2_img2 would be the same image rather than a copy -- confirm.
  cv2_img2 = cv2.rectangle(cv2_img, (x, y), (x+w, y+h), (255,0,0))


print('Original Dimensions : ', cv2_img2.shape)
# Halve the annotated image for display.
resized = resize_img(cv2_img2, 0.5)
print('Resized Dimensions : ', resized.shape)

Image.fromarray(resized)

In [None]:
def extract_regions_from_image(cv2_img, rects_rel=None):
  """Crop the named regions out of an image.

  Args:
    cv2_img: image array indexed as [row, column, ...]; only ``shape[0]``
      (height) and ``shape[1]`` (width) are inspected.
    rects_rel: optional iterable of ``((w_rel, h_rel), (x_rel, y_rel), name)``
      tuples, with sizes and offsets given as fractions of the image width
      and height. Defaults to the notebook-level ``rects_relative``.

  Returns:
    dict mapping each ``name`` to the slice ``cv2_img[y:y+h, x:x+w]``. The
    values are slices of the input (views for numpy arrays), not copies.
  """
  if rects_rel is None:
    # Looked up at call time, so re-running the cell that defines
    # rects_relative takes effect without redefining this function.
    rects_rel = rects_relative

  img_height, img_width = cv2_img.shape[:2]
  res = {}
  for (w_rel, h_rel), (x_rel, y_rel), name in rects_rel:
    # Fractions -> pixels for this particular image size (truncating).
    x = int(x_rel * img_width)
    y = int(y_rel * img_height)
    w = int(w_rel * img_width)
    h = int(h_rel * img_height)
    res[name] = cv2_img[y:y + h, x:x + w]

  return res

In [None]:
def plot_regions_at_ts(ts):
  """Show the four telemetry crops of the frame at ``ts`` in a 2x2 grid.

  Args:
    ts: video offset passed on to ``get_frame_at_position``.
  """
  frame = get_frame_at_position(ts)
  cv2_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
  regions = extract_regions_from_image(cv2_img)

  # Size only this figure (width, height in inches). Passing figsize here,
  # instead of assigning rcParams['figure.figsize'], leaves the default size
  # of every other figure in the notebook untouched.
  fig, axs = plt.subplots(2, 2, figsize=(6, 3))

  names = ["stage1-speed", "stage1-altitude", "stage2-speed", "stage2-altitude"]

  # axs.flat walks the grid row by row: top-left, top-right, bottom-left,
  # bottom-right -- stage 1 on the first row, stage 2 on the second.
  for ax, name in zip(axs.flat, names):
    ax.imshow(Image.fromarray(regions[name]))
    ax.axis("off")
    ax.set_title(name)

In [None]:
# Telemetry crops at the launch offset.
plot_regions_at_ts(launch_ts)

In [None]:
# Telemetry crops at the meco_ts offset.
plot_regions_at_ts(meco_ts)

In [None]:
# Telemetry crops at the s2_first_telemetry_ts offset.
plot_regions_at_ts(s2_first_telemetry_ts)

In [None]:
# Offset 15:55. Judging by the variable name this is a frame with poor image
# quality around fairing deploy -- confirm against the video.
fairing_deploy_bad_quality = datetime.timedelta(minutes=15, seconds=55)
plot_regions_at_ts(fairing_deploy_bad_quality)

## talk to tesseract

In [None]:
# Frame at the meco_ts offset, converted to RGB, then cropped into the named
# telemetry regions that the OCR cells below read from.
frame = get_frame_at_position(meco_ts)
cv2_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

regions = extract_regions_from_image(cv2_img)

In [None]:
# Option string handed to pytesseract unchanged.
custom_config = r'--oem 3 --psm 6 outputbase digits'

# OCR the four full-size crops in a fixed order and unpack the results into
# stage-1 speed/altitude and stage-2 speed/altitude.
s1_v, s1_h, s2_v, s2_h = (
  pytesseract.image_to_string(regions[region_name], config=custom_config)
  for region_name in ("stage1-speed", "stage1-altitude", "stage2-speed", "stage2-altitude")
)

(s1_v, s1_h, s2_v, s2_h)

In [None]:
# Half-size copy of every crop, keyed by the same region names.
regions_resized = { k: resize_img(img, 0.5) for k, img in regions.items() }

In [None]:
# Option string handed to pytesseract unchanged.
custom_config = r'--oem 3 --psm 6 outputbase digits'

# Same OCR pass as on the full-size crops, this time on the half-size ones.
s1_v, s1_h, s2_v, s2_h = (
  pytesseract.image_to_string(regions_resized[region_name], config=custom_config)
  for region_name in ("stage1-speed", "stage1-altitude", "stage2-speed", "stage2-altitude")
)

(s1_v, s1_h, s2_v, s2_h)

## speeding things up with tesserocr?

In [None]:
# tesserocr handle created with page segmentation mode SINGLE_BLOCK.
api = tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.SINGLE_BLOCK)
# NOTE(review): `digits` is not referenced in this cell; the whitelist below
# is spelled out literally.
from string import digits
# Character whitelist handed to Tesseract: the ten digits and '.'.
api.SetVariable('tessedit_char_whitelist', ".1234567890")

In [None]:
# Half-size crops wrapped as PIL images, the form passed to api.SetImage below.
pil_regions_resized = { k: Image.fromarray(resize_img(img, 0.5)) for k, img in regions.items() }

In [None]:
# test_images = [pil_regions_resized["stage1-speed"], pil_regions_resized["stage1-altitude"]] * 500
# for img in test_images:
#   api.SetImage(img)
#   text = api.GetUTF8Text()
#   print(f"{k}: {text}")

## 500 images in 8.9s --> 17.8ms/img --> almost 60fps :D

In [None]:
# Run every half-size crop through the tesserocr handle and print the raw
# text next to its region name.
for k, img in pil_regions_resized.items():
  pil_image = img
  api.SetImage(pil_image)
  text = api.GetUTF8Text()
  print(f"{k}: {text}")

In [None]:
# Show the crops at meco_ts again, for comparison with the OCR output above.
plot_regions_at_ts(meco_ts)

In [None]:
# Sanity check of ms(): an offset 16 ms after launch should print as a value
# 16 larger than the launch offset.
next_ts = launch_ts + datetime.timedelta(milliseconds=16)

print(launch_ts)
print(ms(launch_ts))
print(ms(next_ts))

launch_ts, next_ts


## speed test raw cv2 video reading

In [None]:
# Seek to the launch offset, then read 100 frames back to back.
cap.set(cv2.CAP_PROP_POS_MSEC, ms(launch_ts))

# 100 frames in 0.3s

frame_cnt = 0
while frame_cnt < 100:
  # The status value in `res` is not checked here.
  res, frame = cap.read()
  frame_cnt += 1
  # Report the capture's frame and millisecond position once, after the last read.
  if frame_cnt == 100:
    print(f"frame_cnt: {frame_cnt}, pos_frames: {cap.get(cv2.CAP_PROP_POS_FRAMES)}, pos_msec: {cap.get(cv2.CAP_PROP_POS_MSEC)} ")


## Experimenting with mutating the image to enhance

In [None]:
def plot_images(labelled_images):
  """Show labelled images side by side in a single row.

  Args:
    labelled_images: iterable of ``(name, image)`` pairs, for example a list
      of tuples or ``dict.items()``. Each image must be accepted by
      ``Image.fromarray``.

  Returns:
    None. Nothing is drawn when ``labelled_images`` is empty.
  """
  # Materialise once so generators and dict views can be counted and iterated.
  labelled_images = list(labelled_images)
  cnt = len(labelled_images)
  if cnt == 0:
    # A subplot grid cannot have zero columns.
    return

  # figsize sizes only this figure (inches) instead of changing the global
  # rcParams. squeeze=False makes subplots return a 2-D array of axes even
  # for one image; otherwise it returns a bare Axes that zip() cannot iterate.
  fig, axs = plt.subplots(1, cnt, figsize=(12, 4), squeeze=False)

  for ax, (name, img) in zip(axs.flat, labelled_images):
    # cmap applies to single-channel images only, so grayscale and threshold
    # images render in gray; RGB(A) data ignores it.
    ax.imshow(Image.fromarray(img), cmap="gray")
    ax.axis("off")
    ax.set_title(name)

In [None]:
# Raw frame at the launch offset, in the capture's BGR channel order.
img = get_frame_at_position(launch_ts)
resized = resize_img(img, 0.25)
inverted = ~resized #cv2.bitwise_not(resized)
# Note: the grayscale and threshold variants are derived from the full-size
# frame, not from `resized`.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# NOTE(review): with THRESH_OTSU the threshold is presumably computed by Otsu's
# method and the 25 passed here is not used -- confirm against the OpenCV docs.
threshold = cv2.threshold(gray, 25, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

# The colour variants are converted to RGB for display only; plotting the BGR
# arrays directly would show them with red and blue swapped. The variables
# img / resized / inverted themselves stay in BGR order.
labelled_images = [
  ("img", cv2.cvtColor(img, cv2.COLOR_BGR2RGB)),
  ("resized", cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)),
  ("inverted", cv2.cvtColor(inverted, cv2.COLOR_BGR2RGB)),
  ("gray", gray),
  ("threshold", threshold),
]
plot_images(labelled_images)



In [None]:
def add_border(cv2_img):
  """Return ``cv2_img`` padded on all four sides by a 10-pixel constant border.

  The border is created with ``cv2.BORDER_CONSTANT`` and value ``[0,0,0]``.
  """
  pad = 10
  border_kwargs = dict(
    top=pad,
    bottom=pad,
    left=pad,
    right=pad,
    borderType=cv2.BORDER_CONSTANT,
    value=[0, 0, 0],
  )
  return cv2.copyMakeBorder(cv2_img, **border_kwargs)


In [None]:
# Tesseract handle for this run: SINGLE_LINE page segmentation, with the
# character whitelist limited to digits and '.'.
api = tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.SINGLE_LINE)
from string import digits
api.SetVariable('tessedit_char_whitelist', ".1234567890")

cap.set(cv2.CAP_PROP_POS_MSEC, ms(launch_ts))

# Frame index the capture reports after seeking to launch; all sampled
# positions are computed relative to it.
start_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)

ocr_result = []

frame_cnt = 0
frame_delta = 15  # sample every 15th frame --> 2 samples/s for a 30 fps video
fps = cap.get(cv2.CAP_PROP_FPS)
capture_seconds = 15 # 580 # 9:40min ==> 580s
capture_frames_per_second = fps / frame_delta # 2
max_frame_count = capture_seconds * capture_frames_per_second
# capture_frames_per_second is a float (e.g. 29.97 / 15), so testing
# `frame_cnt % (rate * 10) == 0` against it may never be true. Use a
# whole-number interval of roughly ten seconds' worth of samples instead.
progress_interval = max(1, round(capture_frames_per_second * 10))

while frame_cnt <= max_frame_count:
  target_frame = start_frame + frame_delta*frame_cnt
  cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
  ok, frame = cap.read()
  if not ok:
    # End of the video or a failed seek/decode: there is no frame to invert,
    # so stop cleanly and keep what has been collected so far.
    print(f"could not read frame {target_frame}; stopping after {frame_cnt} frames")
    break

  current_frame = cap.get(cv2.CAP_PROP_POS_FRAMES)
  current_time_ms = cap.get(cv2.CAP_PROP_POS_MSEC)

  res = {}

  # resized = resize_img(frame, 0.5)
  inverted = ~frame

  cv2_img = cv2.cvtColor(inverted, cv2.COLOR_BGR2RGB)
  regions = extract_regions_from_image(cv2_img)

  for k, img in regions.items():
    # The crops come from the RGB image built above, so the grayscale
    # conversion has to use the RGB code to weight the channels correctly.
    grayImage = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    (thresh, blackAndWhiteImage) = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
    # with_border = add_border(blackAndWhiteImage)

    pil_image = Image.fromarray(blackAndWhiteImage)
    api.SetImage(pil_image)
    text = api.GetUTF8Text()
    res[k] = text.strip()
    # print(f"{k}: {text}")

  res['frame'] = current_frame
  res['ms'] = current_time_ms
  # Store copies of the crops. The slices returned by
  # extract_regions_from_image are views into the full frame, and keeping
  # them would hold every sampled full-size frame in memory.
  res['regions'] = {k: crop.copy() for k, crop in regions.items()}

  ocr_result.append(res)
  frame_cnt += 1
  if frame_cnt % progress_interval == 0:
    print(f"captured {frame_cnt} frames --> {frame_cnt / (capture_frames_per_second)}s" )

# Re-base frame numbers and timestamps so that the first sample is the origin.
# Guarded because ocr_result is empty when the very first read fails.
if ocr_result:
  start_time = ocr_result[0]['ms']
  for res in ocr_result:
    res['ms'] = res['ms'] - start_time
    res['frame'] = res['frame'] - start_frame



In [None]:
# One row per sampled frame; the columns come from the keys of each result dict.
df = pd.DataFrame(ocr_result)
# Display the table without the image crops. The result of drop is only
# shown, not assigned, so df itself keeps its "regions" column.
df.drop("regions", axis=1)

In [None]:
# Inspect one sample in detail (index 11 of the capture run).
res = ocr_result[11]
# Shallow copy without the image crops, so the printed dict stays readable.
res_cleaned = dict(res)
del res_cleaned['regions']
regions = res['regions']
print(res_cleaned)
# regions.items() yields (name, image) pairs, the form plot_images expects.
plot_images(regions.items())
print(len(regions))
print(type(regions))

# Keep the stage-1 speed crop in `img` and display it.
img = regions['stage1-speed']
Image.fromarray(img)

# Image.fromarray(res['regions']['stage1-speed'])

In [None]:
# Fresh Tesseract handle in RAW_LINE mode, whitelist limited to digits and '.'.
api = tesserocr.PyTessBaseAPI(psm=tesserocr.PSM.RAW_LINE)
from string import digits
api.SetVariable('tessedit_char_whitelist', ".1234567890")

# `img` is a crop of an image that the capture loop converted with
# COLOR_BGR2RGB before cropping, so it is in RGB order and the grayscale
# conversion has to use the RGB code.
grayImage = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
(thresh, blackAndWhiteImage) = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
# bigger = resize_img(blackAndWhiteImage, 1.5)
# NOTE(review): with_border is computed but the OCR below runs on the
# un-bordered image -- confirm which of the two was meant to be recognised.
with_border = add_border(blackAndWhiteImage)
pil_img = Image.fromarray(blackAndWhiteImage)
api.SetImage(pil_img)
api.GetUTF8Text()

In [None]:
# Display the thresholded crop that was handed to the OCR engine.
pil_img

In [None]:
# Run the same thresholded crop through pytesseract for comparison with the
# tesserocr result.
custom_config = r'--oem 3 --psm 6 outputbase digits'
pytesseract.image_to_string(pil_img, config=custom_config)

In [None]:
# Size of the crop as reported by PIL.
print(pil_img.size)