In [8]:
# Requirements: install the extra packages this notebook needs into the kernel's environment.
# NOTE(review): easyocr is installed unpinned while ocrd-fork-pylsd is pinned to 0.0.3 —
# consider pinning easyocr as well so a fresh run resolves the same versions.
%pip install -q easyocr
%pip install -q ocrd-fork-pylsd==0.0.3

In [None]:
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from datasets.test_dataset import LoadImages
from perspective import Perspective
from screen_ocr import ScreenOCR
from models.experimental import attempt_load
from utils.torch_utils import select_device
from utils.general import non_max_suppression, scale_coords, xyxy2xywh
import pandas as pd
import time

In [None]:
class FullPipeline(object):
    """End-to-end vitals extraction: detect the screen, crop and rectify it, then OCR it.

    Attributes:
        device: torch device returned by ``select_device``.
        detection_model: detector returned by ``attempt_load``, kept in eval mode.
        stride: maximum stride of the detection model.
        classifier_model: optional screen-type classifier; ``None`` until one is loaded.
        ppt: ``Perspective`` helper used to rectify the cropped screen.
        ocr: ``ScreenOCR`` helper used to read the vitals.
    """

    def __init__(self, weights, device=''):
        """Load the detection model and build the helper objects.

        Args:
            weights: checkpoint path forwarded to ``attempt_load``.
            device: device string forwarded to ``select_device`` ('' lets it choose).
        """
        self.device = select_device(device)
        self.detection_model = attempt_load(weights, map_location=self.device)
        self.stride = int(self.detection_model.stride.max())
        self.classifier_model = None   # TODO: load the screen-type classifier
        self.ppt = Perspective()       # class for handling perspective change
        self.ocr = ScreenOCR()         # class for handling OCR part
        self.detection_model.eval()
        # The classifier is optional; calling .eval() on None used to crash construction.
        if self.classifier_model is not None:
            self.classifier_model.eval()

    def crop_bboxes(self, image, xywh, margin=15):
        """Crop a padded bounding box out of ``image`` and rectify its perspective.

        Args:
            image: height-first image array, either (H, W) or (H, W, C).
            xywh: normalised ``[x_center, y_center, width, height]`` box, i.e. the
                centre-based format produced by ``xyxy2xywh`` divided by the image size.
            margin: extra pixels kept around the box on every side (clipped to the image).

        Returns:
            The first value returned by ``Perspective.shift_perspective`` for the crop.
            The accompanying success flag is ignored.
        """
        h_img, w_img = image.shape[:2]
        x_center, y_center, box_w, box_h = xywh[:4]
        half_w = box_w * w_img / 2
        half_h = box_h * h_img / 2
        # Convert centre/size to clipped pixel corners, padded by `margin`.
        x1 = int(max(0, x_center * w_img - half_w - margin))
        y1 = int(max(0, y_center * h_img - half_h - margin))
        x2 = int(min(w_img, x_center * w_img + half_w + margin))
        y2 = int(min(h_img, y_center * h_img + half_h + margin))
        # Rows are y and columns are x; any trailing channel axis is kept intact.
        cropped_image = image[y1:y2, x1:x2]
        warped_image, success = self.ppt.shift_perspective(cropped_image)
        return warped_image

    def clean_img(self, img):
        """Placeholder cleaning step: currently returns ``img`` unchanged."""
        return img

    def evaluate(self, test_data, classify=False):
        """Run detection, cropping and OCR over every item of ``test_data``.

        Args:
            test_data: iterable yielding ``(path, img, im0)`` where ``img`` is the numpy
                array fed to the detector and ``im0`` is the original height-first image.
                Assumes ``img`` holds 0-255 pixel values, as YOLO-style loaders produce
                — TODO confirm against ``LoadImages``.
            classify: when True, also run the screen-type classifier on each crop.

        Returns:
            pandas.DataFrame with one row per input image. The columns 'rr', 'hr',
            'spo2', 'map', 'sys', 'dia' come first, followed by any extra keys the OCR
            returned. Images with no detection yield an all-missing row.

        Raises:
            RuntimeError: if ``classify`` is True but no classifier model is loaded.
        """
        if classify and self.classifier_model is None:
            raise RuntimeError("classify=True requires a classifier model, but none is loaded")

        columns = ['rr', 'hr', 'spo2', 'map', 'sys', 'dia']
        records = []
        for _path, img, im0 in test_data:
            img = torch.from_numpy(img).to(self.device)
            # Cast before scaling: in-place true division fails on integer tensors.
            img = img.float() / 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)
            with torch.no_grad():
                pred = self.detection_model(img, augment=True)[0]
            # 0.25 / 0.45 are the confidence and IoU thresholds in YOLO's NMS signature.
            pred = non_max_suppression(pred, 0.25, 0.45, agnostic=True)
            det = pred[0]
            if not len(det):
                # No screen found: keep the output aligned with the inputs via an empty row
                # instead of reusing a stale (or undefined) box.
                records.append({})
                continue

            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # (w, h, w, h) normalisation gain
            # Only one box is used per image: the first row of the NMS output, which is
            # also what the previous reversed() loop ended up keeping.
            *xyxy, _conf, _cls = det[0]
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()
            cropped_image = self.crop_bboxes(im0, xywh)
            if classify:
                with torch.no_grad():
                    # TODO: the predicted screen type is not consumed yet.
                    _screen_types = self.classifier_model(cropped_image)
            # cleaning and OCR part — run on the rectified crop, not on the network input.
            # NOTE(review): confirm the colour order / dtype ScreenOCR.read_vitals expects.
            cleaned = self.clean_img(cropped_image)
            vitals_dict = self.ocr.read_vitals(image=cleaned, img_rgb=cropped_image)
            records.append(vitals_dict)

        # Build the frame once (DataFrame.append was removed in pandas 2.0 and is quadratic).
        df = pd.DataFrame(records)
        extra_columns = [col for col in df.columns if col not in columns]
        return df.reindex(columns=columns + extra_columns)


In [None]:
YOLOv7_WEIGHT  = ''   # path to the YOLOv7 checkpoint; must be set before calling inference()

def inference(image_path:str):
    """
    Function responsible for inference.
    Args:
      image_path: str, path to image file. eg. "input/aveksha_micu_mon--209_2023_1_17_12_0_34.jpeg"
    Returns:
      result: dict, final output dictionary. eg. {"HR":"80", "SPO2":"98", "RR":"15", "SBP":"126", "DBP":"86"}
              Built from the first row of the pipeline output ('hr', 'spo2', 'rr', 'sys', 'dia'
              columns); vitals that are missing in that row are left out of the dictionary.
    Raises:
      ValueError: if YOLOv7_WEIGHT has not been set to a checkpoint path.
    """
    # Fail fast with a clear message instead of an opaque error from inside model loading.
    if not YOLOv7_WEIGHT:
        raise ValueError("YOLOv7_WEIGHT is empty; set it to the detection checkpoint path")

    fpl = FullPipeline(YOLOv7_WEIGHT)
    dataset = LoadImages(image_path, img_size=640, stride=fpl.stride)
    df = fpl.evaluate(dataset)

    result = {}
    if len(df):
        # Output key -> DataFrame column produced by FullPipeline.evaluate.
        key_to_column = (("HR", "hr"), ("SPO2", "spo2"), ("RR", "rr"), ("SBP", "sys"), ("DBP", "dia"))
        for out_key, column in key_to_column:
            if column not in df.columns:
                continue
            # Read per column so one missing vital does not upcast the others to float.
            value = df[column].iloc[0]
            is_missing = value is None or (pd.api.types.is_scalar(value) and pd.isna(value))
            if not is_missing:
                result[out_key] = str(value)

    return result