/
yolov5.py
100 lines (85 loc) · 2.86 KB
/
yolov5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from dataclasses import dataclass
from functools import partial
from typing import Tuple, Optional, List
import numpy as np
import pandas as pd
import torch
from dataclasses_json import DataClassJsonMixin
from torch import nn
from entities import (
ModelObject,
InferenceFunction,
InferenceResult,
BoundingBox,
PredictionDetails,
Detection,
)
# Repository identifier passed to torch.hub.load when fetching the YOLOv5 model.
MODEL_REPOSITORY = "ultralytics/yolov5"
@dataclass(frozen=True)
class YoloV5FactoryParameters(DataClassJsonMixin):
    """Immutable settings consumed by ``load_yolov5_model``."""

    # Entry point name handed to torch.hub.load for the YOLOv5 repository.
    model_version: str
    # Forwarded as the ``size=`` keyword when the model is called for inference.
    inference_size: int
def load_yolov5_model(
    factory_parameters: YoloV5FactoryParameters,
    device: torch.device,
    weights_path: Optional[str] = None,
) -> Tuple[ModelObject, InferenceFunction]:
    """Fetch a YOLOv5 model through torch.hub and pair it with its inference callable.

    Args:
        factory_parameters: Provides the hub entry point name
            (``model_version``) and the ``inference_size`` bound into the
            returned inference function.
        device: Forwarded to ``torch.hub.load`` and used as ``map_location``
            when custom weights are read.
        weights_path: Optional path to a state dict that is loaded into the
            hub model. When ``None`` the hub model is used as fetched.

    Returns:
        A tuple of the model (switched to evaluation mode) and ``infer`` with
        ``inference_size`` already bound.
    """
    yolo_model = torch.hub.load(
        MODEL_REPOSITORY,
        factory_parameters.model_version,
        device=device,
    )

    if weights_path is not None:
        # NOTE(review): depending on the PyTorch version, torch.load may
        # unpickle arbitrary objects -- confirm weights come from a trusted source.
        custom_weights = torch.load(weights_path, map_location=device)
        yolo_model.load_state_dict(custom_weights)

    yolo_model.eval()

    inference_function = partial(
        infer, inference_size=factory_parameters.inference_size
    )
    return yolo_model, inference_function
def infer(
    model: nn.Module, images: List[np.ndarray], _: torch.device, inference_size: int
) -> List[InferenceResult]:
    """Run the model on a batch of images and convert the output per image.

    Args:
        model: Callable model; invoked as ``model(images, size=inference_size)``.
            Its output must expose ``.pandas().xyxy``, which is iterated in
            lockstep with ``images``.
        images: Input images; each one is also passed on to
            ``prepare_inference_result`` together with its result frame.
        _: Unused device argument, kept so the signature stays compatible
            with callers.
        inference_size: Value passed to the model as ``size``.

    Returns:
        One ``InferenceResult`` per (image, result frame) pair.
    """
    with torch.no_grad():
        predictions = model(images, size=inference_size)
        per_image_frames = predictions.pandas().xyxy

        results = []
        for image, frame in zip(images, per_image_frames):
            results.append(prepare_inference_result(image=image, result=frame))
        return results
def prepare_inference_result(
    image: np.ndarray, result: pd.DataFrame
) -> InferenceResult:
    """Build an ``InferenceResult`` from one image and its detections frame.

    Args:
        image: Source image; the first two entries of ``image.shape`` are
            taken as height and width, in that order.
        result: Data frame with one row per detection. Each row is passed to
            ``prepare_detection_bounding_box`` and
            ``prepare_prediction_details``.

    Returns:
        An ``InferenceResult`` carrying the image height, image width and the
        list of converted detections (empty when ``result`` has no rows).
    """
    height, width = image.shape[:2]

    detections = [
        Detection(
            bounding_box=prepare_detection_bounding_box(
                detection=row,
                image_height=height,
                image_width=width,
            ),
            prediction_details=prepare_prediction_details(detection=row),
        )
        for _, row in result.iterrows()
    ]

    return InferenceResult(
        image_height=height,
        image_width=width,
        detections=detections,
    )
def prepare_detection_bounding_box(
    detection: pd.Series, image_height: int, image_width: int
) -> BoundingBox:
    """Convert a detection's box corners into image-relative coordinates.

    The ``xmin``/``xmax`` values are divided by ``image_width`` and the
    ``ymin``/``ymax`` values by ``image_height``.

    Args:
        detection: Row exposing ``xmin``, ``ymin``, ``xmax`` and ``ymax``
            (presumably pixel coordinates -- confirm against the model output).
        image_height: Divisor for the y coordinates.
        image_width: Divisor for the x coordinates.

    Returns:
        A ``BoundingBox`` holding the scaled top-left and bottom-right corners.
    """
    return BoundingBox(
        left_top_x=detection.xmin / image_width,
        left_top_y=detection.ymin / image_height,
        right_bottom_x=detection.xmax / image_width,
        right_bottom_y=detection.ymax / image_height,
    )
def prepare_prediction_details(detection: pd.Series) -> PredictionDetails:
    """Extract confidence, class id and class name from a detection row.

    Args:
        detection: Row providing ``confidence``, ``"class"`` and ``"name"``
            entries.

    Returns:
        A ``PredictionDetails`` populated from those three entries.
    """
    confidence = detection.confidence
    # Bracket access is required here: ``class`` is a Python keyword and
    # ``.name`` on a Series is the Series' own name, not the "name" entry.
    class_id = detection["class"]
    class_name = detection["name"]
    return PredictionDetails(
        confidence=confidence,
        class_id=class_id,
        class_name=class_name,
    )