# 1. 라이브러리 로드

In [1]:
import argparse
import cv2

from yolo import YOLO

# 2. Hand Detection

### 2-1. Hand Detection Yolo Model 로드

In [2]:
yolo = YOLO("models/cross-hands-tiny-prn.cfg", "models/cross-hands-tiny-prn.weights", ["hand"])

yolo.size = 416  # int(args.size)
yolo.confidence = 0.2  # float(args.confidence)

conf_sum = 0
detection_count = 0

### 2-2. 이미지 파일 로드

In [3]:
img_file_name = "drink_1_freehand"
img_file_path = "datasets/images/" + img_file_name + ".jpg"

mat = cv2.imread(img_file_path)
width, height, inference_time, results = yolo.inference(mat)

### 2-3. Hand Detection 수행 및 영역 정보 저장

In [4]:
hand_pos = [] # x1, y1, x2, y2

detection_count = 0

for detection in results:
    id, name, confidence, x, y, w, h = detection
    cx = x + (w / 2)
    cy = y + (h / 2)

    conf_sum += confidence
    detection_count += 1

    # draw a bounding box rectangle and label on the image
    color = (255, 0, 255)
    cv2.rectangle(mat, (x, y), (x + w, y + h), color, 3)
    text = "%s (%s)" % (name, round(confidence, 2))
    cv2.putText(mat, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX,
                1, color, 3)

    print("%s with %s confidence" % (name, round(confidence, 2)))
    # cv2.imwrite("export.jpg", mat)
    
    hand_pos.extend([x, y, x+w, x+h])

# show the output image
mini_mat = cv2.resize(mat, dsize=(608, 608))
cv2.imshow('image', mini_mat)
cv2.waitKey(0)

if detection_count != 1:
    print("화면에는 하나의 손이 나와야 합니다")
    print("현재 화면에", detection_count, '개의 손이 감지되었습니다.')
    exit()
    
print("손 검출 시작 좌표(왼쪽위) :", hand_pos[0], hand_pos[1])
print("손 검출 끝 좌표(오른쪽아래) :", hand_pos[2], hand_pos[3])
print("손 검출 영역 크기 :", hand_pos[2]*hand_pos[3])

hand with 0.96 confidence
손 검출 시작 좌표(왼쪽위) : 277 552
손 검출 끝 좌표(오른쪽아래) : 569 703
손 검출 영역 크기 : 400007


# 3. Product Detection

### 3-1. Product Detection 수행 및 좌표 저장

* --weight : you can change the number of class that the system will detecth throught the weight file (weights/...)
* --img : you can change the size of img
* --conf : you can chagne the confidence threshold level
* --source : input file that you want to detect the product and hand

In [5]:
!python yolov5/detect.py --weights weights/best_10class_150.pt --img 416 --conf 0.3 --source datasets/images/drink_1_freehand.jpg --line-thickness 4 --save-txt

source : datasets/images/drink_1_freehand.jpg → drink_1_freehand
save_dir : datasets\results\drink_1_freehand


[34m[1mdetect: [0mweights=['weights/best_10class_150.pt'], source=datasets/images/drink_1_freehand.jpg, data=yolov5\data\coco128.yaml, imgsz=[416, 416], conf_thres=0.3, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=yolov5\runs\detect, name=exp, exist_ok=False, line_thickness=4, hide_labels=False, hide_conf=False, half=False, dnn=False
fatal: cannot change to 'G:\�궡': No such file or directory
YOLOv5  2022-6-12 Python-3.10.4 torch-1.11.0 CUDA:0 (GeForce GTX 1060 6GB, 6144MiB)

Fusing layers... 
custom_YOLOv5s summary: 232 layers, 7270791 parameters, 0 gradients
image 1/1 G:\내 드라이브\01. 숙명여대 석사\2학기 (석사 2022-1)\확률이론기반정보처리특론\과제 및 발표자료\code\datasets\images\drink_1_freehand.jpg: 416x384 3 Protein_Yogurts, Done. (0.012s)
Speed: 0.0ms pre-process, 12.0ms inference, 5.0ms NMS per image at shape (1, 3, 416, 416)
Results saved to [1mdat

# 4. 좌표 계산 및 상품 선택

### 4-1. 상품 검출 좌표정보 불러오기

In [6]:
import pandas as pd

product_pos_path = "datasets/results/" +  img_file_name + "/" + img_file_name + ".txt"

product_pos = pd.read_csv(product_pos_path, sep = ' ', names=['class', 'x1', 'y1', 'x2', 'y2'])
product_pos

Unnamed: 0,class,x1,y1,x2,y2
0,6,336,89,640,1029
1,6,7,102,254,680
2,6,207,97,443,965


### 4-2. 손 검출 영역과 중첩된 영역 크기 계산하기

In [7]:
size_of_overlap = []

for row in range(0, product_pos.shape[0]):
    selected_box_pos = product_pos.loc[row]

    # 가로 길이 계산
    width = abs(abs(hand_pos[2] - selected_box_pos[3]) - abs(hand_pos[0] - selected_box_pos[1]))

    # 세로 길이 계산
    height = abs(abs(hand_pos[3] - selected_box_pos[4]) - abs(hand_pos[1] - selected_box_pos[2]))

    # 총 영역 계산
    area = width * height
    size_of_overlap.append(area)
    
print(size_of_overlap) # 가장 작은 값이 가장 많은 영역이 겹친 것 

[1644, 19215, 10808]


### 4-3. 최대 중첩 아이템 선택

In [8]:
min_idx = size_of_overlap.index(min(size_of_overlap))
item_num = product_pos.loc[min_idx, 'class']
print("최대 중첩 아이템(클래스) 번호 :", item_num)

최대 중첩 아이템 : 6


# 5. 상품 정보 불러오기 및 TTS 출력

### 5-1. 최대 중첩항목의 상품 정보 파일 찾기

In [9]:
import os

food_info_path = "datasets/food_info/"

food_info_list = os.listdir(food_info_path)
print(food_info_list)

['[6] Protein_Yogurt.xml', '[8] mint_latte.xml', '[9] seoul_milk.xml', '[0] Cass_Light.xml', '[1] Cheetos.xml', '[2] Corn_Snack.xml', '[3] Corn_Snack_Roasted.xml', '[4] Hite_ExtraCold.xml', '[5] Kloud.xml', '[7] Shrimp_Crackers.xml']


In [10]:
product_file = ""

for file in food_info_list:
    end_idx = file.find(']')
    
    product_num = file[1:end_idx]
    
    if int(product_num) == item_num:
        product_file = file

print(product_file)

[6] Protein_Yogurt.xml


### 5-2. 상품 정보 불러오기

In [11]:
import xml.etree.ElementTree as ET
tree = ET.parse(food_info_path + product_file)

In [12]:
from ast import literal_eval

root = tree.getroot()
div_cd = root[1]

# 기본 정보
img_prod_nm = div_cd.find('img_prod_nm').text  # 상품명.text
vol = div_cd.find('volume').text  # 용량.text
print(img_prod_nm, vol)

# 상세 정보
nut_info = div_cd.find('nutrition_info')

nut_dict = eval(nut_info.text)

print(nut_dict)

serv_size = nut_dict['1회제공량']
kcal = nut_dict['에너지(㎉)']
protein = nut_dict['단백질(g)']
fat = nut_dict['지방(g)']
carbohydrate = nut_dict['탄수화물(g)']
print(serv_size, kcal, protein, fat, carbohydrate)

프로틴고단백질요거트 210ML
{'1회제공량': '', '총내용량(g)': '', '총내용량(mL)': '210', '에너지(㎉)': '185', '단백질(g)': '18.7', '지방(g)': '3.9', '탄수화물(g)': '19', '총당류(g)': '16', '총 식이섬유(g)': '', '칼슘(㎎)': '265', '철(㎍)': '', '마그네슘(㎎)': '', '카페인(㎎)': '', '칼륨(㎎)': '', '나트륨(㎎)': '130', '비타민': '', '콜레스테롤(㎎)': '60', '총 지방산(g)': ''}
 185 18.7 3.9 19


### 5-3. TTS 출력(저장)

In [13]:
def replace_text(original):
    changed = original.replace('ML', ' 밀리리터')
    changed = changed.replace("g", ' 그램')
    
    return changed

In [14]:
text_total = img_prod_nm + "." + " 총 용량은 " + vol + "." 
text_serve = " 1회 제공량은 " + str(vol if serv_size == "" else serv_size) + "."
text_detail = " 칼로리는 " + kcal + "." + " 단백질은 " + protein + "." + " 지방은 " + fat + "." + " 탄수화물은 " + carbohydrate + "."

text_final = text_total + text_serve + text_detail
text_final = replace_text(text_final)

print(text_final) 

프로틴고단백질요거트. 총 용량은 210 밀리리터. 1회 제공량은 210 밀리리터. 칼로리는 185. 단백질은 18.7. 지방은 3.9. 탄수화물은 19.


In [15]:
from gtts import gTTS

tts = gTTS(text=text_final, lang='ko')
tts.save("datasets/results/" + img_file_name + "/" + img_file_name + ".mp3")