In [1]:
import cv2
import numpy as np
import torch
from torchvision import datasets, transforms #提供資料集 演算法
from pathlib import Path
from PIL import Image

In [2]:
# --- Inference setup: device, TorchScript model, label lookup, preprocessing ---

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_path = Path('./models/model.pt')
# map_location remaps the saved tensors onto `device`, so a model that was
# exported on a CUDA machine can still be loaded on a CPU-only machine.
model = torch.jit.load(model_path, map_location=device).to(device)
# Put the model in evaluation mode so that train-only layers (dropout,
# batch-norm, if the model has any) behave deterministically at inference.
model.eval()
if model:
    print("Model Imported")

# ImageFolder is only used here to recover the class-name <-> index mapping
# from the sub-directory names under `root`.
dataset = datasets.ImageFolder(root='./hiragana-dataset-master/process')
label_dict = dataset.class_to_idx  # label name -> class index
# Invert the mapping so a predicted class index can be turned back into a label.
reverse_label_dict = {v: k for k, v in label_dict.items()}
print(reverse_label_dict)

# Preprocessing applied to every image before it is fed to the model.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # force a single channel
    transforms.Resize((64, 64)),                  # fixed 64x64 input size
    transforms.ToTensor(),                        # PIL (H, W) -> float tensor (C, H, W) in [0, 1]
    # Standardise as (x - mean) / std. NOTE(review): 0.1307 / 0.3081 look like
    # the MNIST statistics; presumably they match what was used at training
    # time - confirm against the training notebook.
    transforms.Normalize((0.1307,), (0.3081,)),
])

Model Imported
{0: 'kanaA', 1: 'kanaBA', 2: 'kanaCHI', 3: 'kanaDA', 4: 'kanaE', 5: 'kanaFU', 6: 'kanaHA', 7: 'kanaHE', 8: 'kanaHI', 9: 'kanaHO', 10: 'kanaI', 11: 'kanaJI', 12: 'kanaKA', 13: 'kanaKE', 14: 'kanaKI', 15: 'kanaKO', 16: 'kanaKU', 17: 'kanaMA', 18: 'kanaME', 19: 'kanaMI', 20: 'kanaMO', 21: 'kanaMU', 22: 'kanaN', 23: 'kanaNA', 24: 'kanaNE', 25: 'kanaNI', 26: 'kanaNO', 27: 'kanaNU', 28: 'kanaO', 29: 'kanaPI', 30: 'kanaRA', 31: 'kanaRE', 32: 'kanaRI', 33: 'kanaRO', 34: 'kanaRU', 35: 'kanaSA', 36: 'kanaSE', 37: 'kanaSHI', 38: 'kanaSO', 39: 'kanaSU', 40: 'kanaTA', 41: 'kanaTE', 42: 'kanaTO', 43: 'kanaTSU', 44: 'kanaU', 45: 'kanaWA', 46: 'kanaWO', 47: 'kanaYA', 48: 'kanaYO', 49: 'kanaYU'}


In [3]:
# --- Live recognition loop ---
# Grabs frames from the default webcam, classifies the content of a fixed
# square region of each frame with `model`, and draws the predicted label on
# the frame. Press "q" in the preview window to stop.
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        # Raise instead of calling exit(): inside a notebook exit() can shut
        # down the kernel and throw away the already-loaded model.
        raise RuntimeError("Cannot open camera")

    frame_w, frame_h = 720, 480                    # working frame size
    x, y, w, h = 350, 200, 110, 110                # region that is cropped and classified

    # Text-drawing settings never change, so define them once outside the loop.
    org = (x-20, y-30)                      # text position (just above the region)
    fontFace = cv2.FONT_HERSHEY_SIMPLEX     # text font
    fontScale = 2                           # text size
    color = (0, 0, 255)                     # text colour (red in BGR)
    thickness = 2                           # text stroke thickness
    lineType = cv2.LINE_AA                  # anti-aliased strokes

    # Gradients are not needed for inference; no_grad avoids recording
    # autograd state on every frame.
    with torch.no_grad():
        while True:
            ret, img = cap.read()
            result = "None"
            if not ret:
                print("Cannot receive frame")
                break
            img = cv2.resize(img, (frame_w, frame_h))   # shrink the frame to speed up processing

            # Crop the region and convert it to grayscale once; cvtColor
            # returns a new array, so later drawing on `img` does not affect it.
            img_word = cv2.cvtColor(img[y:y+h, x:x+w], cv2.COLOR_BGR2GRAY)

            ### Show the grayscale crop in the top-right corner of the frame ###
            output = cv2.cvtColor(img_word, cv2.COLOR_GRAY2BGR)
            img[0:h, frame_w-w:frame_w] = output

            ### Classify the crop ###
            img_word_pred = transform(Image.fromarray(img_word)).to(device)  # PIL image -> preprocessed tensor
            pred = model(img_word_pred.unsqueeze(0))      # add the batch dimension
            pred_label = pred.argmax(dim=1)               # index of the highest score
            # Fall back to "None" for an unknown index so putText always gets a string.
            result = reverse_label_dict.get(int(pred_label), "None")

            cv2.putText(img, result, org, fontFace, fontScale, color, thickness, lineType)  # draw the label
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 3)  # outline the classified region
            cv2.imshow('WordPred', img)
            # Mask to the low byte: some platforms return extra high bits from waitKey.
            if cv2.waitKey(50) & 0xFF == ord('q'):
                break     # "q" pressed: stop
finally:
    # Always free the camera and close the window, even if a frame fails
    # mid-loop, so the device is not left locked for the next run.
    cap.release()
    cv2.destroyAllWindows()