In [13]:
import win32api
import win32con
import win32gui
import time
import torch
from torchvision import transforms
from kanji_detection_model import kanji_detector
import json


from PIL import ImageGrab, Image

#for testing purposes
import pathlib
from os import listdir
from os.path import isfile, join
import random
import math

In [14]:
num2kanji_dict = {}
with open('num2kanji_dict.json', 'r') as f:
  num2kanji_dict = json.load(f)


device = "cuda" if torch.cuda.is_available() else "cpu"
#detectionModel = torch.load('./Models/kanji_model_96_1.pt', map_location=torch.device('cpu'))
detectionModel = kanji_detector().to(device=device)
detectionModel.load_state_dict(torch.load('./Models/kanji_model_98_1.pth'))

<All keys matched successfully>

In [16]:
def identifySymbol(image: Image, top_k: int = 5):
    convert_tensor = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(64),
        transforms.ToTensor()
    ])
    
    with torch.no_grad():
        tensor_image = convert_tensor(image)
        activeModel = detectionModel.to(device=device)
        activeModel.eval()
        result = activeModel(tensor_image.unsqueeze(0).to(device=device))
        _,most_likely_indices = result.squeeze().topk(k=top_k,dim=0)
        selections = (most_likely_indices+1).tolist()
        kanjis = [num2kanji_dict[str(k)] for k in selections]
        print(kanjis)
        
        del activeModel
        del tensor_image
    

In [None]:
def isPressed(key) -> bool:
    return (win32api.GetKeyState(key) & ~1) != 0 # Left button down = 0 or 1. Button up = -127 or -128

capture_size = 40
half_capture_size = capture_size//2

state_left = isPressed(win32con.VK_LBUTTON)
state_ctrl = isPressed(win32con.VK_CONTROL)

while True:
    a = isPressed(win32con.VK_LBUTTON)
    state_ctrl = isPressed(win32con.VK_CONTROL)
    
    if a != state_left: # Button state changed
        state_left = a
            
        if a and state_ctrl:
            isHolding = True
            posClick = win32gui.GetCursorPos()
            rect = (posClick[0]-half_capture_size, 
                    posClick[1]-half_capture_size,
                    posClick[0]+half_capture_size,
                    posClick[1]+half_capture_size)
            pic = ImageGrab.grab(bbox=rect)
            identifySymbol(pic, top_k=10)
            #print('Left Button Pressed')
                

['的', '以', '冶', '丑', '腺', '煎', '切', '欧', '認', '内']
['句', '彰', '遠', '塗', '拷', '唯', '議', '歳', '新', '炭']
['乾', '泌', '笠', '三', '遠', '交', '芯', '堂', '公', '映']
['乱', '格', '七', '桐', '策', '照', '丙', '歳', '記', '寸']
['乱', '議', '遠', '盲', '認', '幸', '彰', '格', '弦', '泌']
['託', '笠', '欲', '侯', '零', '酵', '究', '脇', '喪', '表']
['議', '彰', '厚', '遠', '乾', '泌', '乱', '菌', '考', '荒']
['乱', '格', '議', '遠', '策', '販', '互', '針', '姿', '代']
['乾', '遠', '議', '泌', '四', '厚', '乱', '菌', '寛', '芯']
['乱', '彰', '議', '認', '盲', '厚', '遠', '幸', '泌', '唯']
['新', '句', '賄', '認', '飽', '唯', '詣', '穏', '房', '崖']
['田', '畑', '句', '舶', '好', '善', '取', '柵', '存', '銘']
['財', '牧', '茨', '洗', '興', '柔', '整', '詳', '嗅', '資']
['財', '柔', '女', '壌', '夜', '茨', '看', '埼', '泉', '麻']
['勺', '肌', '漆', '伺', '勧', '布', '軍', '智', '頃', '幸']


In [5]:
trainingPath = pathlib.Path().resolve() / "Training_set"
picturesNames = [f for f in listdir(trainingPath) if isfile(join(trainingPath, f))]

dictNames = {name:{'number':int(name.split('_')[0]) , 'symbol':name.split('_')[1]} for name in picturesNames}
tempDictNames = dictNames.copy()

image_size=64
nb_symbols=2199
#print(dictNames[picturesNames[0]])

def selectBatches(dictNames, tempDictNames, batch_size) -> list:
    batch=[]
    while len(batch) < batch_size:
        n_to_find = batch_size-len(batch)
        if len(tempDictNames) >= n_to_find:
            sample = random.sample(list(tempDictNames.items()), n_to_find)
            batch.extend(sample)
            for item in sample:
                del tempDictNames[item[0]]
        else:
            batch.extend(tempDictNames.items())
            tempDictNames = dictNames.copy()

    return batch

def getAnswerIndices(batchList) -> torch.FloatTensor:
    
    correctAnswer = torch.zeros((len(batchList),nb_symbols)).float()
    correctAnswerIndices = torch.zeros(len(batchList)).long()
    
    for i in range(len(batchList)):
        indexCorrect = batchList[i][1]['number']-1
        correctAnswer[i][indexCorrect] = 1
        correctAnswerIndices[i] = indexCorrect
    
    
    return correctAnswer, correctAnswerIndices

def countCorrect(answer: torch.FloatTensor, correctAnswer: torch.FloatTensor):
    
    
    _,indicesAnswer = torch.max(answer, dim=1)
    _,indicesCorrect = torch.max(correctAnswer, dim=1)
    
    #print(indicesAnswer)
    #print(indicesCorrect)
    numCorrect = (indicesAnswer == indicesCorrect).long().sum()
    
    return numCorrect.item()
    
def getPictures(batchList) -> torch.FloatTensor:
    
    images = torch.zeros((len(batchList), 1, image_size, image_size)).float()
    convert_tensor = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(image_size),
        transforms.ToTensor()
    ])
    
    for i in range(len(batchList)):
        item = batchList[i]
        filename = item[0]
        img = Image.open(trainingPath / filename)
        images[i,:,:,:] = convert_tensor(img)
        
    return images

In [6]:
def eval(model, n_epoch, batch_size):
    n_batches = 1
    
    with torch.no_grad():

        model.eval()
        loss_f = torch.nn.CrossEntropyLoss()
        best_percent = 0
        for epoch in range(n_epoch):
            #print("Epoch " + str(epoch+1) + " is running")
            n_correct = 0
            n_total = n_batches*batch_size
            for i in range(n_batches):

                batch = selectBatches(dictNames, tempDictNames, batch_size)
                images = getPictures(batch)
                correct_answer, correct_answer_indices = getAnswerIndices(batch)

                answer = model(images.to(device=device))
                loss = loss_f(answer,correct_answer_indices.to(device=device)).cpu()

                print(answer)
                n_correct += countCorrect(answer, correct_answer.to(device=device))

            adjust = 10000
            percent = math.floor(adjust*100*n_correct/n_total)/adjust
            best_percent = percent if percent > best_percent else best_percent
            print("Epoch " + str(epoch+1) +" evaluation accuracy : " + str(percent) + "%\n")
        
    return best_percent

In [7]:
eval(detectionModel,1,100)

tensor([[-3.7491, -2.7350, -3.2033,  ..., -3.8105, -3.3552, -2.9861],
        [-2.9835, -2.7410, -3.6033,  ..., -3.2384, -2.8220, -2.2905],
        [-3.8220, -2.8390, -4.6016,  ..., -3.8309, -3.0583, -2.7301],
        ...,
        [-3.5030, -2.5716, -2.9183,  ..., -3.5637, -3.3498, -2.9802],
        [-3.4549, -2.2524, -2.9277,  ..., -3.3852, -3.1757, -3.4982],
        [-3.6615, -3.1105, -2.6145,  ..., -3.4711, -3.3693, -2.9698]],
       device='cuda:0')
Epoch 1 evaluation accuracy : 7.0%



7.0

In [3]:
width = win32api.GetSystemMetrics(0)
height = win32api.GetSystemMetrics(1)
midWidth = int((width + 1) / 2)
midHeight = int((height + 1) / 2)


"""
dc = win32gui.GetDC(0)
dcObj = win32ui.CreateDCFromHandle(dc)
hwnd = win32gui.WindowFromPoint((0,0))
"""
rect = (0, 0, width, height)
window = (0, 0, width, height)


app = wx.App(False)
s = wx.ScreenDC()
s.Pen = wx.Pen(wx.Colour(0,0,0,255),style=wx.TRANSPARENT)
s.StartDrawingOnTop()



state_left = isPressed(win32con.VK_LBUTTON)
state_ctrl = isPressed(win32con.VK_CONTROL)
print(state_left)
print(state_ctrl)

isHolding = False

posStart = win32gui.GetCursorPos()
posEnd = win32gui.GetCursorPos()

brush = CreateBrush()
brush.CreateSolidBrush(win32api.GetSysColor(255))



False
False


In [3]:
class DrawPanel(wx.Panel):
    
    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        
        self.Bind(wx.EVT_PAINT, self.on_paint)
        self.Bind(wx.EVT_MOTION, self.on_mouse_motion)
        
    def on_mouse_motion(self, event):
        self.Parent.Refresh()
        
    def on_paint(self, event):
        self.dc = wx.PaintDC(self)
        #self.dc.DrawRectangle(rect)
        self.dc.DrawRectangle(0,0,100,100)

In [4]:

while True:
    a = isPressed(win32con.VK_LBUTTON)
    state_ctrl = state_ctrl if state_left else isPressed(win32con.VK_CONTROL)
    
    if (a != state_left): # Button state changed
        state_left = a
        if state_ctrl:
            
            if a:
                isHolding = True
                posStart = win32gui.GetCursorPos()
                posEnd = win32gui.GetCursorPos()
                #hwnd = win32gui.WindowFromPoint((0,0))
                print('Left Button + Ctrl Pressed')
                
                
            else:
                isHolding = False
                posEnd = win32gui.GetCursorPos()
                print('Left Button + Ctrl Released')
                
                s.Clear()
                
                #win32gui.InvalidateRect(hwnd, window, False)
                #win32api.SetCursorPos((midWidth, midHeight))
        
    if isHolding:
        posEnd = win32gui.GetCursorPos()
        """
        win32gui.RedrawWindow(hwnd, None, None, win32con.RDW_INVALIDATE | win32con.RDW_ERASE)
        win32gui.UpdateWindow(hwnd)
        rect = (posStart[0], posStart[1], posEnd[0], posEnd[1])
        dcObj.FrameRect(rect,brush)
        """
        
        
        
        rect = (posStart[0], posStart[1], posEnd[0]-posStart[0], posEnd[1]-posStart[1])
        s.DrawRectangle(rect)
        s.Refresh()
        #app.Update()
        
        
        rect = (posStart[0], posStart[1], posEnd[0]-posStart[0], posEnd[1]-posStart[1])
    
    time.sleep(0.01)
s.EndDrawingOnTop()

Left Button + Ctrl Pressed
Left Button + Ctrl Released
Left Button + Ctrl Pressed
Left Button + Ctrl Released
Left Button + Ctrl Pressed
Left Button + Ctrl Released


KeyboardInterrupt: 