In [None]:
import torch
from PIL import Image, ImageGrab, ImageDraw, ImageFont
from torchvision import transforms, models
import cv2
import numpy as np
import pyWinhook
import pythoncom
import time
import win32api

import import_ipynb
import RequiredPai_for_import as RP
from Separation import g_EgoHandCropLeftUpperX_int, g_EgoHandCropLeftUpperY_int, \
                       g_EgoHandCropRightLowerX_int, g_EgoHandCropRightLowerY_int, \
                       g_EgoHandCropMinPaiArea_int, g_EgoHandGaussKSize_int

In [None]:
# ---- global constants ----

# Normalization statistics (per RGB channel).
# The input data are not "natural" images, so mean and std were pre-calculated
# for this data set; see the notebook NormalizationSupport.
# Note: the values are copied here instead of being imported, because importing
# NormalizationSupport would re-calculate them on every import.
g_NormMean_lst = [0.7406573632924783, 0.7232460663149365, 0.6959515512913943]
g_NormStd_lst = [0.22704669694242244, 0.24139483459696767, 0.2690579683428062]

# Average size of the input data, used for scaling the input; it depends on
# your screen size! See the notebook NormalizationSupport.
# Note: copied instead of imported for the same reason as above.
g_AvgW_int = 83
g_AvgH_int = 136

# Integer scale factor and the resulting input size that fulfils the 224x224
# requirement. If the original input is already bigger than 224 in both
# directions, both integer divisions give 0 and the factor becomes one.
g_ScaleFactor_int = max(224 // l_Side_int for l_Side_int in (g_AvgW_int, g_AvgH_int)) + 1
g_UpSampledW_int, g_UpSampledH_int = (g_ScaleFactor_int * g_AvgW_int,
                                      g_ScaleFactor_int * g_AvgH_int)

# Path of the normal font.
# Only the font name is given here; please refer to the Pillow documentation
# for the default locations that are searched in that case.
g_FontPath_str = 'arial.ttf'

# Path of the PAI font; it should be in ttf format and support the extended
# UTF-8 characters of the Mahjong tiles.
# Current choice: BabelStoneHan
g_PAIFontPath_str = r'..\..\..\..\AppData\Local\Microsoft\Windows\Fonts\BabelStoneHan.ttf'

# Path of the pre-trained model parameters.
g_PreTrainedModelPath_str = r'Model\20210317_Trial1.pth'

In [None]:
# Reverse dictionary: class index predicted by the network -> PAI label.
# Index order: 1m..9m (0-8), 1p..9p (9-17), 1s..9s (18-26),
# followed by E, S, W, N, R, G, Wh (27-33).
g_PAI_Reverse_Dict_Prediction_c = dict(enumerate(
    [str(l_Rank_int) + l_Suit_str for l_Suit_str in 'mps' for l_Rank_int in range(1, 10)]
    + ['E', 'S', 'W', 'N', 'R', 'G', 'Wh']
))

In [None]:
def MyLoader(f_ImgPointer_str):
    """Turn one separated PAI image into a normalized network input tensor.

    Steps: upsample -> convert to tensor -> normalize -> center crop.

    Args:
        f_ImgPointer_str: despite the name, this is used directly as an image
            object (its ``size`` and ``resize`` are accessed); nothing is
            opened from disk here.

    Returns:
        The tensor produced by the transform pipeline, center-cropped to
        (g_UpSampledH_int, g_UpSampledW_int).
    """
    l_Scaled_img = f_ImgPointer_str

    # upsample with nearest-neighbour resampling, only if a factor > 1 is configured
    if g_ScaleFactor_int > 1:
        l_SrcW_int, l_SrcH_int = l_Scaled_img.size
        l_Scaled_img = l_Scaled_img.resize(
            (l_SrcW_int * g_ScaleFactor_int, l_SrcH_int * g_ScaleFactor_int),
            resample=Image.NEAREST)

    # note: the transforms are classes, so they are instantiated first and
    # then applied; the order (normalize before crop) is kept on purpose
    l_Pipeline = transforms.Compose([
        transforms.ToTensor(),  # implies the rescaling to [0, 1]
        transforms.Normalize(g_NormMean_lst, g_NormStd_lst),
        transforms.CenterCrop((g_UpSampledH_int, g_UpSampledW_int)),
    ])

    return l_Pipeline(l_Scaled_img)

In [None]:
class MyDataset(torch.utils.data.Dataset):
    """Dataset over a sequence of in-memory images (no labels).

    Each item is produced by passing the stored element through ``loader``
    and, if given, through ``transform`` afterwards.
    """

    def __init__(self, imgpointers, loader=MyLoader, transform=None):
        # imgpointers: indexable collection handed element-wise to the loader
        self.images, self.loader, self.transform = imgpointers, loader, transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        l_Sample = self.loader(self.images[index])
        if not self.transform:
            return l_Sample
        return self.transform(l_Sample)

In [None]:
def SeparationEgoHand(f_ScreenShot_img):
    """Cut the own hand area out of a screenshot and separate the single PAIs.

    Args:
        f_ScreenShot_img: PIL image of the screen (``crop``/``convert`` are used).

    Returns:
        Tuple ``(images, anchors)``:
        images  - list with one cropped PIL image per accepted bounding box,
        anchors - list of ``[x, y, w, h]`` with x/y given in coordinates of
                  the full screenshot (crop offset already added).
        Both lists follow the order in which OpenCV returns the contours.
    """
    # offset of the hand area inside the screenshot
    l_OffsetX_int = g_EgoHandCropLeftUpperX_int
    l_OffsetY_int = g_EgoHandCropLeftUpperY_int

    # crop the interesting part
    l_Hand_img = f_ScreenShot_img.crop((l_OffsetX_int, l_OffsetY_int,
                                        g_EgoHandCropRightLowerX_int, g_EgoHandCropRightLowerY_int))

    # 8 bit gray scale copy as numpy array for cv2
    l_Gray_npa = np.array(l_Hand_img.convert('L'))

    # a game screenshot has no real noise, the blur is only used to get
    # better contours
    l_KSize_tpl = (g_EgoHandGaussKSize_int, g_EgoHandGaussKSize_int)
    l_Blurred_npa = cv2.GaussianBlur(l_Gray_npa, l_KSize_tpl, 0)

    # edges with canny, then outer contours only (compressed representation);
    # the contours are the second-to-last return value in every OpenCV version
    l_Edges_npa = cv2.Canny(l_Blurred_npa, 30, 100)
    l_Contours = cv2.findContours(l_Edges_npa, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # bounding rectangles which are big enough to be a PAI
    l_Boxes_lst = [l_Box for l_Box in map(cv2.boundingRect, l_Contours)
                   if l_Box[2] * l_Box[3] >= g_EgoHandCropMinPaiArea_int]

    # corresponding areas of the (cropped) original image
    l_OutputImages_lst = [l_Hand_img.crop((x, y, x + w, y + h)) for x, y, w, h in l_Boxes_lst]
    # anchor coordinates relative to the full screenshot
    l_AnchorCoords_lst = [[x + l_OffsetX_int, y + l_OffsetY_int, w, h] for x, y, w, h in l_Boxes_lst]

    return l_OutputImages_lst, l_AnchorCoords_lst

In [None]:
def DrawPrediction(f_im_img, f_AnchorCoords_lst, f_PredictedPAI_lst, f_NecessaryPAI_lst):
    """Draw the predictions and the discard hints onto the image (in place).

    Part 1: every detected PAI gets a red frame with the predicted label
    centered above it.
    Part 2: a white box in the middle of the image lists one hint line per
    entry of ``f_NecessaryPAI_lst``.

    Args:
        f_im_img: PIL image that is drawn on; it is modified in place.
        f_AnchorCoords_lst: list of ``[x, y, w, h]`` boxes in image coordinates.
        f_PredictedPAI_lst: list of label strings, parallel to the boxes.
        f_NecessaryPAI_lst: list of entries where index 0 is the PAI to play,
            index 2 an iterable of PAIs and index 3 a number; index 0 and the
            elements of index 2 must be keys of ``RP.g_PAI_Disp_c``.

    Returns:
        None
    """
    l_Drawer = ImageDraw.Draw(f_im_img)

    #---------------------------------------------------------------------------------------
    # part 1: frame and label for every separated area
    l_Label_font = ImageFont.truetype(g_FontPath_str, size=30)
    for (x, y, w, h), l_PAI_str in zip(f_AnchorCoords_lst, f_PredictedPAI_lst):
        l_Drawer.rectangle([x, y, x+w, y+h], outline='red', width=3)
        # anchor 'mb': the text is horizontally centered and sits on top of the frame
        l_Drawer.text((x+w/2, y-5), l_PAI_str, fill='red', font=l_Label_font, anchor='mb')

    #---------------------------------------------------------------------------------------
    # part 2: information about the required PAIs
    l_PAI_FontSize_int = 40
    l_PAI_font = ImageFont.truetype(g_PAIFontPath_str, size=l_PAI_FontSize_int)

    # generate the lines; format: 打<played PAI>摸<PAIs><number>枚
    l_Lines_lst = []
    for NecPAIs in f_NecessaryPAI_lst:
        text = '打' + RP.g_PAI_Disp_c[NecPAIs[0]]
        text += '摸' + ''.join(RP.g_PAI_Disp_c[PAI] for PAI in NecPAIs[2])
        text += str(NecPAIs[3]) + '枚'
        l_Lines_lst.append(text)

    # nothing to report: do not draw an empty background box
    if not l_Lines_lst:
        return

    # width of the background box = widest line plus margin
    # note: ImageDraw.textsize was removed in Pillow 10; textlength is available
    # since Pillow 8.0 (the same version that introduced the anchor argument)
    l_BGWidth = max(l_Drawer.textlength(text, font=l_PAI_font) for text in l_Lines_lst) + 10
    l_LineHeight_int = l_PAI_FontSize_int + 5
    l_BGHeight_int = len(l_Lines_lst) * l_LineHeight_int

    # centered background box
    l_AnchorX = f_im_img.size[0]/2 - l_BGWidth/2
    l_AnchorY = f_im_img.size[1]/2 - l_BGHeight_int/2
    l_Drawer.rectangle([l_AnchorX, l_AnchorY, l_AnchorX+l_BGWidth, l_AnchorY+l_BGHeight_int], fill='white')

    # print the lines with a margin of 5 pixel to the upper left corner
    l_AnchorX += 5
    l_AnchorY += 5
    for text in l_Lines_lst:
        l_Drawer.text((l_AnchorX, l_AnchorY), text, fill='black', font=l_PAI_font, anchor='lt')
        l_AnchorY += l_LineHeight_int

    return

In [None]:
def CalNecessaryPAIsBasedOnPredictedHand(f_Hand_lst):
    """Evaluate every distinct PAI of the hand as the one to be played.

    For each candidate the remaining hand is passed to
    ``RP.CalNumOfNecessaryPAIs`` and the remaining count of the collected
    required PAIs is looked up with ``RP.GetRemainingNumOfRequiredPAIs``.

    Args:
        f_Hand_lst: list of PAI labels (keys of ``RP.g_PAI_Dict_c``).

    Returns:
        List of ``[played PAI, number of necessary PAIs, required PAIs,
        number of required PAIs]``, restricted to the candidates with the
        minimal number of necessary PAIs and sorted by the number of required
        PAIs in descending order.
    """
    def l_PAIOrder(f_PAI):
        # sort key shared by the hand and the required PAIs
        return RP.g_PAI_Dict_c[f_PAI]

    l_SortedHand_lst = sorted(f_Hand_lst, key=l_PAIOrder)

    # remove the PAIs of the hand from the current pool
    l_Remaining_dict = RP.g_PAI_Num_c.copy()
    for l_PAI in l_SortedHand_lst:
        l_Remaining_dict[l_PAI] -= 1

    l_BestNumNec = RP.g_invalid_Num_Of_Necessary_PAIs_c
    l_Candidates_lst = []
    for l_Played in l_SortedHand_lst:
        # the hand is sorted, so a duplicate always directly follows its twin
        if l_Candidates_lst and l_Played == l_Candidates_lst[-1][0]:
            continue

        # hand after playing the candidate
        l_RestHand_lst = list(l_SortedHand_lst)
        l_RestHand_lst.remove(l_Played)

        # the set is filled by the callee
        l_Required_set = set()
        l_NumNec = RP.CalNumOfNecessaryPAIs(l_RestHand_lst, l_Required_set)
        l_Required_lst = sorted(l_Required_set, key=l_PAIOrder)

        l_BestNumNec = min(l_BestNumNec, l_NumNec)
        l_Candidates_lst.append([
            l_Played,
            l_NumNec,
            l_Required_lst,
            RP.GetRemainingNumOfRequiredPAIs(l_Remaining_dict, l_Required_lst),
        ])

    # most remaining required PAIs first (stable sort)
    l_Candidates_lst.sort(key=lambda l_Entry: l_Entry[3], reverse=True)

    # keep only the options which reach the minimal number of necessary PAIs
    return [l_Entry for l_Entry in l_Candidates_lst if l_Entry[1] == l_BestNumNec]

In [None]:
def onKeyboardEvent(f_event):
    """Keyboard hook callback.

    'C'      - grab a screenshot, separate the PAIs of the own hand, classify
               them with ``g_Net``, calculate the required PAIs and show the
               annotated screenshot.
    'Escape' - post the quit message and remove the keyboard hook.

    Args:
        f_event: keyboard event of the hook manager; only ``Key`` is read.

    Returns:
        Always True (presumably so that the key event is passed on to other
        handlers - see the pyWinhook documentation).
    """
    if f_event.Key == 'C':
        # get current screenshot
        l_im_img = ImageGrab.grab()
        try:
            # get separated PAI list
            l_im_PAIs_lst, l_AnchorCoords_lst = SeparationEgoHand(l_im_img)

            # nothing detected: a DataLoader with batch_size=0 would raise
            if not l_im_PAIs_lst:
                print('No PAI detected in the hand area.')
                return True

            # get dataloader; all PAIs are sent through the net as one batch
            l_ValidationData_DaSt = MyDataset(l_im_PAIs_lst)
            l_ValidationData_DaLder = torch.utils.data.DataLoader(l_ValidationData_DaSt, batch_size=len(l_im_PAIs_lst), shuffle=False)

            # inference only: use the stored batch norm statistics instead of
            # the statistics of the current batch and do not record gradients
            g_Net.eval()
            with torch.no_grad():
                for data in l_ValidationData_DaLder:
                    outputs = g_Net(data)
                    _, predicted = torch.max(outputs, 1)
                    # get the PAI from index
                    l_PredictedPAI_lst = [g_PAI_Reverse_Dict_Prediction_c[x.item()] for x in predicted]
                    # calculate the required PAIs
                    l_NecessaryPAI_lst = CalNecessaryPAIsBasedOnPredictedHand(l_PredictedPAI_lst)
                    # display the PAIs on original image
                    DrawPrediction(l_im_img, l_AnchorCoords_lst, l_PredictedPAI_lst, l_NecessaryPAI_lst)

            # show the image
            l_im_img.show()
        finally:
            # release memory, also if one of the steps above failed
            l_im_img.close()

    # when 'Escape' is pressed, stop monitoring & hook
    elif f_event.Key == 'Escape':
        win32api.PostQuitMessage() # send quit message
        g_hm.UnhookKeyboard() # unhook the keyboard
        print('Hook stopped.')

    # return
    return True

In [None]:
# initialize and load the model
# get the ResNet18 model as base and replace the last fully connected layer with custom output
# after that load the local pre-trained parameters
g_Net = models.resnet18(pretrained=False, progress=False)
in_ftr  = g_Net.fc.in_features
out_ftr = 34 # 34 different PAIs available
g_Net.fc = torch.nn.Linear(in_ftr,out_ftr,bias=True)
# the net is only used for prediction in this notebook: switch to evaluation
# mode, otherwise batch norm works with the statistics of the current batch
# (the mode is independent of the parameters loaded below)
g_Net.eval()
# map_location: the net stays on the CPU, so the parameters can also be loaded
# on a machine without CUDA if they were saved from a GPU
g_Net.load_state_dict(torch.load(g_PreTrainedModelPath_str, map_location='cpu'))

In [None]:
# main: install the keyboard hook and process messages until a quit message arrives

# get hook manager
g_hm = pyWinhook.HookManager()
# set callback function for key-down events
g_hm.KeyDown = onKeyboardEvent
# start keyboard hook
g_hm.HookKeyboard()

# start monitoring; onKeyboardEvent posts the quit message when 'Escape' is pressed
pythoncom.PumpMessages()