In [1]:
import torch
import hub
import random
import math
import PIL
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from torchinfo import summary
from torchvision import transforms

import pathlib
from os import listdir
from os.path import isfile, join

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
image_size = 64
nb_symbols = 2199

class kanji_detector(torch.nn.Module):
    def __init__(self,dropout1=0.4, dropout2=0.2, dropout3=0.2):
        super(kanji_detector, self).__init__()
        self.sequence = torch.nn.Sequential(
            torch.nn.Conv2d(1,32,17),
            torch.nn.ReLU(),
            torch.nn.Dropout(dropout1),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(32,64,5),
            torch.nn.Dropout(dropout2),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(64,128,3),
            torch.nn.Dropout(dropout3),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Flatten(),
            torch.nn.Linear(2048,512),
            torch.nn.ReLU(),
            torch.nn.Linear(512,nb_symbols),
            #torch.nn.Softmax(-1)
        )
        
    def forward(self, input):
        out = self.sequence(input)
        return out

    
def getModel():
    return kanji_detector()


def testModel():
    modelRunnable = getModel().to(device=device)
    print(modelRunnable)
    
    summary1 = summary(
        modelRunnable,
        input_size=[
            (20, 1, image_size, image_size)
        ],
        dtypes=[torch.double, torch.double],
        depth=3
    )
    
    print(summary1)
    
    del modelRunnable
    torch.cuda.empty_cache()



In [4]:
print("Allocated : " + str(torch.cuda.memory_allocated()))
print("Reserved : " + str(torch.cuda.memory_reserved()))

#testModel()
#start = time.time()
print("Allocated : " + str(torch.cuda.memory_allocated()))
print("Reserved : " + str(torch.cuda.memory_reserved()))

Allocated : 0
Reserved : 0
Allocated : 0
Reserved : 0


In [5]:
trainingPath = pathlib.Path().resolve() / "Training_set"
picturesNames = [f for f in listdir(trainingPath) if isfile(join(trainingPath, f))]

dictNames = {name:{'number':int(name.split('_')[0]) , 'symbol':name.split('_')[1]} for name in picturesNames}
tempDictNames = dictNames.copy()

#print(dictNames[picturesNames[0]])

def selectBatches(dictNames, tempDictNames, batch_size) -> list:
    batch=[]
    while len(batch) < batch_size:
        n_to_find = batch_size-len(batch)
        if len(tempDictNames) >= n_to_find:
            sample = random.sample(list(tempDictNames.items()), n_to_find)
            batch.extend(sample)
            for item in sample:
                del tempDictNames[item[0]]
        else:
            batch.extend(tempDictNames.items())
            tempDictNames = dictNames.copy()

    return batch

def getAnswerIndices(batchList) -> torch.FloatTensor:
    
    correctAnswer = torch.zeros((len(batchList),nb_symbols)).float()
    correctAnswerIndices = torch.zeros(len(batchList)).long()
    
    for i in range(len(batchList)):
        indexCorrect = batchList[i][1]['number']-1
        correctAnswer[i][indexCorrect] = 1
        correctAnswerIndices[i] = indexCorrect
    
    
    return correctAnswer, correctAnswerIndices

def countCorrect(answer: torch.FloatTensor, correctAnswer: torch.FloatTensor):
    
    
    _,indicesAnswer = torch.max(answer, dim=1)
    _,indicesCorrect = torch.max(correctAnswer, dim=1)
    
    #print(indicesAnswer)
    #print(indicesCorrect)
    numCorrect = (indicesAnswer == indicesCorrect).long().sum()
    
    return numCorrect.item()
    
def getPictures(batchList) -> torch.FloatTensor:
    
    images = torch.zeros((len(batchList), 1, image_size, image_size)).float()
    convert_tensor = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(image_size),
        transforms.ToTensor()
    ])
    
    for i in range(len(batchList)):
        item = batchList[i]
        filename = item[0]
        img = Image.open(trainingPath / filename)
        images[i,:,:,:] = convert_tensor(img)
        
    return images
        
#for i in range(100):
#    print(len(selectBatches(dictNames, tempDictNames)))


In [18]:

def train(model, n_epoch, batch_size, lr):
    n_batches = 100
    

    #dataset = datasets.ImageFolder(trainingPath, transform=transform)
    #dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    model.train()
    loss_f = torch.nn.CrossEntropyLoss()
    best_percent = 0
    for epoch in range(n_epoch):
        #print("Epoch " + str(epoch+1) + " is running")
        n_correct = 0
        n_total = n_batches*batch_size
        optimizer = torch.optim.Adam(model.parameters(), lr=lr/(epoch+1)**0.5)
        for i in range(n_batches):

            model.zero_grad()
            #optimizer.zero_grad()
            batch = selectBatches(dictNames, tempDictNames, batch_size)
            images = getPictures(batch)
            correct_answer, correct_answer_indices = getAnswerIndices(batch)

            answer = model(images.to(device=device))
            loss = loss_f(answer,correct_answer_indices.to(device=device)).cpu()
            
            loss.backward()
            optimizer.step()
            
            n_correct += countCorrect(answer, correct_answer.to(device=device))
            
            #print(loss.item())
            #print(torch.softmax(answer,dim=1))
            #print(correct_answer)
            #print(n_correct)

        adjust = 10000
        percent = math.floor(adjust*100*n_correct/n_total)/adjust
        best_percent = percent if percent > best_percent else best_percent
        print("Epoch " + str(epoch+1) +" training accuracy : " + str(percent) + "%\n")
        
    return best_percent


def weights_init(m):
    if isinstance(m, torch.nn.Conv2d):
        m.weight.data.normal_(0, 0.02)
        m.bias.data.normal_(0, 0.001)
    
    if isinstance(m, torch.nn.Linear):
        m.weight.data.normal_(0, 0.02)
        m.bias.data.normal_(0, 0.001)
        
        

In [21]:
batch_sizes = [100] #[25, 50, 100, 150, 200]
learning_rates = [0.00001] #[0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

n_epochs = 50

In [32]:
for bs in batch_sizes:
    for lr in learning_rates:
        trainModel = getModel().to(device=device)
        weights_init(trainModel)
        percent = train(trainModel, n_epochs, bs, lr)
        print("bs=" + str(bs) + " lr=" + str(lr) + " : " + str(percent) + "%")


Epoch 1 training accuracy : 0.68%

Epoch 2 training accuracy : 1.0%

Epoch 3 training accuracy : 1.12%

Epoch 4 training accuracy : 5.8%

Epoch 5 training accuracy : 9.53%

Epoch 6 training accuracy : 15.34%

Epoch 7 training accuracy : 21.91%

Epoch 8 training accuracy : 31.62%

Epoch 9 training accuracy : 40.4%

Epoch 10 training accuracy : 50.17%

Epoch 11 training accuracy : 60.46%

Epoch 12 training accuracy : 66.12%

Epoch 13 training accuracy : 71.44%

Epoch 14 training accuracy : 74.34%

Epoch 15 training accuracy : 76.96%

Epoch 16 training accuracy : 80.13%

Epoch 17 training accuracy : 82.54%

Epoch 18 training accuracy : 84.11%

Epoch 19 training accuracy : 86.59%

Epoch 20 training accuracy : 87.78%

Epoch 21 training accuracy : 89.32%

Epoch 22 training accuracy : 90.54%

Epoch 23 training accuracy : 91.41%

Epoch 24 training accuracy : 92.47%

Epoch 25 training accuracy : 93.15%

Epoch 26 training accuracy : 93.81%

Epoch 27 training accuracy : 94.12%

Epoch 28 training 

In [22]:
trainModel = getModel().to(device=device)
weights_init(trainModel)
n_epochs = 50

train(trainModel, n_epochs, batch_sizes[0], learning_rates[0])

Epoch 1 training accuracy : 0.05%

Epoch 2 training accuracy : 0.83%

Epoch 3 training accuracy : 1.0%

Epoch 4 training accuracy : 1.04%

Epoch 5 training accuracy : 1.02%

Epoch 6 training accuracy : 3.27%

Epoch 7 training accuracy : 6.87%

Epoch 8 training accuracy : 12.05%

Epoch 9 training accuracy : 18.55%

Epoch 10 training accuracy : 28.07%

Epoch 11 training accuracy : 38.13%

Epoch 12 training accuracy : 46.45%

Epoch 13 training accuracy : 52.92%

Epoch 14 training accuracy : 58.0%

Epoch 15 training accuracy : 63.68%

Epoch 16 training accuracy : 68.02%

Epoch 17 training accuracy : 71.78%

Epoch 18 training accuracy : 75.0%

Epoch 19 training accuracy : 77.46%

Epoch 20 training accuracy : 79.88%

Epoch 21 training accuracy : 81.75%

Epoch 22 training accuracy : 84.85%

Epoch 23 training accuracy : 86.07%

Epoch 24 training accuracy : 87.64%

Epoch 25 training accuracy : 89.43%

Epoch 26 training accuracy : 91.28%

Epoch 27 training accuracy : 92.97%

Epoch 28 training ac

96.15

In [24]:
torch.save(trainModel,"Models/kanji_model_96_1.pt")

In [25]:
evalModel = torch.load('kanji_model_96_1.pt', map_location=torch.device(device))
print(evalModel)

kanji_detector(
  (sequence): Sequential(
    (0): Conv2d(1, 32, kernel_size=(17, 17), stride=(1, 1))
    (1): ReLU()
    (2): Dropout(p=0.4, inplace=False)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (5): Dropout(p=0.2, inplace=False)
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (9): Dropout(p=0.2, inplace=False)
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Flatten(start_dim=1, end_dim=-1)
    (13): Linear(in_features=2048, out_features=512, bias=True)
    (14): ReLU()
    (15): Linear(in_features=512, out_features=2199, bias=True)
  )
)


In [26]:
def eval(model, n_epoch, batch_size):
    n_batches = 1
    
    with torch.no_grad():

        model.eval()
        loss_f = torch.nn.CrossEntropyLoss()
        best_percent = 0
        for epoch in range(n_epoch):
            #print("Epoch " + str(epoch+1) + " is running")
            n_correct = 0
            n_total = n_batches*batch_size
            for i in range(n_batches):

                batch = selectBatches(dictNames, tempDictNames, batch_size)
                images = getPictures(batch)
                correct_answer, correct_answer_indices = getAnswerIndices(batch)

                answer = model(images.to(device=device))
                loss = loss_f(answer,correct_answer_indices.to(device=device)).cpu()

                print(answer)
                n_correct += countCorrect(answer, correct_answer.to(device=device))

            adjust = 10000
            percent = math.floor(adjust*100*n_correct/n_total)/adjust
            best_percent = percent if percent > best_percent else best_percent
            print("Epoch " + str(epoch+1) +" evaluation accuracy : " + str(percent) + "%\n")
        
    return best_percent

In [27]:
eval(trainModel,1,100)

tensor([[-2.7393, -3.0892, -4.2000,  ..., -3.7937, -4.1769, -3.1474],
        [-4.2448, -2.5073, -4.6589,  ..., -3.3550, -4.6312, -3.9381],
        [-2.0208, -2.3091, -3.8607,  ..., -3.7006, -4.2303, -2.8342],
        ...,
        [-2.2126, -2.3520, -3.3818,  ..., -3.5510, -2.9092, -2.8869],
        [-2.5157, -2.2300, -3.7171,  ..., -3.9248, -3.3962, -3.0028],
        [-3.4204, -3.6705, -4.8800,  ..., -4.4643, -5.3411, -5.0325]],
       device='cuda:0')
Epoch 1 evaluation accuracy : 96.0%



96.0