In [1]:
import torch
import hub
import random
import math
import PIL
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from torchinfo import summary
from torchvision import transforms
from torchvision.io import read_image
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
import pandas as pd

from typing import Callable
import csv
import copy
import time
import json
import pathlib
import os
from os import listdir
from os.path import isfile, join

# Device name used by every `.to(device=device)` call in this notebook:
# "cuda" when a CUDA device is available, otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

In [8]:
# Size handed to transforms.Resize and used as the H/W of the summary input.
image_size = 64
# Width of the one-hot target rows built by getAnswerIndices.
# presumably the number of distinct symbols the model classifies — TODO confirm
nb_symbols = 2199

from kanji_detection_model import kanji_detector

    
def getModel():
    """Create a new kanji_detector instance and return it."""
    detector = kanji_detector()
    return detector


def testModel():
    """Print the model's repr and its torchinfo summary, then free the model.

    The summary is requested for a batch of 20 single-channel images of
    image_size x image_size, expanded to depth 3.
    """
    probe = getModel().to(device=device)
    print(probe)

    # NOTE(review): two dtypes are listed for a single input size, and the
    # input is declared as double — confirm this matches the model's dtype.
    report = summary(
        probe,
        input_size=[(20, 1, image_size, image_size)],
        dtypes=[torch.double, torch.double],
        depth=3,
    )
    print(report)

    # Drop the local reference before asking the CUDA caching allocator
    # to release its unused blocks.
    del probe
    torch.cuda.empty_cache()



In [9]:
# CUDA memory counters before the (currently disabled) model smoke test.
print(f"Allocated : {torch.cuda.memory_allocated()}")
print(f"Reserved : {torch.cuda.memory_reserved()}")

#testModel()
#start = time.time()
# Same counters again, to compare with the values printed above.
print(f"Allocated : {torch.cuda.memory_allocated()}")
print(f"Reserved : {torch.cuda.memory_reserved()}")

Allocated : 0
Reserved : 0
Allocated : 0
Reserved : 0


In [55]:
# Directory the image file names are resolved against (see getPictures).
trainingPath = pathlib.Path().resolve() / "Training_set"

# Maps an image file name to its integer label, for both CSV files.
g_dictNames2Labels = {}


def _read_names_and_labels(csv_path, names2labels):
    """Read a `name,label` CSV file, skipping its header row.

    Every (name, int(label)) pair is stored in `names2labels`; the names are
    returned in file order. Empty rows are ignored.
    """
    names = []
    # newline='' is what the csv module expects for files it parses itself.
    with open(csv_path, 'r', encoding='UTF8', newline='') as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        next(reader, None)  # header row
        for row in reader:
            if not row:
                continue
            name = row[0]
            names2labels[name] = int(row[1])
            names.append(name)
    return names


train_names_list = _read_names_and_labels('loader_data_train.csv', g_dictNames2Labels)
eval_names_list = _read_names_and_labels('loader_data_eval.csv', g_dictNames2Labels)

length_train = len(train_names_list)
length_eval = len(eval_names_list)

# Placeholder lists of empty strings; kept only so existing references to
# these names keep working. Nothing in this cell fills them.
train_filenames_list = [""]*length_train
eval_filenames_list = [""]*length_eval

# Shuffle the lists that selectBatches actually walks through. The original
# cell shuffled eval_filenames_list (all empty strings) by mistake, leaving
# eval_names_list in file order.
random.shuffle(train_names_list)
random.shuffle(eval_names_list)

#picturesNames = [f for f in listdir(trainingPath) if isfile(join(trainingPath, f))]
#g_dictNames = {name:{'name':name , 'number':int(name.split('_')[0]) , 'symbol':name.split('_')[1]} for name in picturesNames}
#shufflePicturesNames = picturesNames.copy()
#random.shuffle(shufflePicturesNames)


In [11]:
class KanjiImageDataset(Dataset):
    """Dataset that pairs image files with the labels listed in a CSV file.

    The CSV is read with its first line as header; column 0 holds the image
    file name (relative to `img_dir`) and column 1 holds the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotation table and remember the directory and transforms.

        Args:
            annotations_file: path of the comma-separated annotation file.
            img_dir: directory the file names of column 0 are joined to.
            transform: optional callable applied to the opened image.
            target_transform: optional callable applied to the label.
        """
        annotations = pd.read_csv(annotations_file, delimiter=',', header=0)
        self.img_labels = annotations
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        """Number of rows in the annotation table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return the (image, label) pair stored at row `idx`."""
        file_name = self.img_labels.iloc[idx, 0]
        image = Image.open(os.path.join(self.img_dir, file_name))
        label = self.img_labels.iloc[idx, 1]

        # Both transforms are optional and skipped when falsy.
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label
    
# Preprocessing applied to every image opened by KanjiImageDataset:
# grayscale conversion, resize driven by image_size, then conversion to a tensor.
img_transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize(image_size),
    transforms.ToTensor()
])

# The image directory is built with os.path.join instead of the literal
# ".\Training_set": "\T" is an invalid escape sequence in a normal string
# and the backslash separator only works on Windows.
# `int` is the target_transform, so labels are returned as Python ints.
training_data = KanjiImageDataset("loader_data_train.csv", os.path.join(".", "Training_set"), img_transform, int)
evaluation_data = KanjiImageDataset("loader_data_eval.csv", os.path.join(".", "Training_set"), img_transform, int)

In [60]:
# Read cursors into train_names_list / eval_names_list. They are one-element
# lists so selectBatches can advance them in place.
selectorIndexTrain = [0]
selectorIndexEval = [0]
def selectBatches(batch_size: int, isTraining: bool) -> tuple:
    """Return the next `batch_size` names and their labels from one name list.

    Names are taken in order from train_names_list (isTraining=True) or
    eval_names_list (isTraining=False), starting at the matching cursor.
    When the list runs out mid-batch, the remaining names are taken, the list
    is reshuffled in place, the cursor restarts at 0 and the batch is topped
    up from the reshuffled list.

    Args:
        batch_size: number of samples to return.
        isTraining: selects the training list/cursor when true, else the
            evaluation ones.

    Returns:
        (labels, names): a LongTensor of labels looked up in
        g_dictNames2Labels, and the list of the corresponding names.

    Raises:
        ValueError: if a non-empty batch is requested from an empty list
            (the loop below could otherwise never terminate).
    """
    names_list = train_names_list if isTraining else eval_names_list
    selector_index = selectorIndexTrain if isTraining else selectorIndexEval

    if batch_size > 0 and not names_list:
        raise ValueError("cannot draw a batch from an empty name list")

    names = []
    while len(names) < batch_size:
        start = selector_index[0]
        n_to_find = batch_size - len(names)
        remaining = len(names_list) - start

        if remaining >= n_to_find:
            names.extend(names_list[start:start + n_to_find])
            selector_index[0] = start + n_to_find
        else:
            # Use up the tail, then start a new shuffled pass over the list.
            names.extend(names_list[start:])
            random.shuffle(names_list)
            selector_index[0] = 0

    # g_dictNames2Labels is the name -> int label map filled while reading the
    # CSV files (the former lookup used an undefined `g_dictNames`).
    labels = [g_dictNames2Labels[name] for name in names]

    return torch.LongTensor(labels), names

def getAnswerIndices(batchList) -> tuple:
    """Build one-hot targets and class indices for a batch.

    Args:
        batchList: sequence of mappings, each with a 1-based 'number' entry.

    Returns:
        (correctAnswer, correctAnswerIndices):
        a float tensor of shape (len(batchList), nb_symbols) with a single 1
        per row at column number-1, and a long tensor of those column indices.
    """
    n_items = len(batchList)

    # 'number' is 1-based; tensor columns are 0-based.
    correctAnswerIndices = torch.tensor(
        [item['number'] - 1 for item in batchList], dtype=torch.long
    )

    correctAnswer = torch.zeros((n_items, nb_symbols)).float()
    # One indexed assignment sets the target column of every row at once.
    correctAnswer[torch.arange(n_items), correctAnswerIndices] = 1

    return correctAnswer, correctAnswerIndices


#def countCorrect(answer: torch.FloatTensor, correctAnswer: torch.FloatTensor):
def countCorrect(answer: torch.FloatTensor, correctAnswerIndices: torch.FloatTensor):
    """Count the rows of `answer` whose highest-scoring column is the target.

    Args:
        answer: 2-D score tensor; the maximum is taken along dim 1.
        correctAnswerIndices: expected column index for each row.

    Returns:
        The number of matching rows as a Python int.
    """
    predicted = torch.max(answer, dim=1).indices
    matches = predicted == correctAnswerIndices
    return matches.long().sum().item()

#def countTop5Correct(answer: torch.FloatTensor, correctAnswer: torch.FloatTensor):
def countTop5Correct(answer: torch.FloatTensor, correctAnswerIndices: torch.FloatTensor):
    """Count the rows of `answer` whose target is among the 5 best columns.

    Args:
        answer: 2-D score tensor with at least 5 columns.
        correctAnswerIndices: expected column index for each row.

    Returns:
        The number of rows whose target index appears in the row's top 5,
        as a Python int.
    """
    top5 = answer.topk(k=5, dim=1).indices
    # Turn the targets into a column so they compare against all 5 candidates.
    targets_column = correctAnswerIndices.unsqueeze(-1)
    hits = top5 == targets_column
    return hits.long().sum().item()
    
def getPictures(filenames, source_size: int = 40) -> torch.FloatTensor:
    """Load the named images into one batch and resize it for the model.

    Args:
        filenames: image file names, resolved against trainingPath.
        source_size: height and width of the stored images. Defaults to 40,
            the value that used to be hard-coded here.
            assumes every file is a single-channel source_size x source_size
            image — TODO confirm

    Returns:
        A tensor on `device` holding one single-channel image per file name,
        after transforms.Resize(image_size) has been applied to the batch.
    """
    to_tensor = transforms.ToTensor()
    resize = transforms.Resize(image_size)

    images = torch.zeros((len(filenames), 1, source_size, source_size)).to(device=device)

    for i, filename in enumerate(filenames):
        # Context manager closes the underlying file once the pixels are read.
        with Image.open(trainingPath / filename) as img:
            images[i, :, :, :] = to_tensor(img).to(device=device)

    # Resize the whole batch in one call rather than image by image.
    return resize(images)


In [73]:

def train(model, n_epoch, batch_size, lr: Callable[[int], float]):
    """Train `model` with Adam and cross-entropy, printing progress per epoch.

    Every epoch runs 100 batches obtained from selectBatches/getPictures.
    On every 5th epoch each training step is followed by one evaluation batch,
    and top-1 / top-5 accuracies are accumulated and printed. Training stops
    early once the top-5 evaluation accuracy exceeds 98%.

    Args:
        model: module being optimised; called on the batch from getPictures.
        n_epoch: maximum number of epochs.
        batch_size: number of samples per training (and evaluation) batch.
        lr: schedule mapping the 0-based epoch index to the learning rate.

    Returns:
        The best top-5 evaluation accuracy (in percent) measured on a
        detailed epoch, or 0 if none was measured.
    """
    n_batches = 100
    frequency_detailed_results = 5

    optimizer = torch.optim.Adam(model.parameters(), lr=lr(0))

    # Each loader gets a sampler built over its own dataset. Previously the
    # evaluation loader reused the sampler of the training set.
    # These loaders are only referenced by the commented-out lines below.
    sampler_train = torch.utils.data.RandomSampler(training_data, replacement=False)
    sampler_eval = torch.utils.data.RandomSampler(evaluation_data, replacement=False)

    #dataloader_train = torch.utils.data.DataLoader(training_data, batch_size=batch_size, shuffle=True, num_workers=0)
    #dataloader_eval = torch.utils.data.DataLoader(evaluation_data, batch_size=batch_size, shuffle=True, num_workers=0)
    dataloader_train = torch.utils.data.DataLoader(training_data, batch_size=batch_size, sampler=sampler_train, num_workers=0)
    dataloader_eval = torch.utils.data.DataLoader(evaluation_data, batch_size=batch_size, sampler=sampler_eval, num_workers=0)

    model.train()
    loss_f = torch.nn.CrossEntropyLoss()
    best_percent = 0
    adjust = 100  # percentages and loss are floored to 1/adjust

    for epoch in range(n_epoch):
        detailed = (epoch+1) % frequency_detailed_results == 0

        n_correct_1_t = 0
        n_correct_5_t = 0
        n_correct_1_e = 0
        n_correct_5_e = 0

        n_total = n_batches*batch_size
        t_loss = 0

        # Apply the schedule for this epoch to every parameter group.
        for g in optimizer.param_groups:
            g['lr'] = lr(epoch)

        time_select = 0 #debug
        time_pictures = 0 #debug
        time_model = 0 #debug

        print("Epoch " + str(epoch+1))

        for i in range(n_batches):

            model.zero_grad()

            start = time.time() #debug
            labels, filenames = selectBatches(batch_size, isTraining=True)
            #images, correct_answer_indices = next(iter(dataloader_train))
            time_select += time.time()-start # debug

            start = time.time() #debug
            images = getPictures(filenames)
            time_pictures += time.time()-start # debug

            labels = labels.to(device=device)

            start = time.time() #debug
            answer = model(images.to(device=device))
            time_model += time.time()-start # debug

            loss = loss_f(answer, labels)
            t_loss += loss.item()

            loss.backward()
            optimizer.step()

            if detailed:
                #images_eval, correct_answer_indices_eval = next(iter(dataloader_eval))

                labels_eval, filenames_eval = selectBatches(batch_size, isTraining=False)
                images_eval = getPictures(filenames_eval)
                labels_eval = labels_eval.to(device=device)

                # Evaluation only needs the scores, so no autograd graph is built.
                # NOTE(review): the model is still in train() mode here —
                # confirm that is intended for the evaluation batches.
                with torch.no_grad():
                    answer_eval = model(images_eval.to(device=device))

                n_correct_1_t += countCorrect(answer, labels)
                n_correct_5_t += countTop5Correct(answer, labels)
                n_correct_1_e += countCorrect(answer_eval, labels_eval)
                n_correct_5_e += countTop5Correct(answer_eval, labels_eval)

        percent_1_t = math.floor(adjust*100*n_correct_1_t/n_total)/adjust
        percent_5_t = math.floor(adjust*100*n_correct_5_t/n_total)/adjust
        percent_1_e = math.floor(adjust*100*n_correct_1_e/n_total)/adjust
        percent_5_e = math.floor(adjust*100*n_correct_5_e/n_total)/adjust

        display_loss = math.floor(adjust*t_loss)/adjust

        best_percent = percent_5_e if percent_5_e > best_percent else best_percent

        #print("Time select : " + str(math.floor(time_select))) #debug
        #print("Time pictures : " + str(math.floor(time_pictures))) #debug
        #print("Time model : " + str(math.floor(time_model))) #debug

        # The printed loss is the sum of the batch losses over the epoch.
        print("\tLoss : " + str(display_loss))

        if detailed:
            print("\tTop-1 training accuracy : " + str(percent_1_t) + "%")
            print("\tTop-5 training accuracy : " + str(percent_5_t) + "%")
            print("\tTop-1 evaluation accuracy : " + str(percent_1_e) + "%")
            print("\tTop-5 evaluation accuracy : " + str(percent_5_e) + "%")

        if percent_5_e > 98.0:
            break

        print("")

    return best_percent


def weights_init(m):
    """Re-initialise one Conv2d or Linear layer from normal distributions.

    Weights are drawn from N(0, 0.02) and biases from N(0, 0.001). Any other
    module type is left untouched, so calling this directly on a container
    does nothing; use `model.apply(weights_init)` to reach every sub-module.

    Args:
        m: the module to initialise in place.
    """
    if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear)):
        torch.nn.init.normal_(m.weight, mean=0.0, std=0.02)
        # Layers created with bias=False have no bias tensor to initialise.
        if m.bias is not None:
            torch.nn.init.normal_(m.bias, mean=0.0, std=0.001)
        
        

In [74]:
# Hyper-parameter candidates. In this notebook they are only referenced by
# disabled (string-literal / commented-out) code; trailing comments keep the
# wider ranges that were tried.
batch_sizes = [100] #[25, 50, 100, 150, 200]
learning_rates = [0.00001] #[0.001, 0.005, 0.01, 0.05, 0.1, 0.5]

# Epoch budget; the training cell reassigns it before calling train().
n_epochs = 50

In [75]:
"""
for bs in batch_sizes:
    for lr in learning_rates:
        trainModel = getModel().to(device=device)
        weights_init(trainModel)
        percent = train(trainModel, n_epochs, bs, lr)
        print("bs=" + str(bs) + " lr=" + str(lr) + " : " + str(percent) + "%")
"""

'\nfor bs in batch_sizes:\n    for lr in learning_rates:\n        trainModel = getModel().to(device=device)\n        weights_init(trainModel)\n        percent = train(trainModel, n_epochs, bs, lr)\n        print("bs=" + str(bs) + " lr=" + str(lr) + " : " + str(percent) + "%")\n'

In [76]:
trainModel = getModel().to(device=device)
# Module.apply calls weights_init on every sub-module. Calling
# weights_init(trainModel) directly only tests the top-level module, which is
# neither Conv2d nor Linear, so no layer was being re-initialised.
trainModel.apply(weights_init)
n_epochs = 700

# Learning-rate schedule: starts at 0.0005 and is divided by 1.002 per epoch.
#lr: Callable[[int], float] = lambda epoch: 0.0003
lr: Callable[[int], float] = lambda epoch: 0.0005/(1.002**epoch)

print("Running on " + device + "\n")
#train(trainModel, n_epochs, batch_sizes[0], learning_rates[0])
train(trainModel, n_epochs, 100, lr)

Running on cuda

Epoch 1
	Loss : 769.72

Epoch 2
	Loss : 769.64

Epoch 3
	Loss : 769.66

Epoch 4
	Loss : 769.65

Epoch 5
	Loss : 769.67
	Top-1 training accuracy : 0.05%
	Top-5 training accuracy : 0.21%
	Top-1 evaluation accuracy : 0.0%
	Top-5 evaluation accuracy : 0.0%

Epoch 6
	Loss : 769.7

Epoch 7
	Loss : 769.66

Epoch 8
	Loss : 769.71

Epoch 9
	Loss : 769.75

Epoch 10
	Loss : 769.64
	Top-1 training accuracy : 0.07%
	Top-5 training accuracy : 0.24%
	Top-1 evaluation accuracy : 0.2%
	Top-5 evaluation accuracy : 0.27%

Epoch 11
	Loss : 769.75

Epoch 12
	Loss : 769.72

Epoch 13
	Loss : 769.69

Epoch 14
	Loss : 769.68

Epoch 15
	Loss : 769.47
	Top-1 training accuracy : 0.02%
	Top-5 training accuracy : 0.21%
	Top-1 evaluation accuracy : 0.01%
	Top-5 evaluation accuracy : 0.13%

Epoch 16
	Loss : 769.37

Epoch 17
	Loss : 769.22

Epoch 18
	Loss : 768.88

Epoch 19
	Loss : 768.27

Epoch 20
	Loss : 766.74
	Top-1 training accuracy : 0.08%
	Top-5 training accuracy : 0.42%
	Top-1 evaluation accur

88.49

In [77]:
# Put the model in evaluation mode before saving it.
trainModel.eval()

# Saves the whole module object (torch.save pickles it); .cpu() is called on
# trainModel itself first.
torch.save(trainModel.cpu(),"./Models/kanji_model_v5_top5_88_eval.pt")

# Saves only the state_dict.
# NOTE(review): this file name says "top5_8" while the line above says
# "top5_88" — confirm which one is intended.
torch.save(trainModel.cpu().state_dict(), "./Models/kanji_model_v5_top5_8_eval.pth")

#temp = torch.jit.script(trainModel.cpu())
#torch.jit.save(temp, "./Models/kanji_model_96_1.pt")


In [9]:
def inspectState(model: torch.nn.Module):
    """Print the first and last scalar of every entry of model.state_dict().

    One line is printed per entry, as "<key> : <first> and <last>", where
    first/last are the elements at index 0 / -1 along every dimension.
    """
    for key, tensor in model.state_dict().items():
        # Flattening keeps element order, so [0] and [-1] are the same
        # corner elements reached by indexing each dimension in turn.
        flat = tensor.reshape(-1)
        first_value = flat[0].item()
        last_value = flat[-1].item()
        print(f"{key} : {first_value} and {last_value}")


In [20]:
# Show the concrete class of the trained model object.
print(trainModel.__class__)

<class 'torch.nn.modules.container.Sequential'>


In [21]:
# Print the first/last value of each state_dict entry of the trained model.
inspectState(trainModel)

0.weight : -0.04455416649580002 and 0.03386272117495537
0.bias : -0.04028937220573425 and -0.011241798289120197
4.weight : -0.01995515078306198 and 0.008423327468335629
4.bias : 0.011923067271709442 and -0.03353721275925636
8.weight : -0.0362517312169075 and -0.002826642943546176
8.bias : 0.0018905889010056853 and 0.010959293693304062
13.weight : -0.012193404138088226 and -0.015958208590745926
13.bias : -0.004388758447021246 and 0.006354519631713629
15.weight : -0.032839540392160416 and 0.006418132688850164
15.bias : -0.043542418628931046 and -0.009433044120669365


In [7]:
#evalModel = torch.load('./Models/kanji_model_96_1.pt')

# Build a new model, then load saved weights into it.
evalModel = getModel().to(device=device)
# NOTE(review): torch.load unpickles the file — only load checkpoints from a
# trusted source. The path also differs from the files written by the save
# cell; confirm this is the intended checkpoint.
model_dict = torch.load('./Models/kanji_model_98_1.pth')
# strict=True: the checkpoint keys must match the model's state_dict keys exactly.
evalModel.load_state_dict(model_dict, strict=True)

#evalModel = torch.jit.load('./Models/kanji_model_96_1.pt')



<All keys matched successfully>

In [10]:
# Print the first/last value of each state_dict entry of the loaded model.
inspectState(evalModel)

0.weight : -0.008655642159283161 and 0.031196339055895805
0.bias : -0.04842814430594444 and -0.021925682201981544
4.weight : -0.01743035577237606 and 0.03202219679951668
4.bias : 0.03816123679280281 and -0.016736704856157303
8.weight : 0.030330840498209 and -0.027031581848859787
8.bias : 0.02300272136926651 and -0.03175605833530426
13.weight : 0.01707916148006916 and 0.003585587255656719
13.bias : 0.0007165489951148629 and -0.001977045787498355
15.weight : 0.004156413488090038 and 0.026956716552376747
15.bias : 0.0008101628045551479 and 0.013689238578081131


In [11]:
"""
print(len(g_tempDictNames))
print(len(g_dictNames))
g_tempDictNames = g_dictNames.copy() #This is the problem for some reason
#tempDictNames = dictNames.copy()
print(len(g_tempDictNames))
print(len(g_dictNames))
"""

def eval(model, n_epoch, batch_size):
    """Measure top-1 accuracy of `model` on evaluation batches.

    Each "epoch" draws one batch with selectBatches(isTraining=False), loads
    it with getPictures, prints the raw model output and the accuracy of that
    batch. Runs under torch.no_grad() with the model in eval() mode.

    NOTE: this name shadows Python's built-in eval(); it is kept because
    other cells call it.

    Args:
        model: module to evaluate.
        n_epoch: number of evaluation rounds.
        batch_size: number of samples per round.

    Returns:
        The best accuracy (in percent) over all rounds.
    """
    n_batches = 1

    with torch.no_grad():

        model.eval()
        best_percent = 0
        for epoch in range(n_epoch):
            n_correct = 0
            n_total = n_batches*batch_size
            for _ in range(n_batches):

                # selectBatches returns (labels, names); the names are what
                # getPictures loads and the labels are the class indices
                # countCorrect compares against.
                labels, filenames = selectBatches(batch_size, isTraining=False)
                images = getPictures(filenames)

                answer = model(images.to(device=device))

                print(answer)
                n_correct += countCorrect(answer, labels.to(device=device))

            adjust = 10000
            percent = math.floor(adjust*100*n_correct/n_total)/adjust
            best_percent = percent if percent > best_percent else best_percent
            print("Epoch " + str(epoch+1) +" evaluation accuracy : " + str(percent) + "%\n")

    return best_percent

In [12]:
# One evaluation round on a single batch of 100 samples.
eval(evalModel.to(device=device),1,100)

tensor([[-30.7500, -30.3785, -29.4361,  ..., -37.8556, -23.0533, -33.4772],
        [-24.5192, -24.5983, -30.3955,  ..., -22.6724, -18.2914, -36.8083],
        [-30.6164, -27.6459, -25.5070,  ..., -27.2292, -35.5250, -37.2954],
        ...,
        [-24.6119, -19.9026, -20.3533,  ..., -16.2286, -16.4663, -19.7740],
        [-34.0735, -23.5342, -20.3726,  ..., -14.8722, -26.1511, -11.7473],
        [-25.3415, -21.5988, -21.1611,  ..., -19.5254, -23.0455, -23.3526]],
       device='cuda:0')
Epoch 1 evaluation accuracy : 99.0%



99.0

In [44]:
# Same call as the previous cell.
# NOTE(review): the recorded output of this cell (3.0%) differs sharply from
# the previous cell's (99.0%) and the execution counts are far apart — the
# two runs presumably used different kernel state; confirm on a fresh run.
eval(evalModel.to(device=device),1,100)

tensor([[-0.7167, -2.0495, -1.5013,  ..., -0.9930,  0.2948, -1.3379],
        [-0.9753, -1.9709, -0.3110,  ..., -0.1124,  1.4938, -1.4874],
        [-1.0059, -1.6983, -1.2354,  ..., -0.7512,  0.1877, -1.6510],
        ...,
        [-1.2744, -1.9040, -0.9118,  ..., -0.5414,  1.2369, -1.2370],
        [-1.0824, -1.5088, -1.5241,  ..., -1.0356,  0.6794, -1.0304],
        [-0.9132, -2.1997,  0.5722,  ...,  0.7440,  2.2643, -1.8624]],
       device='cuda:0')
Epoch 1 evaluation accuracy : 3.0%



3.0

In [14]:
# Show the concrete class of the loaded model object.
print(evalModel.__class__)

<class 'torch.nn.modules.container.Sequential'>


In [13]:
# Show the concrete class of the loaded model object (same statement as the previous cell).
print(evalModel.__class__)

<class 'torch.nn.modules.container.Sequential'>


In [14]:
# Prints every parameter tensor of the model; the output is very long.
print(*evalModel.parameters())

Parameter containing:
tensor([[[[-0.0031,  0.0487,  0.0278,  ...,  0.0526,  0.0290, -0.0216],
          [-0.0020, -0.0315, -0.0169,  ..., -0.0420, -0.0482, -0.0201],
          [-0.0487, -0.0471,  0.0368,  ...,  0.0502,  0.0458,  0.0460],
          ...,
          [-0.0296, -0.0017,  0.0194,  ...,  0.0319,  0.0306,  0.0161],
          [-0.0225, -0.0144,  0.0589,  ..., -0.0539,  0.0564,  0.0209],
          [ 0.0169, -0.0119, -0.0383,  ...,  0.0165,  0.0391,  0.0411]]],


        [[[ 0.0222,  0.0100, -0.0513,  ..., -0.0445, -0.0077, -0.0489],
          [ 0.0139,  0.0470, -0.0056,  ...,  0.0431, -0.0338, -0.0077],
          [-0.0358,  0.0345, -0.0406,  ...,  0.0535, -0.0258,  0.0241],
          ...,
          [-0.0470, -0.0367, -0.0139,  ..., -0.0583,  0.0187, -0.0206],
          [ 0.0115,  0.0631,  0.0421,  ...,  0.0506, -0.0201, -0.0576],
          [ 0.0524, -0.0003, -0.0265,  ...,  0.0375,  0.0307, -0.0386]]],


        [[[-0.0216,  0.0411, -0.0373,  ..., -0.0284, -0.0488, -0.0079],
    

In [11]:
# Prints the full state_dict; the output is very long.
# NOTE(review): the recorded keys start with 'sequence.', unlike the keys
# printed by inspectState in earlier cells — presumably a different model
# definition was loaded for this run; confirm.
print(evalModel.state_dict())

OrderedDict([('sequence.0.weight', tensor([[[[ 0.0215, -0.0028,  0.0252,  ..., -0.0047,  0.0110, -0.0451],
          [-0.0361,  0.0106,  0.0056,  ...,  0.0254,  0.0558,  0.0129],
          [ 0.0346, -0.0083, -0.0217,  ...,  0.0034,  0.0469, -0.0269],
          ...,
          [ 0.0250, -0.0608,  0.0505,  ..., -0.0353, -0.0049, -0.0162],
          [-0.0557, -0.0370, -0.0039,  ..., -0.0258, -0.0156,  0.0315],
          [-0.0398,  0.0281,  0.0325,  ...,  0.0165,  0.0449,  0.0525]]],


        [[[-0.0079,  0.0476, -0.0267,  ..., -0.0211,  0.0335, -0.0410],
          [-0.0446, -0.0125,  0.0214,  ...,  0.0247, -0.0292,  0.0327],
          [ 0.0324, -0.0390, -0.0507,  ...,  0.0456,  0.0128,  0.0576],
          ...,
          [ 0.0042, -0.0332,  0.0245,  ..., -0.0328,  0.0296,  0.0040],
          [-0.0102,  0.0261,  0.0007,  ..., -0.0042,  0.0320,  0.0249],
          [ 0.0539,  0.0084,  0.0551,  ...,  0.0116,  0.0259, -0.0216]]],


        [[[-0.0383, -0.0135,  0.0014,  ...,  0.0164,  0.0334, -