In [None]:
import os
import time
from tqdm import tqdm
import numpy as np
import csv
import pandas as pd
from torch.utils.data import Dataset
from PIL import Image
import torch
import torchvision
import cv2
from PIL import ImageOps,ImageDraw, ImageFont
import matplotlib.pyplot as plt
from torch.utils.data import SubsetRandomSampler
from torch.utils.data import DataLoader
from skimage.transform import resize
from IPython.display import FileLink

# Record once whether CUDA is usable; the functions defined further down read this
# flag to decide whether the model and tensors are moved to the GPU.
gpu_available = torch.cuda.is_available()
print(gpu_available)


In [None]:
# Point size of the character annotations drawn on training samples.
fontsize = 15

# Uncomment the three shell lines below to download the Noto Sans CJK JP font.
# Source: https://www.google.com/get/noto/
#!wget -q --show-progress https://noto-website-2.storage.googleapis.com/pkgs/NotoSansCJKjp-hinted.zip
#!unzip -p NotoSansCJKjp-hinted.zip NotoSansCJKjp-Regular.otf > NotoSansCJKjp-Regular.otf
#!rm NotoSansCJKjp-hinted.zip

# NOTE(review): the download above writes the .otf into the working directory, this line
# loads it from ./kuzishiju-recognition/, and random_sampling loads it from
# ../kuzishiju-training-2/ -- confirm which location is right for your environment.
font = ImageFont.truetype('./kuzishiju-recognition/NotoSansCJKjp-Regular.otf', fontsize, encoding='utf-8')

In [None]:
# Collect every distinct label token (tokens starting with 'U') found in the second
# column of train.csv. A set does the de-duplication so each membership check is
# O(1) instead of rescanning a growing list for every token.
unique_labels = set()
# newline='' is what the csv module asks for on files handed to csv.reader.
with open('train.csv', newline='') as csvfile:
    data = csv.reader(csvfile,delimiter=',')
    for row in data:
        # Tokens that do not start with 'U' (coordinates, and presumably the header
        # cell) are ignored.
        unique_labels.update(word for word in row[1].split() if word.startswith('U'))

# Sorted so that the position of a label inside `classes` is stable across runs;
# that position is what the dataset uses as the numeric class id.
classes = sorted(unique_labels)

In [None]:
# Read the codepoint -> character translation table.
data = pd.read_csv('unicode_translation.csv')
df = pd.DataFrame(data)
# Every row of the table is a (codepoint, character) pair; hand the pairs
# straight to dict() to build the lookup.
unicode_map = dict(map(tuple, data.values))

In [None]:
def calcMaxWidthMaxHeight(images_list,path):
    """Return the largest width and the largest height found among the images.

    Args:
        images_list: iterable of image file names located inside ``path``.
        path: directory that contains those files.

    Returns:
        ``(max_width, max_height)`` in pixels, or ``(0, 0)`` when ``images_list``
        is empty. The two maxima are tracked independently, so they may come
        from two different images.
    """
    max_height, max_width = 0,0
    for image in images_list:
        image_path = os.path.join(path,image)
        # The context manager releases the file handle immediately; without it a
        # scan over a large folder keeps files open until garbage collection.
        with Image.open(image_path) as img:
            width, height = img.size
        max_width = max(max_width, width)
        max_height = max(max_height, height)

    return max_width,max_height
    

def downSample(img,scale_size,max_height=4493,max_width=3248):
    """Pad ``img`` to ``max_width`` x ``max_height`` and shrink the result by ``scale_size``.

    Args:
        img: image object exposing ``size`` as ``(width, height)``.
        scale_size: factor applied to both dimensions after padding.
        max_height: height the image is padded to before scaling.
        max_width: width the image is padded to before scaling.

    Returns:
        ``(new_img, right_pad, left_pad, top_pad, bottom_pad)``. The pads are the
        un-scaled pixel amounts; ``left_pad`` / ``bottom_pad`` are one larger than
        ``right_pad`` / ``top_pad`` when the missing amount is odd.
    """
    extra_w = max_width - img.size[0]
    extra_h = max_height - img.size[1]

    # Halve the missing pixels; an odd remainder goes to left_pad / bottom_pad.
    right_pad = int(extra_w/2)
    left_pad = right_pad + 1 if extra_w%2 != 0 else right_pad
    top_pad = int(extra_h/2)
    bottom_pad = top_pad + 1 if extra_h%2 != 0 else top_pad

    # NOTE(review): Pillow documents a 4-tuple border as (left, top, right, bottom),
    # so the value named right_pad is applied on the LEFT edge and left_pad on the
    # right edge. The order is kept exactly as it was.
    padded = ImageOps.expand(img,(right_pad,top_pad,left_pad,bottom_pad))
    target_size = (int(padded.size[0]*scale_size),int(padded.size[1]*scale_size))
    return padded.resize(target_size),right_pad,left_pad,top_pad,bottom_pad



def newCoords(x,y,w,h,right_pad,left_pad,top_pad,bottom_pad,scale_size):
    """Map a box from original-image pixels onto the padded, down-scaled image.

    Args:
        x, y: top-left corner of the box in the original image.
        w, h: width and height of the box in the original image.
        right_pad, left_pad, top_pad, bottom_pad: pads returned by ``downSample``.
        scale_size: the factor ``downSample`` was called with.

    Returns:
        ``(x0, y0, x1, y1)`` integer corner coordinates in the transformed image.
    """
    # downSample hands its border to ImageOps.expand as
    # (right_pad, top_pad, left_pad, bottom_pad) and Pillow reads a 4-tuple border as
    # (left, top, right, bottom). The picture content is therefore shifted right by
    # right_pad and down by top_pad; adding left_pad / bottom_pad instead put every
    # box one pixel off whenever the padding amount was odd.
    x0 = int((x+right_pad)*scale_size)
    y0 = int((y+top_pad)*scale_size)
    x1 = x0 + int(w*scale_size)
    y1 = y0 + int(h*scale_size)

    return x0,y0,x1,y1

def return_original(img,right_pad,left_pad,top_pad,bottom_pad,scale_size):
    """Crop the scaled padding off ``img`` and resize the remainder by ``1/scale_size``.

    Args:
        img: array indexed as (rows, cols, channels).
        right_pad, left_pad, top_pad, bottom_pad: un-scaled pads returned by ``downSample``.
        scale_size: the factor ``downSample`` was called with.

    Returns:
        Whatever ``resize`` returns for the cropped array and the enlarged shape.
    """
    rows, cols = img.shape[0], img.shape[1]
    # The start of each window is the scaled top_pad / right_pad; the end drops the
    # scaled bottom_pad / left_pad plus one additional row / column.
    row_window = slice(int(top_pad*scale_size), rows-int(bottom_pad*scale_size)-1)
    col_window = slice(int(right_pad*scale_size), cols-int(left_pad*scale_size)-1)
    cropped = img[row_window, col_window]

    expand_scale = 1/scale_size
    restored_shape = (cropped.shape[0]*expand_scale,cropped.shape[1]*expand_scale,cropped.shape[2])
    return resize(cropped,restored_shape)

def old_coords(bbox,right_pad,left_pad,top_pad,bottom_pad,scale_size):
    """Map a box from the padded, down-scaled image back to original-image pixels.

    Args:
        bbox: four values ``(x, y, x1, y1)`` in the transformed image.
        right_pad, left_pad, top_pad, bottom_pad: pads returned by ``downSample``.
        scale_size: the factor ``downSample`` was called with.

    Returns:
        ``(x, y, x1, y1)`` as integers in the original image.
    """
    x,y,x1,y1 = bbox
    expand_scale = 1/scale_size
    # downSample passes (right_pad, top_pad, left_pad, bottom_pad) to ImageOps.expand,
    # which Pillow interprets as (left, top, right, bottom): the content sits right_pad
    # pixels from the left edge and top_pad pixels from the top. Both corners of the box
    # must be shifted back by that same offset; previously x and x1 used different pads
    # and y used bottom_pad, which skewed the box when the padding amount was odd.
    x_shift, y_shift = right_pad, top_pad
    x,y = int((x*expand_scale)-x_shift),int((y*expand_scale)-y_shift)
    x1,y1 = int((x1*expand_scale)-x_shift),int((y1*expand_scale)-y_shift)

    return x,y,x1,y1


In [None]:
class CharsDataset(Dataset):
    """Page images, padded and down-scaled, with optional detection targets.

    With ``train=True`` each item is ``(img_tensor, goals)`` where ``goals`` holds
    ``'labels'`` (int64 indices into the module-level ``classes`` list) and ``'boxes'``
    (float32 ``[x0, y0, x1, y1]`` rows in the transformed image). With ``train=False``
    each item is ``(img_tensor, right_pad, left_pad, top_pad, bottom_pad, file_name)``.

    NOTE(review): the label indices are 0-based while torchvision detection models
    are documented as reserving class 0 for background -- confirm the label numbering
    and the model's ``num_classes`` agree.
    """

    def __init__(self,path,csvfile=None,train=True):
        """List the images in ``path`` and, for training, keep only labelled ones.

        Args:
            path: directory that contains the image files.
            csvfile: CSV whose first column is the image id; required when ``train``.
            train: whether targets are produced by ``__getitem__``.

        Raises:
            ValueError: if ``train`` is true and no ``csvfile`` is given.
        """
        self.images = os.listdir(path)
        self.path = path
        self.train = train
        self.csvfile = csvfile
        #self.max_width, self.max_height = calcMaxWidthMaxHeight(self.images,path) #uncomment to calculate the max width and height in your dataset
        if train:
            if csvfile is None:
                raise ValueError('csvfile is required when train=True')
            data_temp = pd.read_csv(csvfile,index_col=0)
            df_temp = pd.DataFrame(data_temp)
            # Rows without labels are dropped, so their images are filtered out below.
            self.df_temp = df_temp.dropna()
            labelled_ids = self.df_temp.index
            # One pass instead of re-filtering the whole list for every unlabelled image;
            # splitext copes with extensions that are not exactly three characters long.
            self.images = [image for image in self.images
                           if os.path.splitext(image)[0] in labelled_ids]

    def __getitem__(self,idx):
        """Load image ``idx``, pad and scale it, and build its targets when training."""
        scale_size = 0.2
        image = self.images[idx]
        image_path = os.path.join(self.path,image)
        # downSample returns a new image object, so the source file can be closed
        # as soon as it has been processed.
        with Image.open(image_path) as img:
            new_img,right_pad,left_pad,top_pad,bottom_pad = downSample(img,scale_size) #insert the max_width, max_height if custom dataset.
        img_tensor = torchvision.transforms.functional.to_tensor(new_img)
        if self.train is True:
            _id = os.path.splitext(image)[0]
            ret = self.df_temp.loc[_id,:]
            # .iloc makes the positional access explicit; ret[0] relied on the
            # deprecated integer fallback of a label-indexed Series.
            labels = ret.iloc[0].split()
            goals = {}
            targets = []
            bboxes = []
            # The label string is a flat sequence of "codepoint x y w h" groups.
            for i in range(0,len(labels),5):
                final_label = labels[i:i+5]
                label,x,y,w,h = final_label[0],int(final_label[1]),int(final_label[2]),int(final_label[3]),int(final_label[4])
                index = classes.index(label)
                x,y,x1,y1 = newCoords(x,y,w,h,right_pad,left_pad,top_pad,bottom_pad,scale_size)
                targets.append(index)
                bboxes.append([x,y,x1,y1])
            targets = torch.tensor(targets,dtype=torch.int64)
            # reshape keeps the (N, 4) layout even if no box was parsed.
            bboxes = torch.as_tensor(bboxes,dtype=torch.float32).reshape(-1,4)
            goals['labels'] = targets
            goals['boxes']= bboxes

            return img_tensor,goals
        else:
             return img_tensor,right_pad,left_pad,top_pad,bottom_pad,image

    def __len__(self):
        """Number of images kept by ``__init__``."""
        return len(self.images)

In [None]:
# Build the training dataset and report how long construction took
# (directory listing, CSV load and filtering out unlabelled images).
start = time.time()
train_data = CharsDataset('train_images','train.csv')
print('elapse time = ', time.time() - start)

In [None]:
def custom_collate_fn(batch):
    """Collate ``(image, target)`` samples into a stacked image batch and a target list.

    Args:
        batch: sequence of samples where ``sample[0]`` is an image tensor and
            ``sample[1]`` is a dict with ``'labels'`` and ``'boxes'``.

    Returns:
        ``(imgs, targets)``: ``imgs`` is the images stacked along a new first
        dimension; ``targets`` is a list with one ``{'labels', 'boxes'}`` dict per
        sample (int64 labels, float32 boxes). Targets stay a list because each
        sample may carry a different number of boxes.
    """
    tensors_images = []
    tensors_targets = []
    for sample in batch:
        tensors_images.append(sample[0])
        target = sample[1]
        # as_tensor reuses an existing tensor; torch.tensor(existing_tensor) makes a
        # needless copy and emits a copy-construct UserWarning.
        tensors_targets.append({
            'labels': torch.as_tensor(target['labels'],dtype=torch.int64),
            'boxes': torch.as_tensor(target['boxes'],dtype=torch.float32),
        })
    imgs = torch.stack(tensors_images,dim=0)

    return imgs,tensors_targets

In [None]:
def random_sampling(data,model=None,train=True):
    """Plot one randomly chosen sample with its boxes (red) and characters (blue).

    Args:
        data: dataset indexed by integer. With ``train=True`` an item is
            ``(image_tensor, targets)``; otherwise it is
            ``(image_tensor, right_pad, left_pad, top_pad, bottom_pad, name)``.
        model: detector used to predict labels and boxes when ``train`` is false.
        train: draw the stored targets (true) or the model predictions (false).

    Raises:
        ValueError: if ``train`` is false and no ``model`` is given.
    """
    # Must match the factor CharsDataset applies in __getitem__.
    scale_size = 0.2
    font_path = '../kuzishiju-training-2/NotoSansCJKjp-Regular.otf'

    number = np.random.randint(0,len(data))
    if train:
        data,targets = data[number]
        img = np.transpose(data.numpy(),(1,2,0))
        labels = targets['labels']
        boxes = targets['boxes']
        width = 1
        font_size = fontsize
    else:
        if model is None:
            raise ValueError('model is required when train=False')
        data,right_pad,left_pad,top_pad,bottom_pad,name = data[number]
        data = data.unsqueeze(dim=0)
        if gpu_available:
            data = data.cuda()
            model = model.cuda()
        model.eval()
        # Inference only: no_grad avoids building an autograd graph for the forward pass.
        with torch.no_grad():
            output = model(data)
        labels = output[0]['labels']
        boxes = output[0]['boxes']
        if gpu_available:
           data = data.cpu()
        data = data.squeeze()
        img = np.transpose(data.numpy(),(1,2,0))
        # Predictions are drawn on the image restored to its original size.
        img = return_original(img,right_pad,left_pad,top_pad,bottom_pad,scale_size=scale_size)
        width = 5
        font_size = 70

    # Boxes and characters are drawn on two blank grayscale canvases that are
    # later used as masks over the image.
    box_img = np.zeros_like(img)
    box_img = Image.fromarray(box_img,mode='RGB')
    box_img = ImageOps.grayscale(box_img)

    char_img = np.zeros_like(img)
    char_img = Image.fromarray(char_img,mode='RGB')
    char_img = ImageOps.grayscale(char_img)

    box_draw = ImageDraw.Draw(box_img)
    char_draw = ImageDraw.Draw(char_img)

    # The font is the same for every box, so it is read from disk at most once
    # (and not at all when there is nothing to draw).
    font = None
    for label,box in zip(labels,boxes):
        if font is None:
            font = ImageFont.truetype(font_path, font_size, encoding='utf-8')
        if not train:
            x,y,x2,y2 = old_coords(box,right_pad,left_pad,top_pad,bottom_pad,scale_size)
        else:
            [x,y,x2,y2] = box
        w,h = x2-x , y2-y
        char = unicode_map[classes[label]]
        box_draw.rectangle([x,y,x2,y2],outline=(255),width=width)
        char_draw.text((x + w + fontsize/4, y + h/2 - fontsize), char, fill=(255), font=font)

    box_img = np.array(box_img)
    char_img = np.array(char_img)
    img[box_img > 0] = (255,0,0)
    img[char_img > 0] = (0,0,255)

    plt.figure(figsize=(50,20))
    plt.imshow(img)
    
# Sanity check: show one random training page with its stored boxes and characters.
random_sampling(train_data)

In [None]:
# Loader settings.
batch_size = 4
num_workers = 0
# NOTE(review): valid_size is not referenced by any cell visible here (no validation
# split is created) -- confirm whether a split was intended.
valid_size = 0.2


# shuffle=True so every epoch visits the pages in a new order instead of replaying
# the fixed directory-listing order; num_workers is now actually passed through.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True,
                          num_workers=num_workers, collate_fn=custom_collate_fn)

In [None]:
# Faster R-CNN with a ResNet-50 FPN backbone, created with pretrained=False and a
# 4212-way classification head.
# NOTE(review): torchvision documents num_classes as including the background class,
# while CharsDataset emits 0-based indices into `classes` -- confirm that 4212 and the
# label numbering agree (label 0 may be treated as background).
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False,num_classes=4212)
# Mark every parameter as trainable.
for param in model.parameters():
    param.requires_grad=True

In [None]:
def train(model,train_dataloader,epochs=20,lr=1e-3):
    """Optimise ``model`` with Adam and print the mean training loss of each epoch.

    Args:
        model: module that, given ``(images, targets)``, returns a dict of loss tensors.
        train_dataloader: iterable of ``(images, targets)`` batches exposing ``.dataset``.
        epochs: number of passes over ``train_dataloader``.
        lr: Adam learning rate.
    """
    print('Training the network')
    optimizer = torch.optim.Adam(model.parameters(),lr = lr)

    if gpu_available:
        model = model.cuda()

    for epoch in range(1,epochs+1):
        running_loss = 0
        model.train()
        for images,targets in tqdm(train_dataloader):
            if gpu_available:
                images = images.cuda()
                targets = [{key: value.cuda() for key, value in entry.items()} for entry in targets]
            optimizer.zero_grad()
            loss_dict = model(images,targets)
            # The individual loss terms returned by the model are summed into one scalar.
            total_loss = sum(loss_dict.values())
            total_loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch figure is a per-sample average.
            running_loss += total_loss.item()*images.size(0)

        epoch_loss = running_loss/len(train_dataloader.dataset)
        print("Epoch {}/{} ...... Train Loss {:.6f}".format(epoch,epochs,epoch_loss))
        
        
            

**Train the model**

In [None]:
# Release cached GPU memory, then train with the default epochs / learning rate
# and report the wall-clock duration.
torch.cuda.empty_cache()
start = time.time()
train(model,train_loader)
print('Training time:',time.time() - start)

In [None]:
# Persist only the weights (state_dict); loading them back requires building the same model first.
torch.save(model.state_dict(),"kuzushiji-recognition.pt")

In [None]:
#model.load_state_dict(torch.load('kuzushiji-recognition.pt'))

**Test the model**

In [None]:
# Unlabelled test pages: items carry the padding info and file name instead of targets.
test_data = CharsDataset('test_images',train=False)
# Visual check of the model's predictions on one random test page.
random_sampling(test_data,model,train=False)

**Create the submission file**

In [None]:
def create_sample_file(loader,model):
    """Run the detector over every sample and build the submission table.

    Args:
        loader: iterable yielding
            ``(image_tensor, right_pad, left_pad, top_pad, bottom_pad, file_name)``.
        model: detector that, given a batched image tensor, returns a list whose first
            element is a dict with ``'labels'`` and ``'boxes'``.

    Returns:
        ``pandas.DataFrame`` with columns ``image_id`` (file name without extension)
        and ``labels`` (space-separated ``"<class> <x_center> <y_center>"`` triples,
        with centres expressed in original-image pixels).
    """
    # Must match the factor CharsDataset applies in __getitem__.
    scale_size = 0.2

    # Loop-invariant setup is done once. torch.no_grad() already disables gradient
    # tracking, so the model's requires_grad flags are left untouched -- switching
    # them off here would silently break any later training of the same model.
    model.eval()
    if gpu_available:
        model = model.cuda()

    images = []
    goals = []
    with torch.no_grad():
        for data,right_pad,left_pad,top_pad,bottom_pad,name in tqdm(loader):
            if gpu_available:
                data = data.cuda()
            output = model(data.unsqueeze(dim=0))
            labels = output[0]['labels']
            boxes = output[0]['boxes']

            # Only the boxes are mapped back to original coordinates; the image itself
            # is not needed for the submission, so it is no longer copied back to the
            # CPU and upsampled for every page.
            targets = []
            for label,box in zip(labels,boxes):
                x,y,x2,y2 = old_coords(box,right_pad,left_pad,top_pad,bottom_pad,scale_size)
                x_center,y_center = int((x+x2)/2),int((y+y2)/2)
                char = classes[label]
                targets.append(' '.join(map(str,[char,x_center,y_center])))
            goals.append(' '.join(targets))
            images.append(os.path.splitext(name)[0])

    dicts = {'image_id':images,'labels':goals}
    df_dict = pd.DataFrame.from_dict(dicts)
    return df_dict

In [None]:
# Release cached GPU memory, predict on every test page and time the run.
torch.cuda.empty_cache()
start = time.time()
df_dict = create_sample_file(test_data,model)
print(time.time()-start)
# Display the resulting table as the cell output.
df_dict

In [None]:
# Write the submission without the DataFrame index column.
df_dict.to_csv('../../working/submission6.csv',index=False)