# EAST

In [None]:
import os
import json

data_dir = "/kaggle/input/icar-data/0325updated.task1train(626p)/"
output_dir = "/kaggle/working/EAST/data"  
os.makedirs(output_dir, exist_ok=True) 

image_paths = []
boxes = []

for filename in sorted(os.listdir(data_dir)):
    if filename.endswith('.jpg'):
        img_path = os.path.join(data_dir, filename)
        txt_path = img_path.replace('.jpg', '.txt')

        if not os.path.exists(txt_path):
            continue

        with open(txt_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        image_boxes = []
        for line in lines:
            parts = line.strip().split(',')
            if len(parts) < 9:
                continue

            try:
                coords = list(map(int, parts[:8])) 
                image_boxes.append(coords)  
            except ValueError:
                continue

        if image_boxes:
            image_paths.append(img_path)
            boxes.append(image_boxes)

with open(os.path.join(output_dir, 'images.json'), 'w') as f:
    json.dump(image_paths, f, indent=2)

with open(os.path.join(output_dir, 'boxes.json'), 'w') as f:
    json.dump(boxes, f, indent=2)


In [None]:
import os
import json
import math
import numpy as np
import cv2
import torch
from shapely.geometry import Polygon
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def parse_annotation(annotation_path):
    boxes = list()

    with open(annotation_path, 'r') as f:
        for line in f.readlines():
            line = line.split(',')

            x1 = line[0]
            y1 = line[1]
            x2 = line[2]
            y2 = line[3]
            x3 = line[4]
            y3 = line[5]
            x4 = line[6]
            y4 = line[7]

            cor = [x1, y1, x2, y2, x3, y3, x4, y4]
            boxes.append(cor)

    return boxes


def create_json_data(directory='data/raw_data/train', images_json='EAST/data/images.json',
                     boxes_json='EAST/data/boxes.json'):
    image_paths = list()
    boxes = list()

    for filename in os.listdir(directory):
        filename = filename[: -4]

        if filename + '.txt' not in os.listdir(directory):
            continue

        if filename + '.jpg' not in os.listdir(directory):
            continue

        image_paths.append(os.path.join(directory, filename + '.jpg'))
        boxes.append(parse_annotation(directory + '/' + filename + '.txt'))

    with open(images_json, 'w') as f:
        json.dump(image_paths, f)

    with open(boxes_json, 'w') as f:
        json.dump(boxes, f)


def cal_distance(x1, y1, x2, y2):

    return math.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)


def move_points(vertices, index1, index2, r, coef):
    index1 = index1 % 4
    index2 = index2 % 4
    x1_index = index1 * 2 + 0
    y1_index = index1 * 2 + 1
    x2_index = index2 * 2 + 0
    y2_index = index2 * 2 + 1

    r1 = r[index1]
    r2 = r[index2]
    length_x = vertices[x1_index] - vertices[x2_index]
    length_y = vertices[y1_index] - vertices[y2_index]
    length = cal_distance(vertices[x1_index], vertices[y1_index], vertices[x2_index], vertices[y2_index])
    if length > 1:
        ratio = (r1 * coef) / length
        vertices[x1_index] += ratio * (-length_x)
        vertices[y1_index] += ratio * (-length_y)
        ratio = (r2 * coef) / length
        vertices[x2_index] += ratio * length_x
        vertices[y2_index] += ratio * length_y
    return vertices


def shrink_poly(vertices, coef=0.3):
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    r1 = min(cal_distance(x1, y1, x2, y2), cal_distance(x1, y1, x4, y4))
    r2 = min(cal_distance(x2, y2, x1, y1), cal_distance(x2, y2, x3, y3))
    r3 = min(cal_distance(x3, y3, x2, y2), cal_distance(x3, y3, x4, y4))
    r4 = min(cal_distance(x4, y4, x1, y1), cal_distance(x4, y4, x3, y3))
    r = [r1, r2, r3, r4]

    # obtain offset to perform move_points() automatically
    if cal_distance(x1, y1, x2, y2) + cal_distance(x3, y3, x4, y4) > \
            cal_distance(x2, y2, x3, y3) + cal_distance(x1, y1, x4, y4):
        offset = 0  # two longer edges are (x1y1-x2y2) & (x3y3-x4y4)
    else:
        offset = 1  # two longer edges are (x2y2-x3y3) & (x4y4-x1y1)

    v = vertices.copy()
    v = move_points(v, 0 + offset, 1 + offset, r, coef)
    v = move_points(v, 2 + offset, 3 + offset, r, coef)
    v = move_points(v, 1 + offset, 2 + offset, r, coef)
    v = move_points(v, 3 + offset, 4 + offset, r, coef)
    return v


def get_rotate_mat(theta):

    return np.array([[math.cos(theta), -math.sin(theta)], [math.sin(theta), math.cos(theta)]])


def rotate_vertices(vertices, theta, anchor=None):
    v = vertices.reshape((4, 2)).T
    if anchor is None:
        anchor = v[:, :1]
    rotate_map = get_rotate_mat(theta)
    res = np.dot(rotate_map, v - anchor)

    return (res + anchor).T.reshape(-1)


def get_boundary(vertices):
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices

    x_min = min(x1, x2, x3, x4)
    x_max = max(x1, x2, x3, x4)
    y_min = min(y1, y2, y3, y4)
    y_max = max(y1, y2, y3, y4)

    return x_min, x_max, y_min, y_max


def cal_error(vertices):
    x_min, x_max, y_min, y_max = get_boundary(vertices)
    x1, y1, x2, y2, x3, y3, x4, y4 = vertices
    err = cal_distance(x1, y1, x_min, y_min) + cal_distance(x2, y2, x_max, y_min) + \
            cal_distance(x3, y3, x_max, y_max) + cal_distance(x4, y4, x_min, y_max)

    return err


def find_min_rect_angle(vertices):
    angle_interval = 1
    angle_list = list(range(-90, 90, angle_interval))
    area_list = []
    for theta in angle_list:
        rotated = rotate_vertices(vertices, theta / 180 * math.pi)
        x1, y1, x2, y2, x3, y3, x4, y4 = rotated
        temp_area = (max(x1, x2, x3, x4) - min(x1, x2, x3, x4)) * \
                    (max(y1, y2, y3, y4) - min(y1, y2, y3, y4))
        area_list.append(temp_area)

    sorted_area_index = sorted(list(range(len(area_list))), key=lambda k: area_list[k])
    min_error = float('inf')
    best_index = -1
    rank_num = 10
    # find the best angle with correct orientation
    for index in sorted_area_index[:rank_num]:
        rotated = rotate_vertices(vertices, angle_list[index] / 180 * math.pi)
        temp_error = cal_error(rotated)
        if temp_error < min_error:
            min_error = temp_error
            best_index = index
    return angle_list[best_index] / 180 * math.pi


def rotate_all_pixels(rotate_mat, anchor_x, anchor_y, length):
    x = np.arange(length)
    y = np.arange(length)
    x, y = np.meshgrid(x, y)
    x_lin = x.reshape((1, x.size))
    y_lin = y.reshape((1, x.size))
    coord_mat = np.concatenate((x_lin, y_lin), 0)
    rotated_coord = np.dot(rotate_mat, coord_mat - np.array([[anchor_x], [anchor_y]])) + \
                    np.array([[anchor_x], [anchor_y]])
    rotated_x = rotated_coord[0, :].reshape(x.shape)
    rotated_y = rotated_coord[1, :].reshape(y.shape)
    return rotated_x, rotated_y



def rotate_img(img, vertices, angle_range=10):
    center_x = (img.width - 1) / 2
    center_y = (img.height - 1) / 2
    angle = angle_range * (np.random.rand() * 2 - 1)
    img = img.rotate(angle, Image.BILINEAR)
    new_vertices = np.zeros(vertices.shape)
    for i, vertice in enumerate(vertices):
        new_vertices[i, :] = rotate_vertices(vertice, -angle / 180 * math.pi, np.array([[center_x], [center_y]]))
    return img, new_vertices


def resize(img, vertices, length):
    shape = img.size
    new_image = img.resize((length, length))

    new_vertices = np.zeros(vertices.shape)
    for i, vertice in enumerate(vertices):
      new_vertices[i, [0, 2, 4, 6]] = vertices[i, [0, 2, 4, 6]] * (length / shape[0])
      new_vertices[i, [1, 3, 5, 7]] = vertices[i, [1, 3, 5, 7]] * (length / shape[1])
    
    return new_image, new_vertices


def get_score_geo(img, vertices, scale, length):
    score_map = np.zeros((int(img.height * scale), int(img.width * scale), 1), np.float32)
    geo_map = np.zeros((int(img.height * scale), int(img.width * scale), 5), np.float32)

    index = np.arange(0, length, int(1 / scale))
    index_x, index_y = np.meshgrid(index, index)
    ignored_polys = []
    polys = []

    for i, vertice in enumerate(vertices):
        poly = np.around(scale * shrink_poly(vertice).reshape((4, 2))).astype(np.int32)  # scaled & shrinked
        polys.append(poly)
        temp_mask = np.zeros(score_map.shape[:-1], np.float32)
        cv2.fillPoly(temp_mask, [poly], 1)

        theta = 0
        rotate_mat = get_rotate_mat(theta)

        rotated_vertices = rotate_vertices(vertice, theta)
        x_min, x_max, y_min, y_max = get_boundary(rotated_vertices)
        rotated_x, rotated_y = rotate_all_pixels(rotate_mat, vertice[0], vertice[1], length)

        d1 = rotated_y - y_min
        d1[d1 < 0] = 0
        d2 = y_max - rotated_y
        d2[d2 < 0] = 0
        d3 = rotated_x - x_min
        d3[d3 < 0] = 0
        d4 = x_max - rotated_x
        d4[d4 < 0] = 0
        geo_map[:, :, 0] += d1[index_y, index_x] * temp_mask
        geo_map[:, :, 1] += d2[index_y, index_x] * temp_mask
        geo_map[:, :, 2] += d3[index_y, index_x] * temp_mask
        geo_map[:, :, 3] += d4[index_y, index_x] * temp_mask
        geo_map[:, :, 4] += theta * temp_mask

    cv2.fillPoly(score_map, polys, 1)

    score_map = torch.Tensor(score_map).permute(2, 0, 1)
    geo_map = torch.Tensor(geo_map).permute(2, 0, 1)

    return score_map, geo_map


In [None]:
from torch.utils.data import Dataset
from PIL import Image
from torchvision import transforms
import numpy as np
import json
import os
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


class ReceiptDataset(Dataset):
    def __init__(self, image_paths, boxes, scale=0.25, length=512):
        super(ReceiptDataset, self).__init__()
        self.image_paths = image_paths
        self.boxes = boxes
        self.scale = scale
        self.length = length
        self.tranforms = transforms.Compose([
            transforms.ColorJitter(0.5, 0.5, 0.5, 0.25),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5),
                                 std=(0.5, 0.5, 0.5))
        ])

    def __getitem__(self, item):
        vertices = np.array(self.boxes[item], dtype=int)

        image = Image.open(self.image_paths[item])
        image = image.convert('RGB')
        # image, vertices = rotate_img(image, vertices)
        image, vertices = resize(image, vertices, self.length)

        score_map, geo_map = get_score_geo(image, vertices, self.scale, self.length)
        image = self.tranforms(image)

        return image, score_map, geo_map

    def __len__(self):
        return len(self.image_paths)


if __name__ == '__main__':
    with open('EAST/data/images.json', 'r') as f:
        image_paths = json.load(f)
    with open('EAST/data/boxes.json', 'r') as f:
        boxes = json.load(f)

    dataset = ReceiptDataset(image_paths=[image_paths[0]], boxes=[boxes[0]])
    # print(dataset[0])

In [None]:
import torch
from torch import nn


def get_dice_loss(gt_score, pred_score):
    inter = torch.sum(gt_score * pred_score)
    union = torch.sum(gt_score) + torch.sum(pred_score) + 1e-5

    return 1. - 2 * inter / union


def get_geo_loss(gt_geo, pred_geo):
    d1_gt, d2_gt, d3_gt, d4_gt, angle_gt = torch.split(gt_geo, 1, 1)
    d1_pred, d2_pred, d3_pred, d4_pred, angle_pred = torch.split(pred_geo, 1, 1)
    area_gt = (d1_gt + d2_gt) * (d3_gt + d4_gt)
    area_pred = (d1_pred + d2_pred) * (d3_pred + d4_pred)
    w_inter = torch.min(d1_gt, d1_pred) + torch.min(d2_gt, d2_pred)
    h_inter = torch.min(d3_gt, d3_pred) + torch.min(d4_gt, d4_pred)
    area_inter = w_inter * h_inter
    area_union = area_gt + area_pred - area_inter
    iou_loss_map = - torch.log((area_inter + 1.0) / (area_union + 1.0))
    angle_loss_map = 1 - torch.cos(angle_gt - angle_pred)

    return iou_loss_map, angle_loss_map


class Loss(nn.Module):
    def __init__(self, weight_angle=10):
        super(Loss, self).__init__()
        self.weight_angle = weight_angle

    def forward(self, gt_score, pred_score, gt_geo, pred_geo):
        if torch.sum(gt_score) < 1:
            return torch.sum(pred_score + pred_geo) * 0

        classify_loss = get_dice_loss(gt_score, pred_score)
        iou_loss_map, angle_loss_map = get_geo_loss(gt_geo, pred_geo)

        angle_loss = torch.sum(angle_loss_map * gt_score) / torch.sum(gt_score)
        iou_loss = torch.sum(iou_loss_map * gt_score) / torch.sum(gt_score)
        geo_loss = self.weight_angle * angle_loss + iou_loss

        return geo_loss + classify_loss

In [None]:
from torch import nn
import torch.nn.functional as F
import json
import torch
import math



cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']


def make_layers(cfg, batch_norm=False):
    layers = list()
    in_channel = 3

    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels=in_channel, out_channels=v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]

            in_channel = v

    return nn.Sequential(*layers)


class VGG(nn.Module):
    def __init__(self, features):
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000)
        )
        self.init_weights()

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)


class Extractor(nn.Module):
    def __init__(self, pretrained):
        super(Extractor, self).__init__()
        vgg16_bn = VGG(make_layers(cfg, batch_norm=True))

        if pretrained:
            vgg16_bn.load_state_dict(torch.load('./EAST/data/vgg16_bn.pth'))
            print('Model loaded')

        self.features = vgg16_bn.features

    def forward(self, x):
        out = list()
        for layer in self.features:
            x = layer(x)
            if isinstance(layer, nn.MaxPool2d):
                out.append(x)

        return out[1:]


class Merge(nn.Module):
    def __init__(self):
        super(Merge, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1024, out_channels=128, kernel_size=1)
        self.bn1 = nn.BatchNorm2d(128)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(128)
        self.relu2 = nn.ReLU()

        self.conv3 = nn.Conv2d(in_channels=384, out_channels=64, kernel_size=1)
        self.bn3 = nn.BatchNorm2d(64)
        self.relu3 = nn.ReLU()
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(64)
        self.relu4 = nn.ReLU()

        self.conv5 = nn.Conv2d(in_channels=192, out_channels=32, kernel_size=1)
        self.bn5 = nn.BatchNorm2d(32)
        self.relu5 = nn.ReLU()
        self.conv6 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.bn6 = nn.BatchNorm2d(32)
        self.relu6 = nn.ReLU()

        self.conv7 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.bn7 = nn.BatchNorm2d(32)
        self.relu7 = nn.ReLU()

        self.init_weights()

    def forward(self, x):
        y = F.interpolate(x[3], scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[2]), dim=1)
        y = self.relu1(self.bn1(self.conv1(y)))
        y = self.relu2(self.bn2(self.conv2(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[1]), dim=1)
        y = self.relu3(self.bn3(self.conv3(y)))
        y = self.relu4(self.bn4(self.conv4(y)))

        y = F.interpolate(y, scale_factor=2, mode='bilinear', align_corners=True)
        y = torch.cat((y, x[0]), dim=1)
        y = self.relu5(self.bn5(self.conv5(y)))
        y = self.relu6(self.bn6(self.conv6(y)))

        y = self.relu7(self.bn7(self.conv7(y)))

        return y

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)

            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)


class Output(nn.Module):
    def __init__(self):
        super(Output, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
        self.sigmoid1 = nn.Sigmoid()

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=4, kernel_size=1)
        self.sigmoid2 = nn.Sigmoid()

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=1, kernel_size=1)
        self.sigmoid3 = nn.Sigmoid()

        self.scope = 512

        self.init_weights()

    def forward(self, x):
        score = self.sigmoid1(self.conv1(x)) 
        loc = self.sigmoid2(self.conv2(x)) * self.scope
        angle = (self.sigmoid3(self.conv3(x)) - 0.5) * math.pi

        geo = torch.cat((loc, angle), dim=1)

        return score, geo

    def init_weights(self):
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.constant_(layer.bias, 0)


class East(nn.Module):
    def __init__(self, pretrained=False):
        super(East, self).__init__()
        self.extractor = Extractor(pretrained)
        self.merge = Merge()
        self.output = Output()

    def forward(self, x):
        x = self.extractor(x)
        x = self.merge(x)
        x = self.output(x)

        return x


if __name__ == '__main__':
    with open('EAST/data/images.json', 'r') as f:
        image_paths = json.load(f)
    with open('EAST/data/boxes.json', 'r') as f:
        boxes = json.load(f)

    dataset = ReceiptDataset(image_paths[: 2], boxes[: 2])
    model = East()

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.optim import Adam
from tqdm import tqdm
import json
import torch


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Split train test
IMAGE_PATHS = 'EAST/data/images.json'
BOXES = 'EAST/data/boxes.json'

with open(IMAGE_PATHS, 'r') as f:
    image_paths = json.load(f)
with open(BOXES, 'r') as f:
    boxes = json.load(f)

BATCH_SIZE = 32

X_train, X_val, y_train, y_val = train_test_split(image_paths, boxes,
                                                  test_size=0.35, shuffle=True, random_state=2021)
train_dataset = ReceiptDataset(X_train, y_train)
val_dataset = ReceiptDataset(X_val, y_val)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# Model
EPOCHS = 80

model = East()
model = model.to(device)
model = torch.nn.DataParallel(model)
# model.load_state_dict(torch.load('/kaggle/working/east_best.pt.pt'))
lr = 1e-4
loss_fn = Loss().to(device)
optimizer = Adam(model.parameters(), lr=lr)
best_val_loss = float('inf')

train_loss = []
val_loss = []

for epoch in range(EPOCHS):
    print('Epoch {}'.format(epoch + 1))

    model.train()
    epoch_train_loss = 0
    for batch_idx, (X_batch_train, gt_score, gt_geo) in enumerate(tqdm(train_dataloader, desc="Training")):
        X_batch_train = X_batch_train.to(device)
        gt_score = gt_score.to(device)
        gt_geo = gt_geo.to(device)
        pred_score, pred_geo = model(X_batch_train)

        loss = loss_fn(gt_score, pred_score, gt_geo, pred_geo)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item()


    avg_train_loss = epoch_train_loss / len(train_dataloader)
    train_loss.append(avg_train_loss)
    print(f"Train Loss: {avg_train_loss:.4f}")

    
    model.eval()
    epoch_val_loss = 0
    with torch.no_grad():
        for X_batch, gt_score, gt_geo in tqdm(val_dataloader, desc="Validation"):
            X_batch = X_batch.to(device)
            gt_score = gt_score.to(device)
            gt_geo = gt_geo.to(device)

            pred_score, pred_geo = model(X_batch)
            loss = loss_fn(gt_score, pred_score, gt_geo, pred_geo)
            epoch_val_loss += loss.item()

    avg_val_loss = epoch_val_loss / len(val_dataloader)
    val_loss.append(avg_val_loss)
    print(f"Val Loss: {avg_val_loss:.4f}")

    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), 'east_best.pt')
        print(f"Saved best model (Val Loss: {best_val_loss:.4f})")

In [None]:
!pip install lanms

In [None]:
import torch
from torchvision import transforms
from PIL import Image, ImageDraw
import os
import numpy as np
from collections import OrderedDict
import lanms



def resize_img(img):
	w, h = img.size
	resize_w = w
	resize_h = h

	resize_h = resize_h if resize_h % 32 == 0 else int(resize_h / 32) * 32
	resize_w = resize_w if resize_w % 32 == 0 else int(resize_w / 32) * 32
	img = img.resize((resize_w, resize_h), Image.BILINEAR)
	ratio_h = resize_h / h
	ratio_w = resize_w / w

	return img, ratio_h, ratio_w


def load_pil(img):
	t = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])
	return t(img).unsqueeze(0)


def is_valid_poly(res, score_shape, scale):
	cnt = 0
	for i in range(res.shape[1]):
		if res[0, i] < 0 or res[0, i] >= score_shape[1] * scale or \
				res[1, i] < 0 or res[1, i] >= score_shape[0] * scale:
			cnt += 1
	return True if cnt <= 1 else False


def restore_polys(valid_pos, valid_geo, score_shape, scale=4):
	polys = []
	index = []
	valid_pos *= scale
	d = valid_geo[:4, :]  # 4 x N

	for i in range(valid_pos.shape[0]):
		x = valid_pos[i, 0]
		y = valid_pos[i, 1]
		y_min = y - d[0, i] * 1.3
		y_max = y + d[1, i] * 1.3
		x_min = x - d[2, i] * 1.1
		x_max = x + d[3, i] * 1.1

		temp_x = np.array([[x_min, x_max, x_max, x_min]])
		temp_y = np.array([[y_min, y_min, y_max, y_max]])

		coordinate = np.concatenate((temp_x, temp_y), axis=0)

		if is_valid_poly(coordinate, score_shape, scale):
			index.append(i)
			polys.append([coordinate[0, 0], coordinate[1, 0], coordinate[0, 1], coordinate[1, 1],
						coordinate[0, 2], coordinate[1, 2], coordinate[0, 3], coordinate[1, 3]])

	return np.array(polys), index


def get_boxes(score, geo, score_thresh=0.9, nms_thresh=0.2):
	score = score[0, :, :]
	xy_text = np.argwhere(score > score_thresh)
	if xy_text.size == 0:
		return None

	xy_text = xy_text[np.argsort(xy_text[:, 0])]
	valid_pos = xy_text[:, ::-1].copy()
	valid_geo = geo[:, xy_text[:, 0], xy_text[:, 1]]
	polys_restored, index = restore_polys(valid_pos, valid_geo, score.shape) 
	if polys_restored.size == 0:
		return None

	boxes = np.zeros((polys_restored.shape[0], 9), dtype=np.float32)
	boxes[:, :8] = polys_restored
	boxes[:, 8] = score[xy_text[index, 0], xy_text[index, 1]]
	boxes = lanms.merge_quadrangle_n9(boxes.astype('float32'), nms_thresh)

	return boxes


def adjust_ratio(boxes, ratio_w, ratio_h):
	if boxes is None or boxes.size == 0:
		return None
	boxes[:, [0, 2, 4, 6]] /= ratio_w
	boxes[:, [1, 3, 5, 7]] /= ratio_h
	return np.around(boxes)
	
	
def detect(img, model, device):
	img, ratio_h, ratio_w = resize_img(img)
	with torch.no_grad():
		score, geo = model(load_pil(img).to(device))
	boxes = get_boxes(score.squeeze(0).cpu().numpy(), geo.squeeze(0).cpu().numpy())
	return adjust_ratio(boxes, ratio_w, ratio_h)


def plot_boxes(img, boxes):
	if boxes is None:
		return img
	
	draw = ImageDraw.Draw(img)
	for box in boxes:
		draw.polygon([box[0], box[1], box[2], box[3], box[4], box[5], box[6], box[7]], outline=(0, 255, 0))
	return img


if __name__ == '__main__':
    model_path = '/kaggle/working/east_best.pt'
    img_path = '/kaggle/input/icar-data/0325updated.task1train(626p)/X00016469612.jpg'
    res_img = './res.png'

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    model = East().to(device)
    state_dict = torch.load('/kaggle/working/east_best.pt', map_location=device)
    
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('module.'):
            k = k[7:]  
        new_state_dict[k] = v
    
    # Load vào model
    model.load_state_dict(new_state_dict)
    model.eval()
    img = Image.open(img_path)

    boxes = detect(img, model, device)

    print(boxes)

    plot_img = plot_boxes(img, boxes)
    plot_img.save(res_img)

# CRNN

In [9]:
import os
from PIL import Image
import json



def parse_annotation(annotation):
    boxes = list()
    texts = list()

    with open(annotation, 'r') as f:
        for line in f.readlines():
            line_spl = line.split(',')

            full_box = [int(line_spl[i]) for i in range(8)]
            box_len = ' '.join([str(element) for element in full_box])
            text = line[len(box_len) + 1:-1]

            box = [full_box[0], full_box[1], full_box[4], full_box[5]]

            boxes.append(box)
            texts.append(text)

    return boxes, texts


def create_data(data_directory='data/raw_data/train',
                image_directory='data/task2/image', annotation_directory='data/task2/annotation'):
    all_image_paths = list()
    all_texts = list()

    index = 0
    for file in os.listdir(data_directory):
        if file.endswith('.txt'):
            if file[: -4] + '.jpg' not in os.listdir(data_directory):
                continue

            annotation = os.path.join(os.getcwd(), data_directory, file)
            image_path = os.path.join(os.getcwd(), data_directory, file[: -4] + '.jpg')

            boxes, texts = parse_annotation(annotation)
            image = Image.open(image_path)

            for i, box in enumerate(boxes):
                index += 1
                crop_image_path = os.path.join(image_directory, str(index) + '.jpg')
                annotation_path = os.path.join(annotation_directory, str(index) + '.txt')
                all_image_paths.append(crop_image_path)

                crop_image = image.crop(box)
                crop_image.save(crop_image_path)

                with open(annotation_path, 'w') as f:
                    f.write(texts[i])
                    all_texts.append(texts[i])

    with open(os.path.join(image_directory, 'images.json'), 'w') as f:
        json.dump(all_image_paths, f)

    with open(os.path.join(annotation_directory, 'texts.json'), 'w') as f:
        json.dump(all_texts, f)


def create_vocab(annotation_directory='data/task2/annotation'):
    vocab = set()

    for file in os.listdir(annotation_directory):
        if not file.endswith('.txt'):
            continue
        with open(os.path.join(os.getcwd(), annotation_directory, file), 'r') as f:
            text = f.read()
            vocab.update(list(text))

    vocab = sorted(vocab)

    with open(os.path.join(os.getcwd(), annotation_directory, 'vocab.json'), 'w') as f:
        json.dump(list(vocab), f)

    # return vocab


def create_map(vocab):
    map = {i + 1: char for i, char in enumerate(vocab)}
    rev_map = {char: i for i, char in map.items()}

    return map, rev_map


def encode(text):
    with open('data/task2/annotation/vocab.json', 'r') as f:
        vocab = json.load(f)

    map, rev_map = create_map(vocab)

    text_encode = [rev_map[text[i]] for i in range(len(text))]

    return text_encode, len(text)


def decode(labels):
    with open('data/task2/annotation/vocab.json', 'r') as f:
        vocab = json.load(f)

    map, rev_map = create_map(vocab)

    text_decode = [map[i] for i in labels]
    text_decode = ''.join(text_decode)

    return text_decode


In [10]:
os.makedirs("/kaggle/working/data/task2/image", exist_ok=True)

In [11]:
os.makedirs("/kaggle/working/data/task2/annotation", exist_ok=True)

In [12]:
create_data(data_directory="/kaggle/input/icar-data/0325updated.task1train(626p)")

In [13]:
create_vocab(annotation_directory='/kaggle/working/data/task2/annotation')

In [14]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import ToTensor, Normalize
from PIL import Image, ImageOps
import numpy as np
import os




class ReceiptDataset(Dataset):
    def __init__(self, image_paths, texts, width=280, height=64):
        self.image_paths = image_paths
        self.texts = texts
        self.transform = ResizeNormalize(width, height)

    def __getitem__(self, item):
        image_path = self.image_paths[item]
        image = Image.open(image_path)
        image = ImageOps.grayscale(image)
        text = self.texts[item]

        image = self.transform(image)

        return image, text

    def __len__(self):
        return len(self.image_paths)


class ResizeNormalize(object):
    def __init__(self, width=280, height=64):
        self.scale_width = width
        self.scale_height = height
        self.transforms = transforms.Compose([
            ToTensor(),
            Normalize(mean=0.5,
                      std=0.5)
        ])

    def __call__(self, image):
        w, h = image.size
        new_height = self.scale_height
        new_width = w * (new_height / h)
        new_width = int(new_width)

        if new_width >= self.scale_width:
            image = image.resize((self.scale_width, self.scale_height))
        else:
            image = image.resize((new_width, new_height))
            image_pad = np.zeros((self.scale_height, self.scale_width))
            image_pad[: new_height, : new_width] = image
            image = image_pad
            image = Image.fromarray(np.uint8(image))

        image = self.transforms(image)
        return image


def collate_fn(batch):
    images = list()
    text_encodes = list()
    text_lens = list()
    for b in batch:
        images.append(b[0])
        text_encode, text_len = encode(b[1])
        text_encodes += text_encode
        text_lens.append(text_len)

    return torch.stack(images, dim=0), torch.tensor(text_encodes), torch.tensor(text_lens)


if __name__ == '__main__':
    dataset = ReceiptDataset(['data/task2/image/37553.jpg'], ['abc'])
    print(dataset[0][0].size())

torch.Size([1, 64, 280])


In [15]:
from torch import nn
import torchvision



class ConvolutionLayer(nn.Module):
    def __init__(self, in_channels=1, pretrained=False):
        super(ConvolutionLayer, self).__init__()
        self.pretrained = pretrained

        self.conv1_1 = nn.Conv2d(in_channels=in_channels, out_channels=64, kernel_size=3, padding=1)  # (64, 64, 280)
        self.relu1_1 = nn.ReLU(inplace=True)
        self.conv1_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)  # (64, 64, 280)
        self.relu1_2 = nn.ReLU(inplace=True)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # (64, 32, 140)

        self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)  # (128, 32, 140)
        self.relu2_1 = nn.ReLU(inplace=True)
        self.conv2_2 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)  # (128, 32, 140)
        self.relu2_2 = nn.ReLU(inplace=True)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # (128, 16, 70)

        self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)  # (256, 16, 70)
        self.relu3_1 = nn.ReLU(inplace=True)
        self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)  # (256, 16, 70)
        self.relu3_2 = nn.ReLU(inplace=True)
        self.conv3_3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)  # (256, 16, 70)
        self.relu3_3 = nn.ReLU(inplace=True)
        self.pool3 = nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1))  # (256, 8, 70)

        self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)  # (512, 8, 70)
        self.relu4_1 = nn.ReLU(inplace=True)
        self.bn4_1 = nn.BatchNorm2d(num_features=512)  # (512, 8, 70)
        self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)  # (512, 8, 70)
        self.relu4_2 = nn.ReLU(inplace=True)
        self.bn4_2 = nn.BatchNorm2d(num_features=512)  # (512, 8, 70)
        self.pool4 = nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1))  # (512, 4, 70)
        self.conv4_3 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)  # (512, 4, 70)
        self.relu4_3 = nn.ReLU(inplace=True)

        self.init_weight()

    def forward(self, x):
        out = self.relu1_1(self.conv1_1(x))
        out = self.relu1_2(self.conv1_2(out))
        out = self.pool1(out)

        out = self.relu2_1(self.conv2_1(out))
        out = self.relu2_2(self.conv2_2(out))
        out = self.pool2(out)

        out = self.relu3_1(self.conv3_1(out))
        out = self.relu3_2(self.conv3_2(out))
        out = self.relu3_3(self.conv3_3(out))
        out = self.pool3(out)

        out = self.relu4_1(self.conv4_1(out))
        out = self.bn4_1(out)
        out = self.relu4_2(self.conv4_2(out))
        out = self.bn4_2(out)
        out = self.relu4_3(self.conv4_3(out))
        out = self.pool4(out)

        return out

    def init_weight(self):
        state_dict = self.state_dict()

        pretrained_state_dict = torchvision.models.vgg16(pretrained=self.pretrained).state_dict()

        state_dict['conv1_2.weight'] = pretrained_state_dict['features.2.weight']
        state_dict['conv1_2.bias'] = pretrained_state_dict['features.2.bias']

        state_dict['conv2_1.weight'] = pretrained_state_dict['features.5.weight']
        state_dict['conv2_1.bias'] = pretrained_state_dict['features.5.bias']

        state_dict['conv2_2.weight'] = pretrained_state_dict['features.7.weight']
        state_dict['conv2_2.bias'] = pretrained_state_dict['features.7.bias']

        state_dict['conv3_1.weight'] = pretrained_state_dict['features.10.weight']
        state_dict['conv3_1.bias'] = pretrained_state_dict['features.10.bias']

        state_dict['conv3_2.weight'] = pretrained_state_dict['features.12.weight']
        state_dict['conv3_2.bias'] = pretrained_state_dict['features.12.bias']

        state_dict['conv3_3.weight'] = pretrained_state_dict['features.14.weight']
        state_dict['conv3_3.bias'] = pretrained_state_dict['features.14.bias']

        state_dict['conv4_1.weight'] = pretrained_state_dict['features.17.weight']
        state_dict['conv4_1.bias'] = pretrained_state_dict['features.17.bias']
        state_dict['conv4_2.weight'] = pretrained_state_dict['features.19.weight']
        state_dict['conv4_2.bias'] = pretrained_state_dict['features.19.bias']
        state_dict['conv4_3.weight'] = pretrained_state_dict['features.21.weight']
        state_dict['conv4_3.bias'] = pretrained_state_dict['features.21.bias']

        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)


class RNNLayer(nn.Module):
    def __init__(self, n_classes, hidden_dim=256):
        super(RNNLayer, self).__init__()
        self.n_classes = n_classes
        self.hidden_dim = hidden_dim
        self.lstm1 = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim,
                             bidirectional=True, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=self.hidden_dim*2, hidden_size=self.hidden_dim,
                             bidirectional=True, batch_first=True)
        self.linear = nn.Linear(self.hidden_dim * 2, self.n_classes)

        self.init_weight()

    def forward(self, x):
        out, _ = self.lstm1(x)
        out, _ = self.lstm2(out)
        out = self.linear(out)

        return out

    def init_weight(self):
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.constant_(layer.bias, 0)


class CRNN(nn.Module):
    def __init__(self, pretrained=False, hidden_size=256, n_classes=73):
        super(CRNN, self).__init__()
        self.convolution_layer = ConvolutionLayer()
        self.rnn_layer = RNNLayer(n_classes=n_classes)
        self.linear = nn.Linear(in_features=2048, out_features=hidden_size)

        self.init_weight()

    def forward(self, x):
        out = self.convolution_layer(x)  # (N, 512, 4, 70)
        out = out.permute(0, 3, 1, 2)  # (N, 70, 512, 4)
        out = out.view(-1, 70, 2048)  # (N, 70, 2048)
        out = self.linear(out)  # (N, 70, 256)
        out = out.permute(1, 0, 2)  # (70, N, 256)
        out = self.rnn_layer(out)  # (70, N, 73)

        return out

    def init_weight(self):
        for layer in self.modules():
            if isinstance(layer, nn.Linear):
                nn.init.xavier_uniform_(layer.weight)
                nn.init.constant_(layer.bias, 0)

class TransformerLayer(nn.Module):
    def __init__(self, d_model=256, nhead=4, num_layers=2, dim_feedforward=512, dropout=0.1, n_classes=73):
        super(TransformerLayer, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(d_model, n_classes)
        self.init_weights()

    def forward(self, x):
        # x: (N, T, d_model)
        x = self.transformer_encoder(x)  # (N, T, d_model)
        x = self.fc(x)  # (N, T, n_classes)
        return x.permute(1, 0, 2)  # (T, N, n_classes)

    def init_weights(self):
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)
class CTRN(nn.Module):
    def __init__(self, pretrained=False, hidden_size=256, n_classes=73):
        super(CTRN, self).__init__()
        self.convolution_layer = ConvolutionLayer(pretrained=pretrained)
        self.linear = nn.Linear(in_features=2048, out_features=hidden_size)
        self.transformer_layer = TransformerLayer(
            d_model=hidden_size,
            nhead=4,
            num_layers=2,
            dim_feedforward=hidden_size * 2,
            n_classes=n_classes
        )
        self.init_weight()

    def forward(self, x):
        out = self.convolution_layer(x)  # (N, 512, 4, 70)
        out = out.permute(0, 3, 1, 2)  # (N, 70, 512, 4)
        out = out.contiguous().view(out.size(0), out.size(1), -1)  # (N, 70, 2048)
        out = self.linear(out)  # (N, 70, 256)
        out = self.transformer_layer(out)  # (70, N, n_classes)
        return out

    def init_weight(self):
        nn.init.xavier_uniform_(self.linear.weight)
        nn.init.constant_(self.linear.bias, 0)



In [16]:
import torch
from torch import nn
import json
import os
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.optim import Adam
from tqdm import tqdm



def train_batch(model, images, text_encodes, text_lens, optimizer, criterion, device):
    model.train()
    images = images.to(device)
    text_encodes = text_encodes.to(device)
    text_lens = text_lens.to(device)

    logits = model(images)

    log_probs = torch.nn.functional.log_softmax(logits, dim=-1)

    batch_size = logits.size(1)
    input_lengths = torch.LongTensor([logits.size(0)] * batch_size)    
    target_lengths = torch.flatten(text_lens)

    loss = criterion(log_probs, text_encodes, input_lengths, target_lengths)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss.item()



IMAGE_PATH = '/kaggle/working/data/task2/image/images.json'
TEXT_PATH = '/kaggle/working/data/task2/annotation/texts.json'
BATCH_SIZE = 128

with open(IMAGE_PATH, 'r') as f:
    image_paths = json.load(f)
with open(TEXT_PATH, 'r') as f:
    texts = json.load(f)

X_train, X_test, y_train, y_test = train_test_split(image_paths, texts, test_size=0.3,
                                                    shuffle=True, random_state=2020)

train_dataset = ReceiptDataset(X_train, y_train)
val_dataset = ReceiptDataset(X_test, y_test)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)

n_epochs = 30
lr = 1e-5

# model.load_state_dict(torch.load('./crnn.pth', map_location=torch.device('cpu')))

criterion = nn.CTCLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CRNN().to(device)
optimizer = Adam(model.parameters())

best_loss = float('inf') 

train_losses = []
for epoch in tqdm(range(n_epochs), desc='Training'):
    print(f'\nEpoch {epoch + 1}: ')
    train_batch_losses = []

    for images, text_encodes, text_lens in train_dataloader:
        loss = train_batch(model, images, text_encodes, text_lens, optimizer, criterion, device)
        train_batch_losses.append(loss)

    epoch_loss = sum(train_batch_losses) / len(train_batch_losses)
    train_losses.append(epoch_loss)
    print(f'===> Epoch {epoch + 1} Loss: {epoch_loss:.4f}')

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(model.state_dict(), 'crnn_model.pth')
        print(f'Saved better model at epoch {epoch + 1} with loss {best_loss:.4f}')



Training:   0%|          | 0/30 [00:00<?, ?it/s]


Epoch 1: 


Training:   3%|▎         | 1/30 [03:15<1:34:27, 195.45s/it]

===> Epoch 1 Loss: 4.3755
Saved better model at epoch 1 with loss 4.3755

Epoch 2: 


Training:   7%|▋         | 2/30 [06:40<1:33:56, 201.30s/it]

===> Epoch 2 Loss: 3.4156
Saved better model at epoch 2 with loss 3.4156

Epoch 3: 


Training:  10%|█         | 3/30 [10:07<1:31:45, 203.89s/it]

===> Epoch 3 Loss: 1.9040
Saved better model at epoch 3 with loss 1.9040

Epoch 4: 


Training:  13%|█▎        | 4/30 [13:34<1:28:49, 204.99s/it]

===> Epoch 4 Loss: 0.4803
Saved better model at epoch 4 with loss 0.4803

Epoch 5: 


Training:  17%|█▋        | 5/30 [17:00<1:25:29, 205.18s/it]

===> Epoch 5 Loss: 0.2269
Saved better model at epoch 5 with loss 0.2269

Epoch 6: 


Training:  20%|██        | 6/30 [20:25<1:22:10, 205.45s/it]

===> Epoch 6 Loss: 0.1518
Saved better model at epoch 6 with loss 0.1518

Epoch 7: 


Training:  23%|██▎       | 7/30 [23:51<1:18:45, 205.45s/it]

===> Epoch 7 Loss: 0.1139
Saved better model at epoch 7 with loss 0.1139

Epoch 8: 


Training:  27%|██▋       | 8/30 [27:16<1:15:18, 205.37s/it]

===> Epoch 8 Loss: 0.0906
Saved better model at epoch 8 with loss 0.0906

Epoch 9: 


Training:  30%|███       | 9/30 [30:41<1:11:48, 205.19s/it]

===> Epoch 9 Loss: 0.0825
Saved better model at epoch 9 with loss 0.0825

Epoch 10: 


Training:  33%|███▎      | 10/30 [34:05<1:08:17, 204.87s/it]

===> Epoch 10 Loss: 0.0723
Saved better model at epoch 10 with loss 0.0723

Epoch 11: 


Training:  37%|███▋      | 11/30 [37:30<1:04:52, 204.87s/it]

===> Epoch 11 Loss: 0.0564
Saved better model at epoch 11 with loss 0.0564

Epoch 12: 


Training:  40%|████      | 12/30 [40:54<1:01:24, 204.69s/it]

===> Epoch 12 Loss: 0.0499
Saved better model at epoch 12 with loss 0.0499

Epoch 13: 


Training:  43%|████▎     | 13/30 [44:18<57:55, 204.46s/it]  

===> Epoch 13 Loss: 0.0545

Epoch 14: 


Training:  47%|████▋     | 14/30 [47:42<54:28, 204.30s/it]

===> Epoch 14 Loss: 0.0439
Saved better model at epoch 14 with loss 0.0439

Epoch 15: 


Training:  50%|█████     | 15/30 [51:06<51:02, 204.16s/it]

===> Epoch 15 Loss: 0.0359
Saved better model at epoch 15 with loss 0.0359

Epoch 16: 


Training:  53%|█████▎    | 16/30 [54:30<47:36, 204.05s/it]

===> Epoch 16 Loss: 0.0362

Epoch 17: 


Training:  57%|█████▋    | 17/30 [57:53<44:11, 203.96s/it]

===> Epoch 17 Loss: 0.0399

Epoch 18: 


Training:  60%|██████    | 18/30 [1:01:17<40:46, 203.86s/it]

===> Epoch 18 Loss: 0.0359
Saved better model at epoch 18 with loss 0.0359

Epoch 19: 


Training:  63%|██████▎   | 19/30 [1:04:41<37:21, 203.81s/it]

===> Epoch 19 Loss: 0.0461

Epoch 20: 


Training:  67%|██████▋   | 20/30 [1:08:05<34:00, 204.00s/it]

===> Epoch 20 Loss: 0.0333
Saved better model at epoch 20 with loss 0.0333

Epoch 21: 


Training:  70%|███████   | 21/30 [1:11:29<30:36, 204.04s/it]

===> Epoch 21 Loss: 0.0272
Saved better model at epoch 21 with loss 0.0272

Epoch 22: 


Training:  73%|███████▎  | 22/30 [1:14:53<27:11, 203.97s/it]

===> Epoch 22 Loss: 0.0289

Epoch 23: 


Training:  77%|███████▋  | 23/30 [1:18:17<23:47, 203.94s/it]

===> Epoch 23 Loss: 0.0230
Saved better model at epoch 23 with loss 0.0230

Epoch 24: 


Training:  80%|████████  | 24/30 [1:21:41<20:23, 203.87s/it]

===> Epoch 24 Loss: 0.0226
Saved better model at epoch 24 with loss 0.0226

Epoch 25: 


Training:  83%|████████▎ | 25/30 [1:25:04<16:58, 203.79s/it]

===> Epoch 25 Loss: 0.0196
Saved better model at epoch 25 with loss 0.0196

Epoch 26: 


Training:  87%|████████▋ | 26/30 [1:28:28<13:34, 203.65s/it]

===> Epoch 26 Loss: 0.0172
Saved better model at epoch 26 with loss 0.0172

Epoch 27: 


Training:  90%|█████████ | 27/30 [1:31:51<10:11, 203.69s/it]

===> Epoch 27 Loss: 0.0243

Epoch 28: 


Training:  93%|█████████▎| 28/30 [1:35:15<06:47, 203.71s/it]

===> Epoch 28 Loss: 0.0510

Epoch 29: 


Training:  97%|█████████▋| 29/30 [1:38:39<03:23, 203.66s/it]

===> Epoch 29 Loss: 0.0340

Epoch 30: 


Training: 100%|██████████| 30/30 [1:42:02<00:00, 204.10s/it]

===> Epoch 30 Loss: 0.0273



