In [1]:
# Install the extra packages this notebook needs into the runtime.
# -q keeps pip's output short; -U upgrades scikit-image if it is already installed.
# NOTE(review): no versions are pinned, so a fresh run may pick up newer releases.
!pip -q install -U scikit-image
!pip -q install einops
!pip -q install lmdb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.7/14.7 MB[0m [31m44.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m299.2/299.2 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Load model and weights: download vietocr.zip from Google Drive by file ID,
# then unpack it quietly (-q).
!gdown 1VU-qucVHcNbu2qnGkWWNwO0TRhY-7iH2
!unzip -q vietocr.zip
# weights.zip is not downloaded separately in this notebook; presumably it is
# produced by unpacking vietocr.zip above (verify).
!unzip -q weights.zip

# Import model code.
# NOTE(review): later cells use cv2, np, yaml, os, time, Image, torch, Predictor and
# build_model without importing them, so they presumably come from this star import.
# Confirm, and prefer explicit imports so the dependencies are visible.
from vietocr import *

Downloading...
From: https://drive.google.com/uc?id=1VU-qucVHcNbu2qnGkWWNwO0TRhY-7iH2
To: /content/vietocr.zip
100% 142M/142M [00:05<00:00, 25.2MB/s]


In [5]:
#Load config
def load_config(yml_path):
  """Read a YAML configuration file and return its parsed content.

  Args:
    yml_path: Path of the YAML file to read.

  Returns:
    Whatever ``yaml.safe_load`` produces for the file content.

  Raises:
    OSError: If the file cannot be opened.
    yaml.YAMLError: If the content is not valid YAML. The error is printed
      with the offending path and then re-raised, so callers never receive
      a silent ``None`` in place of a configuration.
  """
  # Decode explicitly as UTF-8 so the result does not depend on the platform's
  # default encoding (matters as soon as the config holds non-ASCII text).
  with open(yml_path, "r", encoding="utf-8") as stream:
    try:
      return yaml.safe_load(stream)
    except yaml.YAMLError as exc:
      print(f"Could not parse YAML config {yml_path!r}: {exc}")
      raise

#Preprocessing
def remove_background(image):
  """Crop an image to the rows (and right edge) occupied by its text.

  A binarized copy of the image is cleaned of long horizontal and vertical
  strokes, the remaining blobs that pass the size filters are boxed, and the
  original image is cropped to the union of those boxes plus a small margin.
  The left edge is never trimmed: the crop always starts at column 0.

  Args:
    image: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

  Returns:
    A slice of ``image`` covering the detected text, or ``image`` itself when
    no blob passes the filters.
  """
  def find_contours(mask, mode):
    # OpenCV 3 returns (image, contours, hierarchy) while OpenCV 4 returns
    # (contours, hierarchy); the contour list is second-to-last in both.
    return cv2.findContours(mask, mode, cv2.CHAIN_APPROX_SIMPLE)[-2]

  gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
  # Inverted Otsu threshold: pixels darker than the automatic threshold become 255.
  thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

  # Remove horizontal lines: opening with a 40x1 kernel keeps only long
  # horizontal runs, which are then painted over in the mask.
  # NOTE(review): on this single-channel mask only the first colour component (0)
  # is expected to apply, i.e. the strokes are erased - confirm.
  horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (40,1))
  remove_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
  for c in find_contours(remove_horizontal, cv2.RETR_EXTERNAL):
    cv2.drawContours(thresh, [c], -1, (0,255,255), 5)

  # Remove vertical lines the same way (1x40 kernel, thicker stroke), working
  # on the mask that already had its horizontal lines painted out.
  vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,40))
  remove_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
  for c in find_contours(remove_vertical, cv2.RETR_EXTERNAL):
    cv2.drawContours(thresh, [c], -1, (0,255,255), 15)

  img_h, img_w = image.shape[0], image.shape[1]
  bounding_boxes = []
  for cnt in find_contours(thresh, cv2.RETR_LIST):
    if cv2.contourArea(cnt) <= 50:
      continue  # too small to be considered
    x, y, w, h = cv2.boundingRect(cnt)
    # Keep blobs that do not start at the image origin, are at least 18% of
    # the image height and 1% of its width, and are not extremely narrow.
    if (x, y) != (0, 0) and h/img_h > 0.18 and w/img_w > 0.01 and w/h > 0.15:
      bounding_boxes.append([x, y, x+w, y+h])

  if not bounding_boxes:
    return image

  boxes = np.array(bounding_boxes)
  yymin = boxes[:, 1].min()
  xxmax = boxes[:, 2].max()
  yymax = boxes[:, 3].max()
  # Add a margin (40 px right, 5 px above/below) only where it stays inside
  # the image; otherwise the bound is left exactly where it is.
  xxmax = xxmax + 40 if xxmax + 40 < img_w else xxmax
  yymin = yymin - 5 if yymin - 5 > 0 else yymin
  yymax = yymax + 5 if yymax + 5 < img_h else yymax
  return image[yymin:yymax, 0:xxmax]

#Inference
def prediction(img_path):
  """Run OCR on a single image file and return the recognized text.

  Relies on the module-level ``detector`` created in a later cell.

  Args:
    img_path: Path of the image to read with ``cv2.imread``.

  Returns:
    The value returned by ``detector.predict(..., return_prob=False)``.

  Raises:
    OSError: If ``cv2.imread`` cannot load an image from ``img_path``.
  """
  img = cv2.imread(img_path)
  # cv2.imread reports failure by returning None rather than raising; fail here
  # with the path instead of with an obscure error inside the preprocessing.
  if img is None:
    raise OSError(f"Could not read image file: {img_path}")
  processed_img = remove_background(img)
  # NOTE(review): the array is still BGR-ordered here (remove_background converts
  # it with COLOR_BGR2GRAY) and is handed to PIL without a channel swap - confirm
  # this matches the channel order the model was trained on.
  return detector.predict(Image.fromarray(processed_img), return_prob = False)

def predict(image_folder, output_file_path):
    """Run OCR over ``image_folder/<person_id>/<image>`` and write the results as CSV.

    Relies on the module-level ``detector``. The CSV has the columns ``id``
    (``<person_id>/<image>`` joined with the OS separator), ``answer`` and
    ``elapsed_time`` (seconds spent on preprocessing, inference and
    post-processing; reading the file from disk is not timed).

    Args:
        image_folder: Directory whose sub-directories each hold the images of one person.
        output_file_path: Destination of the CSV file.

    Raises:
        OSError: If ``cv2.imread`` cannot load one of the image files.
    """
    records = []
    # Sorted so the row order does not depend on the file system's listing order.
    for person_id in sorted(os.listdir(image_folder)):
        person_dir = os.path.join(image_folder, person_id)
        # Stray files next to the per-person folders would make os.listdir fail.
        if not os.path.isdir(person_dir):
            continue
        for image_name in sorted(os.listdir(person_dir)):
            fp = os.path.join(person_dir, image_name)
            image_id = os.path.join(person_id, image_name)
            image = cv2.imread(fp)
            # cv2.imread returns None instead of raising when it cannot read a file.
            if image is None:
                raise OSError(f"Could not read image file: {fp}")

            # Start inference (perf_counter: monotonic clock meant for intervals)
            start = time.perf_counter()
            processed_img = remove_background(image) # preprocess
            img = Image.fromarray(processed_img)
            answer = detector.predict(img, return_prob = False) # infer
            # post process: rewrite 'Đp'/'đp' to 'Đg'/'đg'
            # (presumably a recurring misreading of the model - confirm)
            answer = answer.replace('Đp', 'Đg').replace('đp', 'đg')
            elapsed = time.perf_counter() - start

            records.append({'id': image_id, 'answer': answer, 'elapsed_time': elapsed})

    # Write prediction: build the frame once instead of growing it row by row.
    results = pd.DataFrame(records, columns=['id', 'answer', 'elapsed_time'])
    results.to_csv(output_file_path, index=False)

In [None]:
# Update config: start from the base YAML and point it at the checkpoint to load.
CONFIG_PATH = '/content/base.yml'
WEIGHTS_PATH = '/content/weights/transformerocr.pth'  # trained on our modified dataset

config = load_config(CONFIG_PATH)
config['weights'] = WEIGHTS_PATH

# Load predictor: the inference functions read this module-level `detector`.
detector = Predictor(config)

In [6]:
# Load the public test set: download public_test.zip from Google Drive by file ID
# and unpack it into the working directory.
!gdown 1b2_B1HsssTCFBtLMG9xzndxmygM3TLCF
!unzip public_test.zip

Downloading...
From: https://drive.google.com/uc?id=1b2_B1HsssTCFBtLMG9xzndxmygM3TLCF
To: /content/public_test.zip
100% 36.4M/36.4M [00:01<00:00, 27.4MB/s]


In [15]:
import pandas as pd

# Run OCR over every image of the downloaded test set and store one
# id/answer/elapsed_time row per image.
# NOTE(review): the input folder is public_test while the output file is named
# private_test - confirm the naming is intended.
predict(
    image_folder="/content/public_test/images",
    output_file_path="team_00_private_test_pred.csv",
)

In [17]:
# Keep only the id and answer columns (dropping elapsed_time) and write them
# to a second CSV.
timed_predictions = pd.read_csv('/content/team_00_private_test_pred.csv')
timed_predictions[['id', 'answer']].to_csv('private_test_pred.csv', index = False)

In [12]:
def check_modelsize(config):
  """Print the combined size of the model's parameters and buffers.

  The model is built from ``config``, the checkpoint is loaded into it, and the
  byte count of all parameter and buffer tensors is divided by 1024**2 and
  printed with three decimals.

  Args:
    config: Mapping read for ``'weights'`` (checkpoint path) and ``'device'``
      (passed to ``torch.device``); also forwarded to ``build_model``.
  """
  model, _vocab = build_model(config)
  checkpoint_path = config['weights']
  # NOTE(review): torch.load deserializes the file - only use checkpoints from
  # a trusted source.
  state_dict = torch.load(checkpoint_path, map_location=torch.device(config['device']))
  model.load_state_dict(state_dict)

  def total_bytes(tensors):
    # element count times bytes per element, summed over all tensors
    return sum(t.nelement() * t.element_size() for t in tensors)

  param_bytes = total_bytes(model.parameters())
  buffer_bytes = total_bytes(model.buffers())
  print('model size: {:.3f}MB'.format((param_bytes + buffer_bytes) / 1024**2))

# Report the size of the model described by the active config; needs `config`
# from the predictor cell.
check_modelsize(config)

model size: 144.666MB
