# TextFuseNet environment setup

In [None]:
# Install the condacolab helper quietly; it is used below to set up conda inside this runtime.
!pip install -q condacolab
import condacolab
# Downloads and installs the conda distribution, then restarts the kernel (see the log
# below), so the following cells must be run after the restart has finished.
condacolab.install()

⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:18
🔁 Restarting kernel...


In [None]:
!conda update -n base conda

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - done
Solving environment: | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - conda


The following packages will be downloaded:

    package                    |            build
    --------------------------

In [None]:
%%bash
# Stop at the first failing command so later steps never run against a
# half-created or non-activated environment.
set -e

# create a new conda environment.
# -y skips the interactive confirmation, which a notebook cell cannot answer;
# note that it also confirms replacing an existing environment of the same name.
conda create -y --name textfusenet python=3.7.3

# `conda activate` is a shell function that is not defined in the non-interactive
# shell started by %%bash, so load conda's shell hook before activating.
source "$(conda info --base)/etc/profile.d/conda.sh"
conda activate textfusenet

# install pytorch 1.3.1.
conda install -y pytorch=1.3.1 torchvision cudatoolkit=10.1 -c pytorch

# install other libraries for building TextFuseNet-detectron2. You can get detailed versions from the
# requirements.txt; differences between versions may have an unknown influence on performance.
# NOTE(review): these packages land in the textfusenet environment, while the later
# `!pip` / `!python` cells run outside this shell - confirm that they target the same environment.
pip install \
    opencv-python \
    tensorboard \
    yacs \
    tqdm \
    termcolor \
    tabulate \
    matplotlib \
    cloudpickle \
    wheel

# Model Download

In [None]:
import os

colab_path = ""
os.chdir(colab_path)
!git clone https://github.com/ying09/TextFuseNet.git
!unzip TextFuseNet-master
os.chdir("TextFuseNet")
os.getcwd()

In [None]:
# Install fvcore from the fvcore-master.zip archive expected in the current directory
# (presumably the TextFuseNet checkout - confirm); the authors rebuild it to make training more convenient.
!pip install fvcore-master.zip

# Build TextFuseNet-detectron2 in develop mode; per the authors this needs cuda-10.1 and gcc-4.9.
!python setup.py build develop

# Data preprocessing

<h3>Download thumbnails from YouTube</h3>

In [None]:
from tqdm import tqdm
import pandas as pd
import requests
import cv2

# Return to the workspace root; an earlier cell leaves the kernel inside TextFuseNet.
# The check on colab_path avoids calling os.chdir with an empty string, which raises.
if colab_path and os.getcwd() != colab_path:
    os.chdir(colab_path)

# Directory that holds the CSV file and receives the thumbnails/ folder.
# os.path.join keeps an empty value relative to the current directory instead of
# turning it into a path at the filesystem root.
path = ""
# Name of the CSV file listing every YouTube video id in an "id" column.
# Placeholder: replace with the real file name, pd.read_csv does not expand wildcards.
csv = "*.csv"
df = pd.read_csv(os.path.join(path, csv))
ids = df["id"]

# open() does not create missing directories, so make sure the target folder exists.
thumbnail_dir = os.path.join(path, "thumbnails")
os.makedirs(thumbnail_dir, exist_ok=True)

failed_ids = []
for video_id in tqdm(ids):
    url = f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg"
    # The timeout keeps one stalled connection from blocking the whole loop.
    response = requests.get(url, timeout=30)

    # Only store successful responses; the body of an error response (e.g. when no
    # max-resolution thumbnail is available) would otherwise be saved as a bogus .jpg.
    if not response.ok:
        failed_ids.append((video_id, response.status_code))
        continue

    # The with-block closes the file on exit; no explicit close() is required.
    with open(os.path.join(thumbnail_dir, f"{video_id}.jpg"), "wb") as file:
        file.write(response.content)

# Report skipped ids once, after the loop, so the progress bar output stays readable.
for video_id, status_code in failed_ids:
    print(f"Skipped {video_id}: HTTP {status_code}")

<h3>Run model</h3>

In [None]:
# Hard-coded folder that holds the CSV (presumably a mounted Google Drive directory);
# `csv` and `colab_path` are the values defined by earlier cells.
path = "/content/drive/MyDrive/Colab Notebooks"
df = pd.read_csv(f"{path}/{csv}")
ids = df["id"]

# Enter the TextFuseNet checkout unless the kernel is already inside it.
if os.getcwd() != f"{colab_path}/TextFuseNet":
    os.chdir("TextFuseNet")


import argparse
import glob
import multiprocessing as mp
import os
import time
import cv2
import tqdm
import numpy as np

from detectron2.config import get_cfg
from detectron2.data.detection_utils import read_image
from detectron2.utils.logger import setup_logger

from demo.predictor import VisualizationDemo

# constants
# Window title string; nothing in the visible notebook code reads it.
WINDOW_NAME = "COCO detections"


def setup_cfg(args):
    """Build a frozen detectron2 config from the parsed arguments.

    The file named by ``args.config_file`` is merged first, followed by the
    ``args.opts`` overrides; the weights path and the score thresholds are
    then overwritten from ``args``.

    Args:
        args: namespace providing ``config_file``, ``opts``, ``weights`` and
            ``confidence_threshold``.

    Returns:
        The object returned by ``get_cfg()`` after ``freeze()`` has been called.
    """
    config = get_cfg()
    config.merge_from_file(args.config_file)
    config.merge_from_list(args.opts)

    model = config.MODEL
    # Checkpoint used for inference.
    model.WEIGHTS = args.weights

    # A single threshold is applied to every built-in head type.
    min_score = args.confidence_threshold
    for head in (model.RETINANET, model.ROI_HEADS):
        head.SCORE_THRESH_TEST = min_score
    model.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = min_score

    config.freeze()
    return config


def get_parser():
    """Create the argument parser for the detection demo.

    Options (in registration order): ``--config-file``, ``--weights``,
    ``--input``, ``--output``, ``--confidence-threshold`` and ``--opts``.
    ``--input`` keeps its plain-string default but yields a list when given on
    the command line (``nargs="+"``); ``--opts`` collects all remaining tokens.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        (
            "--config-file",
            dict(
                default="./configs/ocr/icdar2013_101_FPN.yaml",
                metavar="FILE",
                help="path to config file",
            ),
        ),
        (
            "--weights",
            dict(
                default="./out_dir_r101/icdar2013_model/model_ic13_r101.pth",
                metavar="pth",
                help="the model used to inference",
            ),
        ),
        (
            "--input",
            dict(
                default="../taitra_data/thumbnails/*.jpg",
                nargs="+",
                help="the folder of icdar2013 test images",
            ),
        ),
        (
            "--output",
            dict(
                default="./Taitra_img/",
                help="A file or directory to save output visualizations. "
                "If not given, will show output in an OpenCV window.",
            ),
        ),
        (
            "--confidence-threshold",
            dict(
                type=float,
                default=0.7,
                help="Minimum score for instance predictions to be shown",
            ),
        ),
        (
            "--opts",
            dict(
                help="Modify config options using the command-line 'KEY VALUE' pairs",
                default=[],
                nargs=argparse.REMAINDER,
            ),
        ),
    )

    demo_parser = argparse.ArgumentParser(description="Detectron2 Demo")
    for flag, options in option_table:
        demo_parser.add_argument(flag, **options)
    return demo_parser


def save_result_to_txt(txt_save_path,prediction,polygons):
    """Write the boxes of class-0 instances to a text file, one box per line.

    Each line has the form ``xmin,ymin,xmax,ymax,`` followed by ``\\r\\n``, with
    the coordinates truncated to integers. The file is created (or truncated)
    even when no instance matches.

    Args:
        txt_save_path: path of the text file to write.
        prediction: mapping whose ``'instances'`` entry exposes ``pred_classes``
            and ``pred_boxes.tensor``, both indexable per instance.
        polygons: accepted for interface compatibility; not used here.
    """
    instances = prediction['instances']
    classes = instances.pred_classes
    boxes = instances.pred_boxes.tensor

    # The with-block guarantees the file is closed even if a conversion below raises.
    with open(txt_save_path,'w') as file:
        for i in range(len(classes)):
            # Only class id 0 is exported (presumably the text class - confirm).
            if classes[i]==0:
                coords = [str(int(boxes[i][k])) for k in range(4)]
                file.write(','.join(coords) + ',')
                file.write('\r\n')


if __name__ == "__main__":

    # An explicit empty list makes argparse use the declared defaults instead of
    # reading sys.argv, which inside a notebook belongs to the kernel.
    args = get_parser().parse_args(args=[])

    cfg = setup_cfg(args)
    detection_demo = VisualizationDemo(cfg)

    # --input is declared with nargs="+": its default is a single pattern string,
    # but a value given on the command line is a list. Accept both forms, since
    # glob.glob only takes one pattern at a time.
    test_images_path = args.input
    patterns = [test_images_path] if isinstance(test_images_path, str) else list(test_images_path)
    image_paths = [found for pattern in patterns for found in glob.glob(pattern)]

    # output_path is used as a plain string prefix for every result file, so create
    # the directory part of that prefix; saving into a missing folder would fail.
    output_path = args.output
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    start_time_all = time.time()
    img_count = 0
    for image_path in image_paths:
        print(image_path)
        img_name = os.path.basename(image_path)
        # Result files reuse the part of the file name before the first dot.
        stem = img_name.split('.')[0]
        img_save_path = output_path + stem + '.jpg'

        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable or corrupt file by returning None.
            print("Skipping unreadable image: {}".format(image_path))
            continue
        start_time = time.time()

        prediction, vis_output, polygons = detection_demo.run_on_image(img)

        txt_save_path = output_path + 'res_img' + stem + '.txt'
        print(txt_save_path)
        save_result_to_txt(txt_save_path,prediction,polygons)

        print("Time: {:.2f} s / img".format(time.time() - start_time))
        vis_output.save(img_save_path)
        img_count += 1

    print(img_count)
    if img_count:
        print("Average Time: {:.2f} s /img".format((time.time() - start_time_all) / img_count))
    else:
        # Without this guard an empty match would end in a ZeroDivisionError.
        print("No images were processed; check the --input pattern: {}".format(args.input))
