

<img src="https://dl.fbaipublicfiles.com/detectron2/Detectron2-Logo-Horz.png" width="500">


# Run inference on images

In this last step we run all acquired images through the two best models, selected by highest F1-score and highest recall.

We transform the information output by the network into a georeferenced dataset and apply some operations to handle overlapping objects.

# Setting paths and bools

`DIR` is the root path of your project. `RGB` selects the image type: set it to `True` for RGB images or `False` for panchromatic images.

In [None]:
# Project root: every input and output path in this notebook is built from DIR.
%env DIR=/eos/jeodpp/data/projects/REFOCUS/data/swalim_v2

# Read back as a string: 'False' selects the panchromatic paths, any other
# value selects the RGB paths.
%env RGB=False

In [None]:
import sys
import os
# Make the project's GDAL helper scripts importable. The path must be appended
# before `import shapefile` runs.
# NOTE(review): `shapefile` is presumably the project-local module in this
# directory (it is used below through `shapefile.ArrayToPoly`) - confirm.
syspath = "{}/code/scripts/GDAL-python".format(os.environ['DIR'])
sys.path.append(syspath)
import shapefile

from pathlib import Path

# import some common libraries
import matplotlib.pyplot as plt
import cv2
from PIL import Image

from osgeo import osr, ogr, gdal


import json
import pandas as pd
import random
import glob
import shutil
import cv2
import numpy as np


# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg

### Create the paths where we will save all data

This path also contains the CSV files with the final results from the previous step.

In [None]:
# Folder holding the trained models and final_results.csv for the selected
# image type (panchromatic when RGB == 'False', RGB otherwise).
results_path = (
    '{}/outputs/second_iter/pancro_300/'
    if os.getenv('RGB') == 'False'
    else '{}/outputs/second_iter/rgb_300/'
).format(os.getenv('DIR'))


Some functions to run the two best models for each dataset

In [None]:
def load_conf_file(path, score_thresh=0.5):
    """Build a detectron2 inference config from a saved training config.

    Parameters
    ----------
    path : str
        Path to the saved ``config.yaml``. The weights file
        ``model_final.pth`` is looked up in the same directory.
    score_thresh : float, optional
        Value written to ``MODEL.ROI_HEADS.SCORE_THRESH_TEST``; 0.5 by default.

    Returns
    -------
    The config loaded from ``path`` with the dataset names, worker count,
    weights path and test score threshold overridden.
    """
    # Take the weights from the directory that holds the config file, rather
    # than splitting the string on 'config.yaml', which breaks if that text
    # appears earlier in the path or the file has a different name.
    weights_path = os.path.join(os.path.dirname(path), "model_final.pth")

    cfg = get_cfg()
    # Accept keys in the saved file that the default config does not define.
    cfg.set_new_allowed(True)
    cfg.merge_from_file(path)

    cfg.DATASETS.TRAIN = ("swalim_train", )
    cfg.DATASETS.TEST = ("swalim_test", )
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.MODEL.WEIGHTS = weights_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh
    return cfg
    
def Inference(config, path_list_imgs, table, path_copy_im):
    """Run one model over a list of images and export the detections.

    Parameters
    ----------
    config : str
        Path to the model's ``config.yaml`` (passed to ``load_conf_file``).
    path_list_imgs : str
        Text file with one image path per line.
    table : str
        Unused; kept so existing callers keep working.
    path_copy_im : str
        Output directory prefix. The output name is this prefix followed by
        the image file name with its ``.tif`` extension removed.

    For every image with at least one detection, a binary mask of the
    predicted boxes is built and handed to ``shapefile.ArrayToPoly``.
    """
    cfg = load_conf_file(config)
    predictor = DefaultPredictor(cfg)

    with open(path_list_imgs) as f:
        image_paths = f.read().splitlines()

    for image_path in image_paths:
        if not image_path.strip():
            # Blank lines (e.g. a trailing newline) are not image paths.
            continue

        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread returns None instead of raising when the file is
            # missing or unreadable; skip it rather than crash the whole run.
            print("Skipping unreadable image: {}".format(image_path))
            continue

        out = predictor(im)["instances"].to("cpu")
        scores = out.scores
        boxes = out.pred_boxes.tensor.detach().numpy()
        print(boxes)
        if len(boxes) == 0:
            continue

        # Binary mask with the same height/width as the image: 1 inside any
        # predicted box, 0 elsewhere.
        control = np.zeros((im.shape[0], im.shape[1]), dtype=int)
        for x_min, y_min, x_max, y_max in boxes:
            control[int(y_min):int(y_max), int(x_min):int(x_max)] = 1

        name = '{}{}'.format(path_copy_im, image_path.split('.tif')[0].split('/')[-1])
        shapefile.ArrayToPoly(image_path, control, name, path_copy_im, config, scores)
            
def Inference_all(t, df):
    """Run one selected model over both image lists of the active dataset.

    Parameters
    ----------
    t : str
        Tag of the selection criterion (e.g. ``'f1score'`` or ``'recall'``);
        used in the output directory and table names.
    df : mapping
        Row of the results table; ``df['Path']`` is the path to the model's
        config file.

    The dataset (panchromatic or RGB) is chosen from the ``RGB`` environment
    variable. Inference runs first on the list of images without annotations
    and then on the list with annotations; both write to the same output
    directory.
    """
    # The results may have been produced on another platform: swap that
    # platform's output prefix for the local results path.
    # NOTE(review): the hard-coded prefix ends in 'pancro_300/', so RGB paths
    # are presumably left untouched by this replace - confirm.
    config = df['Path'].replace('/mnt/content/drive/MyDrive/JRC/Swalim_project/swalim_final_clean/outputs/second_iter/pancro_300/', results_path)

    # The image-list files use 'pancro'/'RGB' while output names use
    # 'pancro'/'rgb'.
    if os.getenv('RGB') == 'False':
        list_prefix, out_prefix = 'pancro', 'pancro'
    else:
        list_prefix, out_prefix = 'RGB', 'rgb'

    root = os.environ['DIR']
    path_copy_im = "{}/outputs/Inference/{}_inf_{}/".format(root, out_prefix, t)
    table = "{}_{}".format(out_prefix, t)

    # Make sure the output directory exists before anything is written to it.
    Path(path_copy_im).mkdir(parents=True, exist_ok=True)

    # First the non-annotated images, then the annotated ones.
    for list_name in ('listwithoutann', 'listwithann'):
        path_list_imgs = "{}/inputs/{}_{}.csv".format(root, list_prefix, list_name)
        Inference(config, path_list_imgs, table, path_copy_im)

## Run inference over the whole AOI!

In [None]:
# Load the validation results and drop every column whose name starts with
# 'Unnamed' (presumably stray index columns from the CSV round-trip).
metrics_csv = "{}/final_results.csv".format(results_path)
results = pd.read_csv(metrics_csv)
keep_columns = ~results.columns.str.contains('^Unnamed')
results = results.loc[:, keep_columns]

# Pick the top row by F-score and the top row by recall, ignoring rows with
# missing values.
complete_rows = results.dropna()
f1_result = complete_rows.sort_values(by=['F_score'], ascending=False).iloc[0]
recall_result = complete_rows.sort_values(by=['Recall'], ascending=False).iloc[0]

# Run inference with each selected model, F-score model first.
for t, best_model in (('f1score', f1_result), ('recall', recall_result)):
    Inference_all(t, best_model)


#### Now we will apply some geometrical operations to handle the overlaps between tiles and datasets.
#### Everything will be uploaded to a Postgres database.

In [None]:
#Postgres connection info:
# Each value is read from the matching libpq environment variable so that
# credentials do not have to be stored in the notebook. When a variable is
# not set the value is "" as before; replace the default to hard-code it.
user = os.environ.get("PGUSER", "")
password = os.environ.get("PGPASSWORD", "")
host = os.environ.get("PGHOST", "")
port = os.environ.get("PGPORT", "")
database = os.environ.get("PGDATABASE", "")

In [None]:
# Names of the schema and of the two result tables in Postgres:
schema = "swalim"
f1_score = "pancro_f1score"
# Original (misspelled) name, kept as an alias so either spelling works.
f1_socre = f1_score
recall = "pancro_recall"

In [None]:
#connect to the database:
# psycopg2 is not imported anywhere else in the notebook, so import it here.
import psycopg2

con = psycopg2.connect(user = user,
                      password = password,
                      host = host,
                      port = port,
                      database = database)
cursor = con.cursor()
# Expose the cursor under both `cursor` and `cur` so later cells can use
# either name.
cur = cursor
# Commit every statement immediately; no explicit con.commit() is needed.
con.set_session(autocommit=True)

#### Create the final product

In [None]:
# Build the final product: merge the two result tables, drop heavily
# overlapping detections, then add the area and diameter columns.
# `con`, `cursor`, `schema`, `recall` and `f1_socre` are the names assigned in
# the connection and table-name cells.
f1_table = f1_socre

if 'pancro' in recall:
    name = 'pancro'
else:
    name = 'rgb'
final_table = '{}_final'.format(name)

statements = [
    # Merge both result tables; UNION drops rows that are identical in every
    # column.
    'CREATE TABLE "{0}".merged AS( SELECT * FROM "{0}".{1} p UNION SELECT * FROM  "{0}".{2} pb)'.format(
        schema, f1_table, recall),

    # For each intersecting pair (a, b) with a.ogc_fid < b.ogc_fid, delete b
    # when the intersection covers more than half of a's area.
    # NOTE(review): rows are matched by ogc_fid only - confirm the ids cannot
    # collide between the two source tables after the UNION.
    ('DELETE FROM "{0}".merged c WHERE c.ogc_fid IN ( '
     'SELECT b.ogc_fid FROM "{0}".merged a, "{0}".merged b '
     'WHERE a.ogc_fid < b.ogc_fid AND ST_Intersects(a.wkb_geometry, b.wkb_geometry) '
     'AND ST_Area(ST_Intersection(a.wkb_geometry, b.wkb_geometry)) / ST_Area(a.wkb_geometry) >0.5);'
     ).format(schema),

    # RENAME TO takes a bare table name (no schema); the table stays in its
    # current schema.
    'ALTER TABLE "{}".merged RENAME TO {}'.format(schema, final_table),

    'ALTER TABLE "{}".{} ADD COLUMN area double precision'.format(schema, final_table),
    'UPDATE "{0}".{1} SET area=ST_AREA("{0}".{1}.wkb_geometry)'.format(schema, final_table),

    # Perimeter / 4 is the side length of a square with the same perimeter.
    'ALTER TABLE "{}".{} ADD COLUMN diameter double precision'.format(schema, final_table),
    'UPDATE "{0}".{1} SET diameter=ST_Perimeter("{0}".{1}.wkb_geometry)/4'.format(schema, final_table),
]

try:
    for sql in statements:
        cursor.execute(sql)
finally:
    # Release the connection even if one of the statements fails.
    con.close()