## IMPORT LIBRARIES AND PACKAGES

In [2]:
import glob
from tqdm import tqdm
import pandas as pd
import numpy as np
import cv2
from imantics import Mask

import os
import sys
# Make the project's `src/` folder importable: it is expected to sit next to
# the folder this notebook runs from (i.e. <parent of cwd>/src/).
# os.path is used instead of splitting on '/' so this also works on Windows.
partial_path = os.path.dirname(os.getcwd())
# The trailing '' keeps the trailing separator the path had before.
sys.path.insert(0, os.path.join(partial_path, 'src', ''))

from utils import setup_damage_detector, crop_damage_v2
import config as cf

from detectron2.engine.defaults import DefaultPredictor
import shutil
from PIL import Image

## REORDER IMAGES INTO THE EXPECTED LAYOUT (images are also rotated according to their EXIF orientation, to avoid problems with smartphone photos)

In [2]:
from PIL.ExifTags import TAGS
import matplotlib.pyplot as plt
from PIL import ImageOps


In [3]:
# Flatten ../data/raw_data/<damage_id>/<image> into one folder of
# '<damage_id>_<n>.jpg' files. Each image is rotated according to its EXIF
# orientation and re-created from raw pixels so no metadata is carried over.
out_dir = '../data/test_papers_2.0.0'
os.makedirs(out_dir, exist_ok=True)

# Modes Pillow can encode as JPEG; anything else (e.g. RGBA, P) is converted
# to RGB first, otherwise `save(...'.jpg')` raises.
jpeg_modes = ('1', 'L', 'RGB', 'CMYK', 'YCbCr')

# Both globs are sorted so the <n> suffix assigned to each picture is the same
# on every run (plain glob order is filesystem-dependent).
for dam in tqdm(sorted(glob.glob('../data/raw_data/*'))):
    dam_id = os.path.basename(dam)
    for im_id, im in enumerate(sorted(glob.glob(os.path.join(dam, '*')))):
        with Image.open(im) as image:
            try:
                image = ImageOps.exif_transpose(image)
            except Exception as err:
                # Best effort: a broken EXIF block should not stop the run,
                # but report it instead of hiding it.
                tqdm.write(f'Could not apply EXIF orientation to {im}: {err}')
            if image.mode not in jpeg_modes:
                image = image.convert('RGB')
            # Rebuild the image from its raw bytes only; this drops the `info`
            # dictionary (EXIF etc.) and avoids the very slow per-pixel
            # getdata()/putdata() copy.
            image_without_exif = Image.frombytes(image.mode, image.size, image.tobytes())
            image_without_exif.save(os.path.join(out_dir, f'{dam_id}_{im_id}.jpg'))

100%|██████████| 43/43 [20:53<00:00, 29.14s/it]


## IMPORT DAMAGE DETECTION MODEL

In [13]:
# Build the detector configuration with the project helper imported from
# `utils`, then wrap it in detectron2's DefaultPredictor.
# NOTE(review): which weights/thresholds the config carries is decided inside
# setup_damage_detector() — check it there.
cfg = setup_damage_detector()
# `predictor` is called on images in the damage-extraction cell.
predictor = DefaultPredictor(cfg)

The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m


## EXTRACT DAMAGES FROM IMAGES

In [3]:
# Folder containing the images to run the detector on (trailing slash kept on
# purpose: later cells derive sibling folder names from this string).
inpath = '../data/test_papers_2.0.0/'

# Glob pattern matching every entry directly inside `inpath`.
path = os.path.join(inpath, '*')


In [15]:
# Run the detector on every file matched by `path` and collect one record per
# image: [file path, file name, {classes, boxes, scores as numpy arrays}].
damages = []
# Sorted so the record order is reproducible between runs.
for im in tqdm(sorted(glob.glob(path))):
    image = cv2.imread(im)
    if image is None:
        # cv2.imread returns None (it does not raise) when the file cannot be
        # decoded; report and skip it instead of crashing inside the predictor.
        tqdm.write(f'Skipping unreadable image: {im}')
        continue
    filename = os.path.basename(im)
    instances = predictor(image)["instances"]
    damages.append([
        im,
        filename,
        {
            # Tensors are moved to CPU before conversion to numpy.
            cf.CLASSES_KEY_NAME: instances.pred_classes.cpu().numpy(),
            cf.BOXES_KEY_NAME: instances.pred_boxes.tensor.cpu().numpy(),
            cf.SCORES_KEY_NAME: instances.scores.cpu().numpy(),
        },
    ])

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  ../aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)
100%|██████████| 567/567 [04:00<00:00,  2.36it/s]


## SAVE RESULTS INTO PICKLE FILE

In [18]:
# One row per processed image: file path, file name, detection dictionary.
df = pd.DataFrame(damages, columns=[cf.FILEPATH_COL_NAME, cf.FILENAME_COL_NAME, cf.DAMAGES_INFO_COL_NAME])

# Name the pickle after the input folder. `path` ends with the glob wildcard,
# so taking its last component would name the file '*.p'; use `inpath`
# (normalised to drop its trailing slash) instead.
dataset_name = os.path.basename(os.path.normpath(inpath))

# NOTE: despite its name, `outdir` is the full path of the pickle file; the
# name is kept because the loading cell reads it.
outdir = os.path.join(cf.SAVE_RESULTS_PATH, f'{dataset_name}.p')
df.to_pickle(outdir)

## LOAD PICKLE 

In [19]:
# Reload the detection results from the pickle file at `outdir`.
# NOTE: unpickling can execute arbitrary code — only load files produced by
# this notebook or another trusted source.
df = pd.read_pickle(outdir)

In [5]:
import shutil

## EXTRACT PATCHES CONTAINING DAMAGES AND SAVE THEM AS FIGURES

#### version 1: if no damage is found, take the image as a whole

In [21]:
# Version 1 output folder: same location as `inpath`, version tag 2.0.1.
new_path = inpath.replace('2.0.0', '2.0.1')
os.makedirs(new_path, exist_ok=True)

# Read the columns/keys through the same `cf` constants that were used to
# build the frame, instead of hard-coded attribute names and the 'classes' key.
rows = zip(df[cf.FILEPATH_COL_NAME], df[cf.FILENAME_COL_NAME], df[cf.DAMAGES_INFO_COL_NAME])

# `total` lets tqdm show a real progress bar (a bare iterator has no length).
for filepath, filename, damage_info in tqdm(rows, total=len(df)):
    if len(damage_info[cf.CLASSES_KEY_NAME]) > 0:
        # At least one detection: let the project helper save the crops.
        crop_damage_v2(filepath, damage_info, title=filename, save=True, outpath=new_path)
    else:
        # No detection: keep the picture as a whole.
        shutil.copyfile(filepath, os.path.join(new_path, filename))

567it [02:02,  4.61it/s]


#### version 2: if no damage is found, skip the picture

In [22]:
# Version 2 output folder: same location as `inpath`, version tag 2.0.2.
new_path = inpath.replace('2.0.0', '2.0.2')
os.makedirs(new_path, exist_ok=True)

# Read the columns/keys through the same `cf` constants that were used to
# build the frame, instead of hard-coded attribute names and the 'classes' key.
rows = zip(df[cf.FILEPATH_COL_NAME], df[cf.FILENAME_COL_NAME], df[cf.DAMAGES_INFO_COL_NAME])

# `total` lets tqdm show a real progress bar (a bare iterator has no length).
for filepath, filename, damage_info in tqdm(rows, total=len(df)):
    # Pictures without any detection are skipped in this version.
    if len(damage_info[cf.CLASSES_KEY_NAME]) > 0:
        crop_damage_v2(filepath, damage_info, title=filename, save=True, outpath=new_path)

567it [02:04,  4.56it/s]


## CLEAN DATASET BY HAND

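Go into the crop folders and manually remove every crop that is a false positive or a wrong damage detection, so that each remaining damage index is consistent. The cells below read the cleaned folders from `../data/test_papers_2.0.1_clean/` and `../data/test_papers_2.0.2_clean/`.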

## ENSURE CONSISTENT NUMBERING

In [5]:

# Copy the hand-cleaned version-1 crops into a new folder, renaming them to
# '<dam_id>_<im_id>.jpg' with gap-free, zero-based ids.
new_path = inpath.replace('2.0.0', '2.0.1_clean')
final_path = new_path.replace('2.0.1_clean', '2.1.0')
os.makedirs(final_path, exist_ok=True)

# Original damage id = file-name prefix before the first '_'.
# Sorted (instead of iterating a raw set) so the old-id -> new-id mapping is
# identical on every run; set order of strings changes between interpreters.
damages = sorted({os.path.basename(p).split('_')[0] for p in glob.glob(os.path.join(new_path, '*'))})

for dam_id, i in enumerate(damages):
    # glob.escape keeps ids containing glob metacharacters from being
    # interpreted as patterns; pictures are sorted for a stable im_id.
    pictures = sorted(glob.glob(os.path.join(new_path, f'{glob.escape(i)}_*')))
    for im_id, j in enumerate(pictures):
        shutil.copy(j, os.path.join(final_path, f'{dam_id}_{im_id}.jpg'))

In [7]:
# Copy the hand-cleaned version-2 crops into a new folder, renaming them to
# '<dam_id>_<im_id>.jpg' with gap-free, zero-based ids.
new_path = inpath.replace('2.0.0', '2.0.2_clean')
final_path = new_path.replace('2.0.2_clean', '2.2.0')
os.makedirs(final_path, exist_ok=True)

# Original damage id = file-name prefix before the first '_'.
# Sorted (instead of iterating a raw set) so the old-id -> new-id mapping is
# identical on every run; set order of strings changes between interpreters.
damages = sorted({os.path.basename(p).split('_')[0] for p in glob.glob(os.path.join(new_path, '*'))})

for dam_id, i in enumerate(damages):
    # glob.escape keeps ids containing glob metacharacters from being
    # interpreted as patterns; pictures are sorted for a stable im_id.
    pictures = sorted(glob.glob(os.path.join(new_path, f'{glob.escape(i)}_*')))
    for im_id, j in enumerate(pictures):
        shutil.copy(j, os.path.join(final_path, f'{dam_id}_{im_id}.jpg'))

In [3]:
# Copy the BDEO test pictures into a new folder, renaming them to
# '<dam_id>_<im_id>.jpg' with gap-free, zero-based ids.
new_path = '../data/test_BDEO'
final_path = new_path.replace('BDEO', 'BDEO_remapped')
os.makedirs(final_path, exist_ok=True)

# Original damage id = file-name prefix before the first '_'.
# Sorted (instead of iterating a raw set) so the old-id -> new-id mapping is
# identical on every run; set order of strings changes between interpreters.
damages = sorted({os.path.basename(p).split('_')[0] for p in glob.glob(os.path.join(new_path, '*'))})

for dam_id, i in enumerate(damages):
    # glob.escape keeps ids containing glob metacharacters from being
    # interpreted as patterns; pictures are sorted for a stable im_id.
    pictures = sorted(glob.glob(os.path.join(new_path, f'{glob.escape(i)}_*')))
    for im_id, j in enumerate(pictures):
        shutil.copy(j, os.path.join(final_path, f'{dam_id}_{im_id}.jpg'))