## 1. Prepare Data and Create Datasets

In [1]:
# Download the dataset archive from Google Drive (needs the `gdown` CLI) and unpack it (needs `unzip`).
# NOTE(review): the recorded output of this cell shows that neither command was found on the
# machine that last ran it, so the data was not fetched by that run.
!gdown --id 19p3qtGNlc_p12bi3wnbXtELUAR7CE6p8
!unzip data.zip

'gdown' is not recognized as an internal or external command,
operable program or batch file.
'unzip' is not recognized as an internal or external command,
operable program or batch file.


In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import torch

In [None]:
# Nine file names, M3-BH13250-25.jpg through M3-BH13250-33.jpg; procDataset cuts
# these images into 6 chunks instead of 5.
exceptions = [f"M3-BH13250-{i+25}.jpg" for i in range(9)]

# Annotation table keyed by image file name, so labels.loc[name] selects that image's rows.
labels = pd.read_excel("/content/label.xlsx").set_index("image_name")

In [None]:
def get_intersect(r1, r2):
    """Return the overlap of two 1-D ranges, or None when they are disjoint.

    Each range is indexed as ``(start, end)``. Ranges that only touch at an
    end point yield a zero-length overlap ``(p, p)`` rather than None.
    """
    overlap = (max(r1[0], r2[0]), min(r1[1], r2[1]))
    return None if overlap[0] > overlap[1] else overlap

In [None]:
# Reproducible train/validation split.
# os.listdir returns names in arbitrary order and an unseeded randperm differs on every
# run, so the listing is sorted and the permutation is drawn from a seeded generator.
SPLIT_SEED = 0
allImages = sorted(os.listdir("/content/train"))
iA = np.array(allImages)
indices = torch.randperm(
    len(allImages),
    generator=torch.Generator().manual_seed(SPLIT_SEED),
).tolist()
# The last 10 shuffled images form the validation set; everything else is training data.
trainImgs = iA[indices[:-10]]
valImgs = iA[indices[-10:]]

In [None]:
def processImage(image, tags, imageIdStarter=0, annotStarter=0, chunksNum=5, draw=False):
    """Cut the lower half of ``image`` into horizontal chunks with COCO-style annotations.

    Parameters
    ----------
    image : object exposing ``size`` and ``crop(box)`` (a PIL image in this notebook).
    tags : DataFrame (or a single-row Series) with the columns ``xmin``, ``ymin``,
        ``width``, ``height`` and ``label_name``, in full-image pixel coordinates.
        The input is not modified.
    imageIdStarter : id given to the first chunk; chunk ``i`` gets ``imageIdStarter + i``.
    annotStarter : id given to the first annotation; later ones count up from it.
    chunksNum : number of equal-height chunks the lower half is split into.
    draw : when True, draw each kept box onto its chunk (white outline, width 10).

    Returns
    -------
    (imChunks, chunks, annots)
        ``imChunks`` is the list of per-chunk image records (file_name, height, width, id),
        ``chunks`` the cropped images, and ``annots`` the annotation dicts.

    A box is kept for a chunk only when its vertical overlap with the chunk is at
    least 30% of the chunk height. The stored bbox is clipped to the chunk at both
    the top and the bottom edge, so its y is never negative and its height never
    extends past the chunk.
    """
    if isinstance(tags, pd.Series):
        # A label lookup that matches exactly one row yields a Series; promote it
        # to a one-row frame so the column access below works.
        tags = tags.to_frame().T
    imgW, imgH = image.size
    box = image.crop((0, imgH / 2, imgW, imgH))
    tags = tags.copy()
    # Re-express ymin relative to the top of the lower half.
    tags["ymin"] = tags["ymin"] - imgH / 2
    bw, bh = box.size
    chunks = []
    annots = []
    nextAnnotId = annotStarter
    for i in range(chunksNum):
        coverage = (i * bh / chunksNum, (i + 1) * bh / chunksNum)
        chunkHeight = coverage[1] - coverage[0]
        localAnnots = []
        for obj in tags.itertuples(index=False):
            # Vertical intersection of the box with this chunk.
            top = max(obj.ymin, coverage[0])
            bottom = min(obj.ymin + obj.height, coverage[1])
            if top > bottom or (bottom - top) / chunkHeight < 0.3:
                continue
            visibleHeight = bottom - top
            localAnnots.append({
                "id": nextAnnotId,
                "area": obj.width * visibleHeight,
                "image_id": imageIdStarter + i,
                "iscrowd": 0,
                "bbox": [obj.xmin, top - coverage[0], obj.width, visibleHeight],
                "category_id": 1 if obj.label_name == "wood" else 2
            })
            nextAnnotId += 1
        annots += localAnnots
        r = box.crop((0, coverage[0], bw, coverage[1]))
        if draw:
            drawer = ImageDraw.Draw(r)
            for a in localAnnots:
                bb = a["bbox"]
                drawer.rectangle((bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]), outline="white", width=10)
        chunks.append(r)
    imChunks = [{
        "file_name": str(imageIdStarter + idx) + ".jpg",
        "height": im.size[1],
        "width": im.size[0],
        "id": imageIdStarter + idx,
    } for idx, im in enumerate(chunks)]
    return imChunks, chunks, annots

In [None]:
from tqdm import tqdm

In [None]:
import json

def np_encoder(object):
    """``default`` hook for ``json.dump``: convert NumPy scalars to plain Python values.

    Raises
    ------
    TypeError
        For any other type. Returning ``None`` here would make ``json.dump``
        silently write ``null`` for values it cannot serialize.
    """
    if isinstance(object, np.generic):
        return object.item()
    raise TypeError(f"Object of type {type(object).__name__} is not JSON serializable")

In [None]:
# Global id counters read and advanced by procDataset: aS is the next annotation id,
# iS the next image (chunk) id. Both start at 1.
aS = iS = 1

In [None]:
def procDataset(files, dest, annotDest):
    """Chunk every training image in ``files`` and write a COCO-style dataset.

    Parameters
    ----------
    files : sequence of image file names located in ``/content/train``.
    dest : existing directory that receives the chunk images.
    annotDest : path of the JSON annotation file to write.

    Image and annotation ids are taken from the module-level counters ``iS`` and
    ``aS``, which are advanced here so that ids stay unique across successive calls.
    Images listed in ``exceptions`` are split into 6 chunks, all others into 5.

    Note: a later cell of this notebook defines another ``procDataset`` for the
    test set; once that cell has run, this definition is shadowed.
    """
    global aS
    global iS
    dataset = {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "wood"}, {"id": 2, "name": "+10cm rock"}]
    }
    for iN in tqdm(files):
        N = 6 if iN in exceptions else 5
        # The context manager releases the image file even if processing fails.
        with Image.open("/content/train/" + iN) as img:
            # List indexer: always a DataFrame, even when the image has a single annotation row.
            tags = labels.loc[[iN]].copy()
            iChunks, chunks, annots = processImage(img, tags, imageIdStarter=iS, annotStarter=aS, draw=False, chunksNum=N)
            dataset["images"] += iChunks
            dataset["annotations"] += annots
            iS += len(iChunks)
            aS += len(annots)
            for meta, chunk in zip(iChunks, chunks):
                n_ = meta["file_name"]
                chunk.save(f"{dest}/{n_}")
    with open(annotDest, "w") as f:
        json.dump(dataset, f, default=np_encoder)

In [None]:
# !rm -rf borehole
!mkdir -p /content/borehole/train
!mkdir -p /content/borehole/val

In [None]:
# Order matters: procDataset advances the global counters iS/aS and never resets them,
# so the validation image and annotation ids continue where the training ids stopped.
procDataset(trainImgs, "/content/borehole/train", "/content/borehole/train-annotation.json")
procDataset(valImgs, "/content/borehole/val", "/content/borehole/val-annotation.json")

100%|██████████| 146/146 [01:25<00:00,  1.70it/s]
100%|██████████| 10/10 [00:06<00:00,  1.64it/s]


## 2. Training Swin Transformer

In [None]:
!pip install openmim
!mim install mmdet
!git clone https://github.com/AminRezaei0x443/Swin-Transformer-Object-Detection

In [None]:
%cd Swin-Transformer-Object-Detection/

/content/Swin-Transformer-Object-Detection


In [None]:
# Replace the mim-installed mmdet with the cloned fork (development install).
# -y skips pip's confirmation prompt so "Run all" does not stall on this cell.
!pip uninstall -y mmdet
!python3 setup.py develop

In [None]:
# Swap pycocotools for mmpycocotools.
# -y skips pip's confirmation prompt so "Run all" does not stall on this cell.
!pip uninstall -y pycocotools
!pip install mmpycocotools

In [None]:
!rm -rf /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b

In [None]:
!tools/dist_train.sh configs/swin/cascade_rcnn_borhole_b.py 1 --cfg-options model.backbone.use_checkpoint=True

In [None]:
# Save Checkpoints if needed (with best epoch regarding MAP)
!mkdir /content/drive/MyDrive/swin-unidro-x-2
!cp /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b/20210823_115322.log /content/drive/MyDrive/swin-unidro-x-2/
!cp /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b/20210823_115322.log.json /content/drive/MyDrive/swin-unidro-x-2/
!cp /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b/epoch_8.pth /content/drive/MyDrive/swin-unidro-x-2/
!cp /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b/epoch_9.pth /content/drive/MyDrive/swin-unidro-x-2/

## 3. Prepare Testset

In [None]:
# List the test images from the same absolute directory procDataset opens them from
# ("/content/test-rqd/"); a relative path would depend on the earlier %cd into the
# cloned repository. Sorted so the chunk ids are assigned in a stable order.
testImages = sorted(os.listdir("/content/test-rqd"))

In [None]:
def processImageTest(name, image, imageIdStarter=0, chunksNum=5):
    """Cut the lower half of ``image`` into ``chunksNum`` equal-height horizontal strips.

    Returns ``(imChunks, chunks)``: ``chunks`` holds the cropped strips from top to
    bottom, and ``imChunks`` holds one record per strip with ``file_name``
    (``"<name>-<index>.jpg"``), ``height``, ``width`` and ``id``
    (``imageIdStarter + index``).
    """
    fullW, fullH = image.size
    lowerHalf = image.crop((0, fullH/2, fullW, fullH))
    stripW, stripH = lowerHalf.size
    chunks = [
        lowerHalf.crop((0, k * stripH / chunksNum, stripW, (k+1) * stripH / chunksNum))
        for k in range(chunksNum)
    ]
    imChunks = []
    for idx, piece in enumerate(chunks):
        pieceW, pieceH = piece.size
        imChunks.append({
            "file_name": f"{name}-{idx}.jpg",
            "height": pieceH,
            "width": pieceW,
            "id": imageIdStarter + idx,
        })
    return imChunks, chunks

In [None]:
# Restart the image-id counter so the test chunk ids begin at 1 again; the RQD cell
# looks chunks up as idMap[i * 5 + j + 1].
iS = 1

In [None]:
def procDataset(files, dest, annotDest):
    """Chunk every test image in ``files`` and write an annotation-free COCO-style file.

    Parameters
    ----------
    files : sequence of image file names located in ``/content/test-rqd``.
    dest : existing directory that receives the chunk images.
    annotDest : path of the JSON file to write; its "annotations" list stays empty.

    Each image is split into 5 chunks named ``"<stem>-<index>.jpg"``. Chunk ids are
    taken from the module-level counter ``iS``, which is advanced here.

    Note: this definition replaces the training-set ``procDataset`` defined in an
    earlier cell of this notebook.
    """
    global iS
    dataset = {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "wood"}, {"id": 2, "name": "+10cm rock"}]
    }
    for iN in tqdm(files):
        N = 5
        stem = iN.replace(".jpg", "").replace(".JPG", "")
        # The context manager releases the image file even if processing fails.
        with Image.open("/content/test-rqd/" + iN) as img:
            iChunks, chunks = processImageTest(stem, img, imageIdStarter=iS, chunksNum=N)
            dataset["images"] += iChunks
            iS += len(iChunks)
            for meta, chunk in zip(iChunks, chunks):
                n_ = meta["file_name"]
                chunk.save(f"{dest}/{n_}")
    with open(annotDest, "w") as f:
        json.dump(dataset, f, default=np_encoder)

In [None]:
# Recreate the directory the test chunks are written to. Absolute paths are used because
# an earlier cell changed the working directory to the cloned repository, while
# procDataset saves to /content/borehole/test.
!rm -rf /content/borehole/test
!mkdir -p /content/borehole/test

In [None]:
# Runs the most recently defined procDataset (the test-set version, which shadows the
# training one) and relies on iS having been reset to 1 beforehand.
procDataset(testImages, "/content/borehole/test", "/content/borehole/test-annotation.json")

In [None]:
!python tools/test.py configs/swin/cascade_rcnn_borhole_b.py /content/Swin-Transformer-Object-Detection/work_dirs/cascade_rcnn_borhole_b/epoch_9.pth --out res-X-9-2.pkl

## 4. Calculate RQD

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import torch

In [None]:
def rqd(f):
    """Map a value ``f`` to a class from 1 to 5.

    Bands (upper bound inclusive): 0-25 -> 1, 25-50 -> 2, 50-75 -> 3, 75-90 -> 4,
    anything above 90 -> 5.

    NOTE(review): values that are not >= 0 (negative numbers, NaN) also fall through
    to class 5 - confirm that this is intended.
    """
    if not f >= 0:
        return 5
    for upperBound, grade in ((25, 1), (50, 2), (75, 3), (90, 4)):
        if f <= upperBound:
            return grade
    return 5

In [None]:
import json

# Load the test annotation file; the context manager closes the handle
# (json.load(open(...)) left it open).
with open("/content/borehole/test-annotation.json", "r") as annotFile:
    tI = json.load(annotFile)

# Chunk id -> chunk file name.
idMap = {d["id"]: d["file_name"] for d in tI["images"]}

In [None]:
# Run table with an added column h = to - from (the extent of each run).
frqd = pd.read_excel("from-to-rqd.xlsx").assign(h=lambda runs: runs["to"] - runs["from"])
# Same table keyed by RunId for direct lookups.
idf = frqd.set_index("RunId")

In [None]:
import pickle

# Read every pickled object stored in the result file, one pickle.load per object,
# until the end of the file is reached.
# NOTE(review): pickle.load can execute arbitrary code; only open result files produced by this pipeline.
# NOTE(review): the inference cell of this notebook writes "res-X-9-2.pkl", whereas this cell
# reads "res-X-8-2.pkl" - confirm which result file is intended.
objects = []
with (open("res-X-8-2.pkl", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break

In [None]:
def area(t):
    """Area of the rectangle ``t = (x1, y1, x2, y2)``."""
    left, top, right, bottom = t
    width = right - left
    height = bottom - top
    return width * height

def coveringRect(rects):
    """Smallest ``(x1, y1, x2, y2)`` rectangle that contains every rectangle in ``rects``.

    Raises ValueError when ``rects`` is empty.
    """
    coords = [[rect[k] for rect in rects] for k in range(4)]
    return (min(coords[0]), min(coords[1]), max(coords[2]), max(coords[3]))

def clip(bb, rects):
    """Intersect every rectangle in ``rects`` with the bounding box ``bb``.

    Rectangles are ``(x1, y1, x2, y2)``. Those that do not overlap ``bb`` (touching
    edges do not count as overlap) are dropped; the rest are returned, in order,
    clipped to ``bb``. A ``bb`` with zero width or height yields an empty list.
    """
    if not rects:
        return []
    a1, b1, a2, b2 = bb
    if a1 == a2 or b1 == b2:
        return []
    return [
        (max(a1, x1), max(b1, y1), min(a2, x2), min(b2, y2))
        for (x1, y1, x2, y2) in rects
        if not (a1 >= x2 or a2 <= x1 or y1 >= b2 or y2 <= b1)
    ]

def calc(cr, rects):
    """Area covered by ``rects`` inside the region ``cr`` (all ``(x1, y1, x2, y2)``).

    Takes the first rectangle whole, then recurses into the four strips of ``cr``
    that surround it, each time with the remaining rectangles clipped to that
    strip, so no area is counted twice. The rectangles are expected to lie inside
    ``cr`` already.
    """
    if not rects:
        return 0
    first, remaining = rects[0], rects[1:]
    x1, y1, x2, y2 = cr
    l1, m1, l2, m2 = first
    surrounding = (
        (x1, m2, x2, y2),  # full-width strip from the rectangle's y2 to the region's y2
        (x1, y1, x2, m1),  # full-width strip from the region's y1 to the rectangle's y1
        (x1, m1, l1, m2),  # strip beside the rectangle, from the region's x1 to its x1
        (l2, m1, x2, m2),  # strip beside the rectangle, from its x2 to the region's x2
    )
    covered = 0
    for strip in surrounding:
        covered += calc(strip, clip(strip, remaining))
    return area(first) + covered

def overlapArea(rects):
    """Area covered by the rectangles in ``rects`` inside their common bounding box.

    Returns 0 for an empty input.
    """
    return calc(coveringRect(rects), rects) if len(rects) else 0

In [None]:
def _runLength(runBoxes, runHeights, cmPerPixel):
    """Covered length of one run: box area / mean box height, scaled by cmPerPixel / 100.

    An empty run uses a mean height of 1 and therefore yields 0.
    """
    meanHeight = np.mean(runHeights) if runHeights else 1
    return (overlapArea(runBoxes) / meanHeight) * cmPerPixel / 100


# NOTE(review): 48 test images with 5 chunks each are hard-coded; chunk ids are assumed
# to be contiguous and to start at 1 - confirm against the test annotation file.
# NOTE(review): objects[0][k] is read as (class-0 rows, class-1 rows), each row being
# [x1, y1, x2, y2, score] - presumably the detector output for chunk k; TODO confirm.
rdf = []
for i in range(48):
    iN = "-".join(idMap[i * 5 + 1].split("-")[:3]) + ".jpg"
    allBoxes = []
    w = 0
    for j in range(5):
        imN = idMap[i * 5 + j + 1]
        # Only the size is needed; the context manager closes the file again.
        with Image.open("/content/borehole/test/" + imN) as img:
            w, h = img.size
        mr = objects[0][i*5 + j]
        boxes = []
        # Not named `labels`: that would overwrite the annotation table used by the training cells.
        boxLabels = []
        for rbox in mr[0]:
            if rbox[-1] > 0.9:
                boxes.append(rbox[:4])
                boxLabels.append(1)
        for rbox in mr[1]:
            if rbox[-1] > 0.1:
                boxes.append(rbox[:4])
                boxLabels.append(2)
        box_centers = []
        for b, l in zip(boxes, boxLabels):
            box_centers.append({
                "x": (b[0] + b[2])/2,
                "y": (b[1] + b[3])/2,
                "box": b.tolist(),
                "cls": l,
                "w": b[2] - b[0],
                "h": b[3] - b[1],
            })
        for b in box_centers:
            # Stack the chunks vertically: shift chunk j down by j chunk heights.
            b["box"][1] += j * h
            b["box"][3] += j * h
            b["range"] = (b["box"][1], b["box"][3])
        # Within a chunk the boxes are visited from left to right.
        allBoxes += sorted(box_centers, key=lambda t: t["box"][0])
    # NOTE(review): assumes the chunk width corresponds to 110 cm - TODO confirm.
    pixelM = 110 / w
    cmPix = 1 / pixelM

    run = 1
    prevB = None
    lx = []
    hM = []
    rec = []
    for d in allBoxes:
        # Class-2 boxes wider than 9.9 cm (with positive height) count towards the current run.
        if d["cls"] == 2 and d["w"] > (cmPix * 9.9) and d["h"] > (0 * w):
            lx.append(d["box"])
            hM.append(d["box"][3] - d["box"][1])
            prevB = d
        # A class-1 box closes the current run and starts the next one.
        if d["cls"] == 1:
            prevB = d
            rec.append({
                "run": run,
                "len": _runLength(lx, hM, pixelM),
            })
            lx = []
            hM = []
            run += 1
    # Boxes collected after the last class-1 box form a final run.
    if prevB is not None and prevB["cls"] == 2 and prevB["w"] > (cmPix * 9.9) and prevB["h"] > (0 * w):
        rec.append({
            "run": run,
            "len": _runLength(lx, hM, pixelM),
        })
    xN = iN.replace(".jpg", "")
    for runRec in rec:
        rId = xN + "-" + str(runRec["run"])
        try:
            hx = idf.loc[rId].h
        except KeyError:
            # Only a missing RunId is tolerated; any other error should surface.
            print("not found index : ", rId)
            continue
        rqdValue = runRec["len"] * 100 / hx
        rdf.append({
            "RunId": rId,
            "Rqd": rqdValue,
            "Prediction": rqd(rqdValue)
        })

In [None]:
rdfx = pd.DataFrame(rdf, columns=["RunId", "Prediction", "Rqd"])

# Runs listed in the run table but missing from the predictions get the fallback class 1.
# DataFrame.append was removed in pandas 2.0, so the rows are added with one pd.concat.
missingRunIds = set(frqd.RunId.unique()) - set(rdfx.RunId.unique())
if missingRunIds:
    fallbackRows = pd.DataFrame({
        "RunId": sorted(missingRunIds, key=str),
        "Prediction": 1,
    })
    rdfx = pd.concat([rdfx, fallbackRows], ignore_index=True)

In [None]:
# Write only RunId and Prediction (the Rqd column is left out) to result.csv, without the row index.
rdfx[["RunId", "Prediction"]].to_csv("result.csv", index=False)