In [None]:
### Unzip the raw gdxray dataset
import zipfile
with zipfile.ZipFile("Castings.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/drive/MyDrive/casting_project")

In [None]:
! pip install --upgrade albumentations

/content/drive/MyDrive/casting_project


In [93]:
import csv
import json
import os
import random
import shutil

import numpy as np
import matplotlib.pylab as plt

import albumentations as alb
import cv2
from tqdm import tqdm

import numpy as np
from scipy import ndimage
from scipy.ndimage.morphology import binary_dilation as imdilate
from PIL import Image,  ImageDraw
from pycocotools import mask
from skimage import measure

In [94]:
# defining util functions

def rotation_matrix_3d(wx, wy, wz):
    R = np.array([
        [np.cos(wy) * np.cos(wz), -np.cos(wy) * np.sin(wz), np.sin(wy)],
        [np.sin(wx) * np.sin(wy) * np.cos(wz) + np.cos(wx) * np.sin(wz),
         -np.sin(wx) * np.sin(wy) * np.sin(wz) + np.cos(wx) * np.cos(wz),
         -np.sin(wx) * np.cos(wy)],
        [-np.cos(wx) * np.sin(wy) * np.cos(wz) + np.sin(wx) * np.sin(wz),
         np.cos(wx) * np.sin(wy) * np.sin(wz) + np.sin(wx) * np.cos(wz),
         np.cos(wx) * np.cos(wy)]
    ])
    return R
def funcf(m, K):
    x = np.ones((3, 1))
    x[0] = m[0]
    x[1] = m[1]
    y = np.matmul(K, x)
    return y



In [95]:
# Define augmentation methods
augment_call = alb.Compose([alb.Resize(height=1144, width=1536, always_apply=True, ),
                            alb.RandomCrop(640, 640, always_apply=True), ],
                           bbox_params=alb.BboxParams(format="coco", min_area=100, min_visibility=0.6), )

In [96]:
# defining few initial variables
folder_name = "C0001"
base_path = ""
test_path_ori = "./test_aug/"
test_path_aug = "./test_aug/augmented.jpg"
test_path_aug_with_bbox = "./test_aug/augmented_final.jpg"
train_folder = "{}train/".format(base_path)
test_folder = "{}test/".format(base_path)
val_folder = "{}val/".format(base_path)
train_json_file = "{}train/train.json".format(base_path, )
val_json_file = "{}val/val.json".format(base_path, )
test_json_file = "{}test/test.json".format(base_path, )
base_folder = "{}Castings/{}/".format(base_path, folder_name)
bbox_csv_file = "{}Castings/BoundingBox_{}.csv".format(base_path, folder_name)
raw_json = "{}Castings/{}/raw.json".format(base_path, folder_name)
test_json = {"images": [], "annotations": [],
             "final_count_images": 0, "final_count_annotations": 0}
val_json = {"images": [], "annotations": [],
            "final_count_images": 0, "final_count_annotations": 0}
train_json = {"images": [], "annotations": [],
              "final_count_images": 0, "final_count_annotations": 0}
vgg_list = []
categories= [
    {
      "id": 0,
      "name": "flaws",
      "supercategory": "none"
    },
    {
      "id": 1,
      "name": "defects",
      "supercategory": "none"
    }
  ]

error_json = {"errors": []}
test_image_id = 1
train_image_id = 1
error_json_file = "{}train/error.json".format(base_path)
raw_json_object = None
test_bbox_id = 1
train_bbox_id = 1
val_bbox_id = 1

In [97]:
###
# Steps for data generation:
# Using albumentation enlarge the images from ‪768 x 572‬ to 1536 x 1144
# Then take random crops of 640 x 640 
# Crops without bounding boxes will be used for defect simulation
# Rest of the crops will serve as our test data 
###

In [98]:
def convert_csv_to_raw_json():
  # Converts the csv bounding box file to json file
    raw_map = {"images": []}
    bbox_csv = csv.reader(open(bbox_csv_file, "r"))
    for row in tqdm(bbox_csv):
        x, y, w, h = 0, 0, 0, 0
        if int(row[1]) < int(row[2]):
            x = int(row[1])
            y = int(row[3])
        else:
            x = int(row[2])
            y = int(row[4])
        w = abs(int(row[1]) - int(row[2]))
        h = abs(int(row[3]) - int(row[4]))
        raw_map["images"].append({
            "image_id": row[0],
            "bbox": [x, y, w, h, "defect"],
        })
    json_object = json.dumps(raw_map, indent=4)
    with open(raw_json, "w") as outfile:
        outfile.write(json_object)

In [99]:
def preprocess_image(image_name, image_id, ):
    image_cv = cv2.imread(base_folder + image_name)
    global test_image_id, test_bbox_id, train_image_id
    result = search(image_id)
    total_bboxes = []
    if len(result) != 0:
        for item in result:
            total_bboxes.append(item["bbox"])
        suffix_test = 1
        suffix_train = 1
        for _ in range(1, 3): # change 51 to any number you want, this will increase the number of random crops applied on the image
            try:
                augmented_result = augment_call(
                    image=image_cv, bboxes=total_bboxes, )
                augmented_image = augmented_result['image']
                augmented_bboxes = augmented_result['bboxes']
                if len(augmented_bboxes) != 0 and augmented_bboxes != []:
                    test_json["images"].append({
                        "height": 640,
                        "width": 640,
                        "file_name": "{}_{}.png".format(image_name.split(".")[0], suffix_test),
                        "id": str(test_image_id),
                        "final_count_images": test_image_id,
                    })
                    test_json["final_count_images"] = test_image_id
                    for bbox in augmented_bboxes:
                        test_json["annotations"].append({
                            "id": str(test_bbox_id),
                            "image_id": str(test_image_id),
                            "category_id": 1,
                            "bbox": [bbox[0], bbox[1], bbox[2], bbox[3]],
                            "area": bbox[2] * bbox[3],
                            "segmentation": [],
                            "iscrowd": 0,

                        })
                        test_json["final_count_annotations"] = test_bbox_id
                        test_bbox_id += 1
                    cv2.imwrite("{}{}{}.png".format(test_folder, image_name.split(".")[0], suffix_test, ),
                                augmented_image)
                    suffix_test += 1
                    test_image_id += 1
                else:
                    train_json["images"].append({
                        "height": 640,
                        "width": 640,
                        "file_name": "{}{}.png".format(image_name.split(".")[0], suffix_train),
                        "id": str(train_image_id),
                    })
                    train_json["final_count_images"] = train_image_id
                    cv2.imwrite("{}{}{}.png".format(train_folder, image_name.split(".")[0], suffix_train),
                                augmented_image)
                    train_image_id += 1
                    suffix_train += 1
            except Exception as error:
                error_json["errors"].append({
                    "file_name": image_name,
                    "final_count_images_test": test_image_id,
                    "final_count_images_train": train_image_id,
                    "error": str(error),
                })
                continue

    else:
        pass



In [100]:

def search(image_id, ):
    result = []
    for p in raw_json_object["images"]:
        if p['image_id'] == image_id:
            result.append(p)
    return result


In [113]:
def augment_and_save():
    global raw_json_object
    raw_json_object = json.load(open(raw_json, "r"))
    ids = os.listdir(base_folder)
    image_id = 1
    for i, _id in enumerate(ids):
        if '.png' in _id:
            preprocess_image(_id, str(image_id))
            image_id += 1
        else:
            pass
    json_object = json.dumps(test_json, indent=4)
    with open(test_json_file, "w") as outfile:
        outfile.write(json_object)
    json_object = json.dumps(train_json, indent=4)
    with open(train_json_file, "w") as outfile:
        outfile.write(json_object)
    json_object = json.dumps(error_json, indent=4)
    with open(error_json_file, "w") as outfile:
        outfile.write(json_object)
    with open("casting_train.txt","w") as textFile:
        for img in train_json["images"]:
            textFile.write("Castings/C0001/" + img["file_name"]+"\n")

In [102]:
def set_folder_name(name, train_directory= None, test_directory= None, base_path_of_directory= None, bbox_csv_file_path = None):
    global folder_name, base_folder, bbox_csv_file, raw_json, test_json, train_json, test_image_id, train_image_id, \
        train_folder, test_folder, base_path
    folder_name = name
    if base_path_of_directory is not None:
        base_path = base_path_of_directory
    bbox_csv_file = "{}Castings/BoundingBox_{}.csv".format(
        base_path, folder_name,)
    base_folder = "{}Castings/{}/".format(base_path, folder_name)
    raw_json = "{}Castings/{}/raw.json".format(base_path, folder_name)
    test_json = json.load(open(test_json_file, "r"))
    train_json = json.load(open(train_json_file, "r"))
    test_image_id = test_json["final_count_images"] + 1
    train_image_id = train_json["final_count_images"] + 1
    if bbox_csv_file_path is not None:
        bbox_csv_file = bbox_csv_file_path
    if train_directory is not None:
        train_folder = train_directory
    if test_directory is not None:
        test_folder = test_directory
    # print(base_path)
    # print(base_folder)


In [103]:
def generate_train_test():
    convert_csv_to_raw_json()
    augment_and_save()

    # list_of_folder_name = ["C0007" ]
    list_of_folder_name = ["C0007", "C0008", "C0010", "C0015", "C0019", "C0021", "C0024", "C0026", "C0029", "C0030",
                           "C0031", "C0032", "C0033", "C0034", "C0035", "C0036", "C0037", "C0038", "C0039", "C0040",
                           "C0041", "C0042", "C0043", "C0045", "C0047", "C0051", "C0054", "C0057", "C0060", "C0062",
                           "C0065", ]
    # list_of_folder_name=[]
    print("************************** \nDone with C0001\n**************************")
    for folder in list_of_folder_name:
        set_folder_name(folder)
        convert_csv_to_raw_json()
        augment_and_save()
        print("************************** \nDone with {}\n**************************".format(folder))


In [104]:
# Now we split the train folder into train and val folders

In [105]:
def train_val_split():
    list_dir = os.listdir(train_folder)
    random.shuffle(list_dir)
    val_file_list = random.sample(list_dir, int(len(list_dir) * 0.1))
    for file in val_file_list:
        shutil.move(train_folder + file, val_folder)



In [106]:

def ellipsoid_simulation(I, K, SSe, f, abc, var_mu, xmax, negative):
    if negative:
        I = 255 - I
    J = I.copy()
    
    (N, M) = I.shape
    binary_mask = np.zeros((N, M),dtype=np.uint8)
    R = np.zeros((N, M))  # ROI of simulated defect

    invK = np.linalg.inv(K)

    if len(abc) == 3:  # ellipsoid
        (a, b, c) = abc
    else:  # sphere
        a = abc
        b = a
        c = a

        # Computation of the 3 x 3 matrices Phi and L
    H = np.linalg.inv(SSe)
    h0 = H[0, :] / a
    h1 = H[1, :] / b
    h2 = H[2, :] / c
    Hs = np.zeros((3, 3))
    Hs[:, 0] = h0[0:3]
    Hs[:, 1] = h1[0:3]
    Hs[:, 2] = h2[0:3]
    hd = np.zeros((3, 1))
    hd[0] = h0[3]
    hd[1] = h1[3]
    hd[2] = h2[3]
    Phi = np.matmul(Hs, Hs.T)
    hhd = np.matmul(hd, hd.T)
    hhd1 = 1 - np.matmul(hd.T, hd)
    L = np.matmul(np.matmul(Hs, hhd), Hs.T) + hhd1 * Phi

    # Location of the superimposed area
    A = L[0:2, 0:2]
    mc = np.array(-f * np.matmul(np.linalg.inv(A), L[0:2, 2]))
    x = np.linalg.eig(A)[0]
    C = np.array([x[1], x[0]])
    la = C
    a00 = np.linalg.det(L) / np.linalg.det(A)
    ae = f * np.sqrt(-a00 / la[0])
    be = f * np.sqrt(-a00 / la[1])
    al = np.arctan2(C[1], C[0]) + np.pi
    ra = np.array([ae * np.cos(al), ae * np.sin(al)])
    rb = np.array([be * np.cos(al + np.pi / 2), be * np.sin(al + np.pi / 2)])
    u1 = funcf(mc + ra, K)
    u2 = funcf(mc + rb, K)
    u3 = funcf(mc - ra, K)
    u4 = funcf(mc - rb, K)
    uc = funcf(mc, K)
    e1 = u1 + u2 - uc
    e2 = u1 + u4 - uc
    e3 = u3 + u2 - uc
    e4 = u3 + u4 - uc
    Es = np.concatenate((e1, e2, e3, e4), axis=1)
    E = Es[0:2, :]
    Emax = np.max(E, axis=1)
    Emin = np.min(E, axis=1)
    umin = int(np.fix(Emin[0]))
    umax = int(np.fix(Emax[0] + 1))
    vmin = int(np.fix(Emin[1]))
    vmax = int(np.fix(Emax[1] + 1))
    bb = (vmin, vmax, umin, umax)
    
    
    if umin >= 0 and umax < M and vmin >= 0 and vmax < N:
        q = 255 / (1 - np.exp(var_mu * xmax))
        R[umin:umax, vmin:vmax] = 1
        R = imdilate(R)

        z = np.zeros((2, 1))
        for u in range(umin, umax):
            z[0] = u
            for v in range(vmin, vmax):
                max_u, max_v = J.shape
                if max_u - 1 < u or max_v - 1 < v:
                    break
                z[1] = v
                m = funcf(z, invK)
                m[0:2] = m[0:2] / f
                p = np.matmul(np.matmul(m.T, L), m)
                if p > 0:
                    x = np.matmul(np.matmul(m.T, Phi), m)
                    d = 2 * np.sqrt(p) * np.linalg.norm(m) / x
                    J[u, v] = np.exp(var_mu * d) * (I[u, v] - q) + q
                    binary_mask[u,v] = 255 # binary binary_mask

    if negative:
        J = 255 - J
    return J, bb, binary_mask

In [107]:

def simulate(image_path,  image_id, isTrain,defect_num):
    global train_bbox_id, val_bbox_id
    darker = False  # for castings, defects are brighter than the test object
    countour = []
    if 'png'not in image_path:
      return 
    # Example 2: Welds
    # img_name = 'weld.png'
    # darker = True # for welds, defects are darker than the test object
    numpy_simulated_image = None
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    I = np.double(image)
    N, M = I.shape
    ok = False
    xmin, xmax, ymin, ymax = None, None, None, None
    while not ok:
        # Transformation (X,Y,Z)->(Xb,Yb,Zb)
        wx = 2 * np.pi * random.random()
        wy = 2 * np.pi * random.random()
        wz = 2 * np.pi * random.random()
        R1 = rotation_matrix_3d(wx, wy, wz)
        tx = -166 + 460 * random.random()
        ty = -250 + 560 * random.random()
        tz = 1000
        t1 = np.array([tx, ty, tz])
        S = np.vstack(
            [np.hstack([R1, t1[:, np.newaxis]]), np.array([0, 0, 0, 1])])

        # Transformation (Xp,Yp,Zp)->(X,Y,Z)
        R2 = rotation_matrix_3d(0, 0, np.pi / 3)
        t2 = np.array([0, 0, 0])
        Se = np.vstack(
            [np.hstack([R2, t2[:, np.newaxis]]), np.array([0, 0, 0, 1])])

        # Transformation (Xp,Yp,Zp)->(Xb,Yb,Zb)
        SSe = np.matmul(S, Se)

        # Transformation (x,y)->(u,v)
        K = np.array([[1.1, 0, 235], [0, 1.1, 305], [0, 0, 1]])

        # Dimensions of the ellipsoid in mm
        a = 1 + 10 * random.random()
        b = 1 + 10 * random.random()
        c = 1 + 10 * random.random()

        # Focal distance in mm
        f = 1500

        # X-ray Absorption coefficient
        mu = 0.01  # the larger the starker the intensity of the defect

        # Maximal observable thickness in mm in the X-ray image
        xmax = 400

        # Simulation: J simulated image, bb bounding box
        numpy_simulated_image, bb, binary_mask = ellipsoid_simulation(
            I, K, SSe, f, (a, b, c), mu, xmax, darker)
        (xmin, xmax, ymin, ymax) = bb

        if xmin >= 0 and xmax < M and ymin >= 0 and ymax < N:
            ok = True

    x = int(xmin)
    y = int(ymin)
    w = int(abs(xmax-xmin))
    h = int(abs(ymax-ymin))

    edge = cv2.Canny(image=binary_mask, threshold1=100, threshold2=200)  

    for i in range(640):  
      for j in range(640):
        if edge[i,j] != 0:
          countour.append(i)
          countour.append(j) 
    # contours = measure.find_contours(edge, 0.5)
    
    # segm = []

    # for contour in contours:
    #   contour = np.flip(contour, axis=1)
    #   segmentation = contour.ravel().tolist()
    #   segm.append(segmentation)

    file_name = image_path.split('/')[len(image_path.split('/'))-1].split('.')[0]
    folder_name = file_name.split('_')[0]
    cv2.imwrite(filename=image_path, img=numpy_simulated_image)     # write image
    cv2.imwrite(filename="masks/" + file_name+ "_" + str(defect_num) + ".png" , img=binary_mask)     # write image

    fortran_ground_truth_binary_mask = np.asfortranarray(edge)
    encoded_ground_truth = mask.encode(fortran_ground_truth_binary_mask)
    ground_truth_area = mask.area(encoded_ground_truth)


    if isTrain:
      train_json["annotations"].append({
        "id": str(train_bbox_id),
        "image_id": str(image_id),
        "category_id": 1,
        "bbox": [x, y, w, h],
        "area": int(ground_truth_area),
        "segmentation": countour,
        "iscrowd": 0,
      }) 
    else:
      val_json["annotations"].append({
        "id": str(train_bbox_id),
        "image_id": str(image_id),
        "category_id": 1,
        "bbox": [x, y, w, h],
        "area": int(ground_truth_area),
        "segmentation": countour,
        "iscrowd": 0,
      })
    train_bbox_id += 1

In [108]:
def loop_for_simulation(isTrain):
    global train_json
    train_json = {"images": [], "annotations": [],
              "final_count_images": 0, "final_count_annotations": 0, "categories":categories}
    global train_image_id
    train_image_id = 1
    if isTrain:
      folder =  train_folder 
    else:
      folder = val_folder
    images = os.listdir(folder)
    for i in range(len(images)):
        if "json" in images[i]:
            images.pop(i)
            break
    for i in range(len(images)):
        if "json" in images[i]:
            images.pop(i)
            break

    for image in tqdm(images):
        random_defects = int(random.randint(1, 1001) / 250)
        for defect_num in range(random_defects):

            image_path = folder+image
            simulate(image_path=image_path, image_id=train_image_id, isTrain=isTrain, defect_num=defect_num )
        
        train_json["images"].append({
            "height": 640,
            "width": 640,
            "file_name": image,
            "id": str(train_image_id),
        })
        train_image_id += 1
    print(train_json)
    json_object = json.dumps(train_json, indent=2)
    with open(train_json_file, "w") as outfile:
      outfile.write(json_object)



In [109]:
generate_train_test()

226it [00:00, 99633.46it/s]


************************** 
Done with C0001
**************************


In [111]:
loop_for_simulation(isTrain=True)

100%|██████████| 64/64 [01:26<00:00,  1.35s/it]

{'images': [{'height': 640, 'width': 640, 'file_name': 'C0001_00011.png', 'id': '1'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00012.png', 'id': '2'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00021.png', 'id': '3'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00022.png', 'id': '4'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00041.png', 'id': '5'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00042.png', 'id': '6'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00051.png', 'id': '7'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00061.png', 'id': '8'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00081.png', 'id': '9'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00082.png', 'id': '10'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00091.png', 'id': '11'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00092.png', 'id': '12'}, {'height': 640, 'width': 640, 'file_name': 'C0001_00101.png', 'id': '13'}, {'height': 640, 'width




In [112]:
%ls GDXray/Castings/C0001/


ls: cannot access 'GDXray/Castings/C0001/': No such file or directory
