In [None]:
from PIL import Image, ImageDraw
import json

# Sample image whose annotations are visualised further down in this cell.
image = Image.open('/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/defect-detection-and-segment-deep-learning/class_data/sample/scratch/Converted_ 0069.png')

# Annotation file stored next to the image; it holds the bounding box data.
with open('/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/defect-detection-and-segment-deep-learning/class_data/sample/scratch/Converted_ 0069.json') as annotation_file:
    data = json.load(annotation_file)

def expand2square(pil_img, background_color):
    """Pad an image to a square canvas, keeping the original centred.

    Args:
        pil_img: Image to pad; its ``size`` and ``mode`` are read.
        background_color: Fill colour handed to ``Image.new`` for the padding.

    Returns:
        ``pil_img`` itself when it is already square; otherwise a new image
        whose side equals the longer edge, with ``pil_img`` pasted in the
        middle of the shorter axis.
    """
    width, height = pil_img.size
    if width == height:
        return pil_img

    # The longer edge defines the square; only the shorter axis gets an offset.
    side = max(width, height)
    offset = ((side - width) // 2, (side - height) // 2)

    canvas = Image.new(pil_img.mode, (side, side), background_color)
    canvas.paste(pil_img, offset)
    return canvas

# Draw every annotated box of the sample onto `image` (in place).
# The previous guard `data['shapes'] != 1` compared a list with an int, which
# is always True, so it has been removed; the loop simply visits every shape.
# One ImageDraw handle is enough for all shapes.
draw = ImageDraw.Draw(image)

for shape in data['shapes']:
    points = shape['points']
    print(points)
    x1, y1 = points[0]
    x2, y2 = points[1]

    # Label both annotated corners at the positions stored in the file.
    draw.text((x1, y1), "x1, y1", fill='red')
    draw.text((x2, y2), "x2, y2", fill='red')

    # ImageDraw.rectangle requires x1 >= x0 and y1 >= y0, but the annotation
    # does not guarantee corner order, so sort each axis before drawing.
    left, right = sorted((x1, x2))
    top, bottom = sorted((y1, y2))
    draw.rectangle([left, top, right, bottom], outline='green')

# Last expression of the cell: show the annotated image.
image

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import sys
import os
import json
import numpy as np
from PIL import Image

# Make the parent directory importable from this notebook.
sys.path.append(os.path.realpath('..'))

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Dataset split roots (relative to the notebook's working directory).
train_set_dir = '../defect-detection-and-segment-deep-learning/class_data/Train'
test_set_dir = '../defect-detection-and-segment-deep-learning/class_data/Val'


### Project target
* You are asked to detect and segment manufacturing defects in the given images.
* You are given a total of 450 images with ground-truth annotations (masks and bounding-box positions).
* You can use image processing techniques as well as deep learning methods for this homework.

### Project steps
* Step 1: Data preprocessing
* Step 2: Data augmentation
* Step 3: Model training: detection and segmentation models
* Step 4: Model evaluation with IoU and precision
* Step 5: Create a GUI for defect detection and segmentation

In [None]:
# Preprocessing applied to every image, in this order.
_preprocessing_steps = [
    # bring every image to a fixed 224x224 size
    torchvision.transforms.Resize((224, 224)),
    # collapse RGB to a single grayscale channel
    torchvision.transforms.Grayscale(num_output_channels=1),
    # PIL image -> tensor
    torchvision.transforms.ToTensor(),
    # normalise with mean 0.5 and std 0.5
    torchvision.transforms.Normalize((0.5,), (0.5,)),
]
transform = torchvision.transforms.Compose(_preprocessing_steps)

# Define the custom dataset class
class DefectDetectionDataset(torch.utils.data.Dataset):
    """Image / bounding-box / (optional) mask / class-name dataset.

    Expected layout under ``root_dir``, for each name in ``class_names``::

        <root_dir>/<class_name>/image/<stem>.png
        <root_dir>/<class_name>/label/<stem>.json   # {"shapes": [{"points": [[x1, y1], [x2, y2]]}, ...]}
        <root_dir>/<class_name>/mask/<stem>.png

    One sample corresponds to one image file.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Optional callable applied to the image (and to the mask
            when masks are requested).
        mask: When True, ``__getitem__`` also returns the mask.

    Attributes:
        image_filenames, mask_filenames, labels, boxes_per_image: parallel
            lists with exactly one entry per image.
        boxes: flat list of every ``[x1, y1, x2, y2]`` box of every image, in
            load order. It is NOT indexed by sample; use ``boxes_per_image``.
    """

    def __init__(self, root_dir, transform=None, mask:bool=False):
        self.root_dir = root_dir
        self.transform = transform
        self.mask = mask
        self.class_names = ['powder_uncover', 'powder_uneven', 'scratch']
        self.types = ['image']
        self.image_filenames = []
        # every box of every image, flattened (kept for backward compatibility)
        self.boxes = []
        # boxes grouped per image so that index i always refers to image i
        self.boxes_per_image = []
        self.labels = []
        self.mask_filenames = []
        for class_name in self.class_names:
            class_dir = os.path.join(root_dir, class_name)
            # label/ and mask/ are siblings of the image directory; build the
            # paths explicitly instead of str.replace('image', ...), which
            # would also rewrite any other "image" substring in the path.
            label_dir = os.path.join(class_dir, 'label')
            mask_dir = os.path.join(class_dir, 'mask')
            for type_name in self.types:
                type_dir = os.path.join(class_dir, type_name)
                # os.listdir order is arbitrary; sort for a reproducible sample order
                for filename in sorted(os.listdir(type_dir)):
                    if not filename.endswith('.png'):
                        continue
                    stem = os.path.splitext(filename)[0]

                    # read the label file for the bounding box positions
                    with open(os.path.join(label_dir, stem + '.json')) as f:
                        label_dict = json.load(f)
                    image_boxes = []
                    for shape in label_dict['shapes']:
                        points = shape['points']
                        x1, y1 = points[0]
                        x2, y2 = points[1]
                        image_boxes.append([x1, y1, x2, y2])

                    self.image_filenames.append(os.path.join(type_dir, filename))
                    self.boxes_per_image.append(image_boxes)
                    self.boxes.extend(image_boxes)
                    self.mask_filenames.append(os.path.join(mask_dir, filename))
                    self.labels.append(class_name)

    def __len__(self):
        """Return the number of images (samples)."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Return ``(image, box, label)`` or ``(image, box, mask, label)``.

        ``box`` is the first annotated ``[x1, y1, x2, y2]`` of image ``idx``
        so that every sample keeps the same fixed-size shape; all boxes of
        the image are available in ``self.boxes_per_image[idx]``. An image
        without any annotated shape yields ``[0.0, 0.0, 0.0, 0.0]``.

        Box coordinates are returned exactly as stored in the annotation;
        they are not rescaled if ``transform`` resizes the image.
        """
        image = Image.open(self.image_filenames[idx])

        image_boxes = self.boxes_per_image[idx]
        # copy so callers cannot mutate the stored annotation
        box = list(image_boxes[0]) if image_boxes else [0.0, 0.0, 0.0, 0.0]

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        if self.mask == True:
            # only touch the mask file when the caller actually asked for it
            mask = Image.open(self.mask_filenames[idx]).convert('L')
            if self.transform:
                mask = self.transform(mask)
            return image, box, mask, label
        else:
            return image, box, label
    
# Load the training and test datasets
trainset = DefectDetectionDataset(root_dir=train_set_dir, transform=transform)
testset = DefectDetectionDataset(root_dir=test_set_dir, transform=transform)

batch_size = 32

# Define the data loaders (only the training split is shuffled)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=1)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=1)

# ResNet-50 without pretrained weights; the final layer is replaced so that it
# outputs one score per defect class.
model = torchvision.models.resnet50()
num_ftrs = model.fc.in_features
model.fc = torch.nn.Linear(num_ftrs, len(trainset.class_names))

# The stock ResNet stem expects 3-channel input, while the preprocessing
# pipeline produces single-channel (grayscale) tensors, which would make the
# first forward pass fail. Rebuild the first convolution to match whatever
# channel count the dataset really yields. Nothing is lost: the weights are
# randomly initialised anyway.
sample_image = trainset[0][0]
input_channels = sample_image.shape[0]
stem_conv = model.conv1
if input_channels != stem_conv.in_channels:
    model.conv1 = torch.nn.Conv2d(
        input_channels,
        stem_conv.out_channels,
        kernel_size=stem_conv.kernel_size,
        stride=stem_conv.stride,
        padding=stem_conv.padding,
        bias=stem_conv.bias is not None,
    )

model = model.to(device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)



In [2]:
import json
from tqdm import tqdm
from PIL import Image
import os
import pandas as pd

# YOLOv7 export locations: the dataset root plus its train / val image folders.
yolo_image_path = "/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/yolov7/defect/"
yolo_train_image_path = "/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/yolov7/defect/image/train/"
yolo_val_image_path = "/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/yolov7/defect/image/val/"

# Source dataset splits (relative to the notebook's working directory).
train_set_dir = '../defect-detection-and-segment-deep-learning/class_data/Train'
test_set_dir = '../defect-detection-and-segment-deep-learning/class_data/Val'

# Class folders and the per-class sub-folder that holds the images.
class_names = ['powder_uncover', 'powder_uneven', 'scratch']
types = ['image']

# One row per converted bounding box is appended to this (initially empty) frame.
_yolo_columns = ["category", "x", "y", "w", "h", "image_name", "image_path"]
yolo_csv = pd.DataFrame(columns=_yolo_columns)

# Convert one image's annotation JSON into YOLO rows (class + normalised box).
def convert_to_yolo_format(convert_img_file:str, save_img_file_name:str, save:bool=False):
    """Convert the annotations of one image to YOLO ``class x y w h`` rows.

    The annotation file is looked up next to the image: the image's parent
    directory name has "image" replaced by "label" and the file extension
    becomes ``.json``. The JSON must hold a ``"shapes"`` list whose items
    carry a ``"label"`` and two corner ``"points"``.

    For every shape one row ``[category, x, y, w, h, image_name, image_path]``
    is appended to the module-level ``yolo_csv`` frame, where ``x``/``y`` are
    the box centre and ``w``/``h`` its size, all divided by the image size.

    Args:
        convert_img_file: Path of the image file.
        save_img_file_name: Split name (e.g. ``'train'`` or ``'val'``); it
            selects the label sub-folder and the listing file that are
            written when ``save`` is True.
        save: When True, append the rows to
            ``<yolo_image_path>/labels/<split>/<stem>.txt`` and append the
            image entry to ``<yolo_image_path>/<split>.txt``.

    Returns:
        None. Results are recorded in ``yolo_csv`` (and on disk when saving).
    """
    image_dir, image_name = os.path.split(convert_img_file)
    image_stem = os.path.splitext(image_name)[0]

    # Only rewrite the image folder's own name; a blanket str.replace over the
    # whole path would also corrupt any other "image" substring in it.
    parent_dir, leaf_dir = os.path.split(image_dir)
    label_path = os.path.join(parent_dir, leaf_dir.replace("image", "label"), image_stem + ".json")

    with open(label_path, "r") as file:
        json_file = json.load(file)

    # Close the image handle as soon as the size is known.
    with Image.open(convert_img_file) as img:
        width, height = img.size

    # Any label other than the two listed here is treated as class 2.
    category_ids = {"powder_uncover": 0, "powder_uneven": 1}

    yolo_lines = []
    for annotation in json_file["shapes"]:
        category_id = category_ids.get(annotation["label"], 2)
        points = annotation["points"]

        # The two corners may be stored in any order; sort each axis so the
        # width and height can never come out negative.
        x_min, x_max = sorted((points[0][0], points[1][0]))
        y_min, y_max = sorted((points[0][1], points[1][1]))

        # normalize the bounding box
        x = (x_min + (x_max - x_min) / 2) * 1.0 / width
        y = (y_min + (y_max - y_min) / 2) * 1.0 / height
        w = (x_max - x_min) * 1.0 / width
        h = (y_max - y_min) * 1.0 / height

        yolo_lines.append(f"{category_id} {x} {y} {w} {h}")
        yolo_csv.loc[len(yolo_csv)] = [category_id, x, y, w, h, image_name, convert_img_file]

    if save == True:
        # Labels go to the folder of the requested split. Previously they were
        # always written to the train label folder, even for 'val' images.
        label_dir = os.path.join(yolo_image_path, "labels", save_img_file_name)
        os.makedirs(label_dir, exist_ok=True)
        if yolo_lines:
            # append mode is kept so existing label files are never truncated
            with open(os.path.join(label_dir, image_stem + ".txt"), "a") as file:
                for line in yolo_lines:
                    file.write(line)
                    file.write("\n")

        # save the image path to the split's listing file
        with open(os.path.join(yolo_image_path, f"{ save_img_file_name }.txt"), "a") as file:
            file.write(f"./images/{ save_img_file_name }/" + image_name)
            file.write("\n")

sample_folder = "/media/buslab/bed7bcae-c46d-4bde-874d-bdeb04d5dec9/NERO/DIP/final_project/defect-detection-and-segment-deep-learning/class_data/sample/"

# Convert every file of every class for both splits. Each split directory is
# paired with the split name handed to the converter. Files are processed
# under their original names (no class prefix is added), and nothing is
# written to disk because `save` keeps its default.
for dataset, split_name in ((train_set_dir, 'train'), (test_set_dir, 'val')):
    for class_name in class_names:
        class_dir = os.path.join(dataset, class_name)
        for type_name in types:
            type_dir = os.path.join(class_dir, type_name)
            for filename in tqdm(os.listdir(type_dir)):
                convert_to_yolo_format(
                    convert_img_file=f"{type_dir}/{filename}",
                    save_img_file_name=split_name,
                )


100%|██████████| 100/100 [00:02<00:00, 34.65it/s]
100%|██████████| 100/100 [00:02<00:00, 39.65it/s]
100%|██████████| 100/100 [00:00<00:00, 203.44it/s]
100%|██████████| 50/50 [00:01<00:00, 34.99it/s]
100%|██████████| 50/50 [00:01<00:00, 40.65it/s]
100%|██████████| 50/50 [00:00<00:00, 382.39it/s]


In [4]:
# List the distinct image paths among the rows whose path matches '0395.png'.
yolo_csv.query("image_path.str.contains('0395.png')").image_path.unique()

array(['../defect-detection-and-segment-deep-learning/class_data/Val/powder_uncover/image/converted_ 0395.png'],
      dtype=object)

In [7]:
# Display the validation image 'converted_ 0395.png' of the powder_uncover class.
Image.open("../defect-detection-and-segment-deep-learning/class_data/Val/powder_uncover/image/converted_ 0395.png")

In [3]:
# Count the rows (bounding boxes) recorded per image file name, largest count first.
yolo_csv.groupby("image_name").count().sort_values(by="image_path", ascending=False)

Unnamed: 0_level_0,category,x,y,w,h,image_path
image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
converted_ 0129.png,13,13,13,13,13,13
converted_ 0128.png,12,12,12,12,12,12
converted_ 0395.png,9,9,9,9,9,9
converted_ 0137.png,8,8,8,8,8,8
converted_ 0141.png,7,7,7,7,7,7
...,...,...,...,...,...,...
converted_ 0206.png,1,1,1,1,1,1
converted_ 0205.png,1,1,1,1,1,1
converted_ 0204.png,1,1,1,1,1,1
converted_ 0203.png,1,1,1,1,1,1
