# NEU-CLS

In [None]:
pip install patool

In [None]:
import patoolib

# One-off unpacking of the NEU surface-defect archive; the extracted files
# are written into the datasets directory given by `outdir`.
patoolib.extract_archive(
    "/nfs4-p1/gj/datasets/NEU surface defect database.rar",
    outdir="/nfs4-p1/gj/datasets/",
)

In [None]:
import torch
from torch import nn
from torch import optim
import glob
import os
import numpy as np
import matplotlib.pyplot as plt 
import cv2
import seaborn

# Seed the NumPy and PyTorch global generators with the same fixed value.
seed = 928
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Compute device handle (first CUDA device).
device = torch.device("cuda:0")

# Where the raw datasets live and where the generated graph data goes.
datasets_root = "/nfs4-p1/gj/datasets/"
graph_data_root = "/nfs4-p1/gj/DEFECT2022/data/"
dataset_name = "NEU-CLS"

# Short codes of the defect classes.
class_names = ["RS", "In", "Pa", "Sc", "PS", "Cr"]

# image_size: images are square, height = width = 200 pixels.
# class_num: number of images per class (300), not the number of classes.
image_size, class_num = 200, 300

In [None]:
# Read one reference image from the dataset folder.
sample_path = datasets_root + dataset_name + "/Cr_1.bmp"
sample = plt.imread(sample_path)

# Print, in order: array type, shape, element count, max and min value.
for stat in (type(sample), sample.shape, sample.size,
             sample.max(), sample.min()):
    print(stat)

# Show the image in greyscale.
plt.imshow(sample, cmap='gray')
plt.show()

In [None]:
import pandas as pd

# unit_size: edge length (in pixels) of the square patches cut from the image.
# color_unit_size: divisor used to bin the pixel values.
unit_size = 10
color_unit_size = 16

# Replace every pixel value by its bin index (integer division by the bin
# width) and print the largest resulting index.
sample = np.floor_divide(sample, color_unit_size)
print(sample.max())

In [None]:
# Turn the quantised sample into graph nodes: the image is cut into a grid of
# unit_size x unit_size patches, and every distinct value found in a patch
# becomes one node described by its grid row (i), grid column (j), value (c)
# and the fraction of the patch's pixels that hold that value (density).
nodes = []
patch_x_max = int(image_size / unit_size)
patch_y_max = patch_x_max

for row, col in np.ndindex(patch_x_max, patch_y_max):
    row_span = slice(row * unit_size, (row + 1) * unit_size)
    col_span = slice(col * unit_size, (col + 1) * unit_size)
    patch = sample[row_span, col_span]
    levels, level_counts = np.unique(patch, return_counts=True)

    nodes.extend(
        {
            'i': np.array(row),
            'j': np.array(col),
            'c': np.array(level),
            'density': n_pixels / patch.size,
        }
        for level, n_pixels in zip(levels, level_counts)
    )

# One row per node; the row position serves as the node id downstream.
nodesframe = pd.DataFrame(nodes)

In [None]:
# Write the node table to <graph_data_root><dataset_name>/nodes.csv; the
# DataFrame index is emitted as a column named "node_id".
nodes_csv_path = graph_data_root + dataset_name + "/nodes.csv"
nodesframe.to_csv(nodes_csv_path, index_label='node_id')

In [None]:
# Build the directed edge list of the graph: a node is linked to every other
# node whose grid row (i), grid column (j) and value (c) each differ from its
# own by at most 1.
edges = []
print(nodesframe.shape)

# Iterate over the rows of the node table. shape[0] is the number of nodes;
# shape[1] is the number of columns and would stop after the first few nodes.
for node_index in range(nodesframe.shape[0]):
    node = nodesframe.iloc[node_index]

    near_i = (nodesframe['i'] >= node['i'] - 1) & (nodesframe['i'] <= node['i'] + 1)
    near_j = (nodesframe['j'] >= node['j'] - 1) & (nodesframe['j'] <= node['j'] + 1)
    near_c = (nodesframe['c'] >= node['c'] - 1) & (nodesframe['c'] <= node['c'] + 1)
    adj_nodes = nodesframe.loc[near_i & near_j & near_c]

    for adj_nodes_index in adj_nodes.index:
        # The filter above always matches the node itself; skip self-loops.
        if adj_nodes_index != node_index:
            edges.append({'from_node': node_index, 'to_node': adj_nodes_index})

# Explicit columns keep the table well-formed even when no edge was found.
edgesframe = pd.DataFrame(edges, columns=['from_node', 'to_node'])

In [None]:
# Write the edge table to <graph_data_root><dataset_name>/edges.csv without
# the DataFrame index.
edges_csv_path = graph_data_root + dataset_name + "/edges.csv"
edgesframe.to_csv(edges_csv_path, index=False)

# KSDD

In [17]:
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import multiprocessing as mp

# Locations of the raw datasets and of the generated graph data.
datasets_root = "/nfs4-p1/gj/datasets/AnomalyDetection/"
graph_data_root = "/nfs4-p1/gj/DEFECT2022/data/"

dataset_name = "KSDD"
# os.listdir returns entries in arbitrary order; sort them so positional
# lookups such as dir_names[0] select the same entry on every run.
dir_names = sorted(os.listdir(os.path.join(datasets_root, dataset_name)))
print(len(dir_names))

# unit_size: edge length (in pixels) of the square patches cut from an image.
# color_unit_size: divisor used to bin the pixel values.
unit_size = 50
color_unit_size = 16

50


In [18]:
from math import ceil


# Collect the label masks (*.bmp) and images (*.jpg) of the first sample
# directory. glob returns complete paths in arbitrary order, so the results
# are sorted and need no further joining with the directory.
sample_dir = os.path.join(datasets_root, dataset_name, dir_names[0])
label_list = sorted(glob.glob(os.path.join(sample_dir, '*.bmp')))
img_list = sorted(glob.glob(os.path.join(sample_dir, '*.jpg')))

# Work on one fixed sample. An earlier read of label_list[1] / img_list[1]
# was overwritten by these two reads right away, so it has been dropped.
label = plt.imread('/nfs4-p1/gj/datasets/AnomalyDetection/KSDD/kos02/Part6_label.bmp')
img = plt.imread('/nfs4-p1/gj/datasets/AnomalyDetection/KSDD/kos02/Part6.jpg')
print('img.shape:', img.shape)
print('img.max:', img.max())
print('label.max:', label.max())

height, width = img.shape[0], img.shape[1]
# Replace every pixel value by its bin index (integer division by bin width).
img = img // color_unit_size

# Number of patches per axis; ceil() so a partial patch at the border counts.
patch_x_max = ceil(height / unit_size)
patch_y_max = ceil(width / unit_size)
print(patch_x_max, patch_y_max)

img.shape: (1260, 500)
img.max: 255
label.max: 255
26 10


In [19]:
# Print the label mask's max and min, then whether the two are equal.
label_max, label_min = label.max(), label.min()
print(label_max)
print(label_min)
print(label_max == label_min)

255
0
False


In [20]:
# A constant label mask (max == min) gives class 0; any variation gives 1.
if label.max() == label.min():
    class_index = 0
else:
    class_index = 1
print(class_index)

1


In [21]:
# Turn the quantised image into graph nodes, one node per distinct value
# found in each patch of the patch_x_max x patch_y_max grid.
nodes = []
for row, col in np.ndindex(patch_x_max, patch_y_max):
    # NumPy clips slice stops that run past the array edge, so patches on the
    # bottom/right border simply come out smaller; no special-casing needed.
    patch = img[row * unit_size:(row + 1) * unit_size,
                col * unit_size:(col + 1) * unit_size]

    levels, level_counts = np.unique(patch, return_counts=True)

    for level, n_pixels in zip(levels, level_counts):
        nodes.append({
            'i': np.array(row),
            'j': np.array(col),
            'c': np.array(level),
            # Pixel count rescaled by (full patch area / actual patch area):
            # equals the raw count for a full-size patch and is scaled up for
            # a clipped border patch.
            'density': n_pixels * unit_size * unit_size / patch.size,
        })

nodesframe = pd.DataFrame(nodes)

In [22]:
# Write the node table to a fixed file; the DataFrame index is emitted as a
# column named "node_id".
node_csv_path = '/nfs4-p1/gj/DEFECT2022/data/node.csv'
nodesframe.to_csv(node_csv_path, index_label='node_id')


In [23]:
edges = []
# NOTE: edge construction is disabled in this cell, so `edges` stays empty and
# `edgesframe` is not rebuilt here. The commented-out loop below would link
# each node to every other node whose i, j and c each differ by at most 1. It
# also contains a debugging guard that prints details and stops as soon as a
# node matches more than 27 candidates (the node itself included).
# for node_index in range(nodesframe.shape[0]):
#     node = nodesframe.iloc[node_index]
#     adj_nodes = nodesframe.loc[(nodesframe['i'] >= node['i'] - 1)
#                                 & (nodesframe['i'] <= node['i'] + 1) &
#                                 (nodesframe['j'] >= node['j'] - 1) &
#                                 (nodesframe['j'] <= node['j'] + 1) &
#                                 (nodesframe['c'] >= node['c'] - 1) &
#                                 (nodesframe['c'] <= node['c'] + 1)]
#     if len(adj_nodes) > 27:
#         print(adj_nodes.shape)
#         print(node)
#         print(len(node))
#         print(adj_nodes)
#         break
#     for adj_nodes_index in adj_nodes.index:
#         if adj_nodes_index != node_index:
#             cur_edge = dict()
#             cur_edge['from_node'] = node_index
#             cur_edge['to_node'] = adj_nodes_index

#             edges.append(cur_edge)

# edgesframe = pd.DataFrame(edges)

class_names ['Normal', 'Anomalous']
Normal #graph: 347
Anomalous #graph: 52
dataset len: 239 79 81
dataset len: 416 79 81
