In [45]:
import os

import skimage as ski
from pprint import pprint
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt

import polars as pl

In [46]:
class LineCombination:
    def __init__(self, start, finish):
        self.start = start
        self.finish = finish
        self.points = []
        self.score = 0
        self.percent = 0.0

    def __str__(self):
        return f"{self.start}-{self.finish}\t{len(self.points)}/{self.score}\tpercent={round(self.percent, 2)} %)"

    def __repr__(self):
        return f"{self.start}-{self.finish}\t{len(self.points)}/{self.score}\tpercent={round(self.percent, 2)} %)"

In [47]:
def draw_huge_line(comb: LineCombination, shape, start, finish):
    if start[0] == finish[0]:
        for y in range(shape[1]):
            p = (start[0], y)
            comb.points.append(p)
    else:
        slope = (float(finish[1]) - float(start[1])) / (float(finish[0]) - float(start[0]))
        intercept = float(start[1]) - slope * float(start[0])

        for i in range(shape[0]):
            y = slope * i + intercept
            p = (i, round(y))
            comb.points.append(p)

In [48]:
def feature_generator(path: str) -> tuple[int, float, int, float, int, int, int]:
    img = cv.imread(path)

    blurred = cv.GaussianBlur(img, (9,9), 0)
    gray = cv.cvtColor(blurred,cv.COLOR_BGRA2GRAY)
    
    # Shape and contour detection
    ret, img_tresh = cv.threshold(gray,20,255,cv.THRESH_BINARY+cv.THRESH_OTSU)
    a1, a2 = cv.findContours(img_tresh, cv.RETR_EXTERNAL,cv.CHAIN_APPROX_SIMPLE)
    tetetet = cv.approxPolyDP(a1[0], 0.01*cv.arcLength(a1[0],True),True)

    # Corner detection
    corner_grey = np.float32(gray)
    corners = cv.goodFeaturesToTrack(corner_grey, 25, 0.01, 10)
    corners = np.intp(corners)
    

    total_corners = cv.goodFeaturesToTrack(corner_grey, 0, 0.01, 10)

    # Circle detection
    circles = cv.HoughCircles(gray, cv.HOUGH_GRADIENT_ALT, 1.5, 5, param1=300, param2=0.7, minRadius=10, maxRadius=0)

    # Line detection
    lines: list[LineCombination] = []
    edges = cv.Canny(gray, 50, 50)
    thick = cv.dilate(edges, None, iterations=2)

    # get all possible line combinations
    for i in range(len(corners)):
        for j in range(i + 1, len(corners)):
            # get start and finish point
            start = corners[i][0]
            finish = corners[j][0]

            lines.append(LineCombination(start, finish))

    for comb in lines:
        draw_huge_line(comb, gray.shape, comb.start, comb.finish)

    for comb in lines:
        for point in reversed(comb.points):
            if point[0] >= thick.shape[0] or point[1] >= thick.shape[1] \
                    or point[0] < 0 or point[1] < 0:
                comb.points.remove(point)

    # clear duplicate points (if any) (optional)
    for comb in lines:
        comb.points = list(set(comb.points))

    # calculate score for each line combination and success percentage
    for line in lines:
        for point in line.points:
            if thick[point[0]][point[1]] == 255:
                line.score += 1

        # calculate percentage
        line.percent = round(line.score / 400 * 100, 2)


    sorted_by_percentage = sorted(lines, key=lambda x: x.percent, reverse=True)
    # get all lines with percentage >= 25% a.k.a. num_lines
    quarter_successfull_lines = filter(lambda x: x.percent >= 25.0, sorted_by_percentage)
    
    return tetetet.shape[0], cv.contourArea(tetetet), sorted_by_percentage[0].score, sorted_by_percentage[0].percent, len(circles[0]) if circles is not None else 0, len(list(quarter_successfull_lines)), len(total_corners)

In [49]:
paths = os.listdir('dataset')
parts = {}

for p in paths:
    if p.endswith('.png'):
        code = p.split(' ', 1)[0]
        if parts.get(code) is None:
            parts[code] = [p]
        else:
            parts[code].append(p)
results = []
requested = ["3001", "4286", "2780", "6632", "99301", "43093", "54200"]

done = 0
total = sum([len(parts[v]) for v in parts.keys() if v in requested])
total

5600

In [50]:
for code, path in parts.items():
    if code in requested:
        for p in path:
            print(f"Processing {p}, {done}/{total}, {round(done/total*100, 2)}%")
            results.append([code, *feature_generator('dataset/' + p)])
            done += 1

Processing 2780 Peg with friction 000L.png, 0/5600, 0.0%
Processing 2780 Peg with friction 000R.png, 1/5600, 0.02%
Processing 2780 Peg with friction 001L.png, 2/5600, 0.04%
Processing 2780 Peg with friction 001R.png, 3/5600, 0.05%
Processing 2780 Peg with friction 002L.png, 4/5600, 0.07%
Processing 2780 Peg with friction 002R.png, 5/5600, 0.09%
Processing 2780 Peg with friction 003L.png, 6/5600, 0.11%
Processing 2780 Peg with friction 003R.png, 7/5600, 0.12%
Processing 2780 Peg with friction 004L.png, 8/5600, 0.14%
Processing 2780 Peg with friction 004R.png, 9/5600, 0.16%
Processing 2780 Peg with friction 005L.png, 10/5600, 0.18%
Processing 2780 Peg with friction 005R.png, 11/5600, 0.2%
Processing 2780 Peg with friction 006L.png, 12/5600, 0.21%
Processing 2780 Peg with friction 006R.png, 13/5600, 0.23%
Processing 2780 Peg with friction 007L.png, 14/5600, 0.25%
Processing 2780 Peg with friction 007R.png, 15/5600, 0.27%
Processing 2780 Peg with friction 008L.png, 16/5600, 0.29%
Processin

In [51]:
df_schema = {
    'code': pl.Utf8,
    'shape': pl.Int64,
    'area': pl.Float64,
    'score': pl.Int64,
    'percent': pl.Float64,
    'circles': pl.Int64,
    'num_lines': pl.Int64,
    'corners': pl.Int64
}

In [52]:
df = pl.DataFrame(results, df_schema)
df

code,shape,area,score,percent,circles,num_lines,corners
str,i64,f64,i64,f64,i64,i64,i64
"""2780""",18,7415.0,77,19.25,2,0,39
"""2780""",12,8439.5,68,17.0,0,0,34
"""2780""",14,7019.5,66,16.5,3,0,39
"""2780""",14,8599.5,68,17.0,0,0,37
"""2780""",16,7835.0,100,25.0,4,1,39
"""2780""",10,8525.0,111,27.75,0,7,34
"""2780""",12,9360.0,98,24.5,2,0,45
"""2780""",12,8628.5,117,29.25,1,9,38
"""2780""",10,9478.0,65,16.25,0,0,40
"""2780""",16,5419.5,80,20.0,4,0,33


In [53]:
df.write_csv('Synthetic_Data.csv')