## Main Program for classification

# imports

In [2]:
# imports

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import local_binary_pattern
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import json
from shapely.geometry import Polygon, box
from shapely.affinity import scale, affine_transform
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import time

# functions for feature exctraction

In [3]:
#function to plot image with given polygons
def plot_picture_with_polygons(image, polygons):
    fig, ax = plt.subplots(1, figsize=(20,12))
    ax.imshow(image)
    for polygon in polygons:
        points = polygon
        polygon = patches.Polygon(points, closed=True, edgecolor='red', fill=False, linewidth=2, label=label)
        ax.add_patch(polygon)
    plt.show()

In [4]:
#function to find rough areas in picture
def rough_image_filter(image, threshold=8):

    #read in image as greyscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #apply local binary pattern
    lbp = local_binary_pattern(image, P=8, R=2, method='uniform')

    lbp_uint8 = np.uint8((lbp / lbp.max())*255)
    
    blurred = cv2.blur(lbp_uint8, (5, 5), 0)
    mean = np.mean(blurred)
    std = np.std(blurred)
    median = np.median(blurred)
    #apply thresholding
    _, binary_image = cv2.threshold(blurred, mean - std, 255, cv2.THRESH_BINARY) 

    binary_blurred = cv2.blur(binary_image, (50, 50), 0)
    mean = np.mean(binary_blurred)
    std = np.std(binary_blurred)
    #apply thresholding
    _, binary_image = cv2.threshold(binary_blurred, mean - 1.5 * std, 255, cv2.THRESH_BINARY) # normlaize and convert to uint8, then blur

    return binary_image

In [5]:
#function to identify reddisch areas in an image
def reddish_image_filter(image):

    def color_detector(image, lower_bounds: list, upper_bounds: list) -> list:
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = np.zeros(hsv_image.shape[:2], dtype=np.uint8)
        for lower_bound, upper_bound in zip(lower_bounds, upper_bounds):
            lower_bound = np.array(lower_bound)
            upper_bound = np.array(upper_bound)
            mask += cv2.inRange(hsv_image, lower_bound, upper_bound)
        return mask

    rusty_lower_bounds = [[0, 40, 50]]
    rusty_upper_bounds = [[20, 255, 200]]
    reddish_areas = color_detector(image, rusty_lower_bounds, rusty_upper_bounds)

    return reddish_areas

In [6]:
#find darker areas in an image
def darker_image_filter(image, z = 200):
    # Convert image to grayscale:
    grey_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    grey_image = cv2.blur(grey_image, (15, 15), 0)
    mean = np.mean(grey_image)
    std = np.std(grey_image) / 250
    dark_areas = (grey_image < mean - z * std)
    dark_areas = dark_areas.astype(np.uint8)
    blurred = cv2.blur(dark_areas, (5, 5), 0) *255
    return blurred

In [7]:
#legthy image filter
def lengthy_image_filter(image, ratio = 2.5):
# Blur image:
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray_image = cv2.GaussianBlur(gray_image, (7, 7), 0)

    # Apply edge detector:
    edges = cv2.Canny(gray_image, 50, 150)

    # Apply morphological operations 
    kernel = np.ones((15, 15), np.uint8)
    edges = cv2.dilate(edges, kernel, iterations=1)
    edges = cv2.erode(edges, kernel, iterations=1)

    # Find contours
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    stencil = np.zeros(gray_image.shape).astype(gray_image.dtype)
    color = [255]
    for contour in contours:
        try:
            # Get fitted bounding box
            rect = cv2.minAreaRect(contour)
            box = cv2.boxPoints(rect)
            box = np.int0(box)
            width = rect[1][0]
            height = rect[1][1]
            
            # Calculate aspect ratio
            aspect_ratio = float(max(width, height)) / min(width, height)
            if aspect_ratio > 2.5:
                cv2.drawContours(stencil, [box], 0, (0, 0, 255), 2)
                cv2.fillPoly(stencil, [box], color)
        except:
            pass
    return stencil

In [22]:
# Function to get an image with edges for the lengthy objects filter:
def lengthy_image_filter_edges(image):
    
    # Read in image as greyscale
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Blur image:
    blurred_image = cv2.GaussianBlur(gray_image, (7, 7), 0)

    # Apply edge detector:
    edges = cv2.Canny(blurred_image, 50, 150)

    # Apply morphological operations 
    kernel = np.ones((15, 15), np.uint8)
    edges = cv2.dilate(edges, kernel, iterations=1)
    edges = cv2.erode(edges, kernel, iterations=1)

    return edges

# Function to get number of contours with aspect ratio > 2:
def extract_lengthy_features_1(edges, defect_polygon):

    # Create defect polygon and calculate its width and height:
    defect_polygon = Polygon(defect_polygon)
    defect_rect_coords = np.array(list(defect_polygon.minimum_rotated_rectangle.exterior.coords)[:-1])
    distances = [np.linalg.norm(defect_rect_coords[i] - defect_rect_coords[(i + 1) % len(defect_rect_coords)]) for i in range(len(defect_rect_coords))]
    defect_width, defect_height = sorted(distances)[0], sorted(distances)[-1]

    # Find contours
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Extract overlapping contours (where >80% of the contour area is overlapping with the polygon):
    overlapping_contours = []
    for contour in contours: 
        if not np.array_equal(contour[0], contour[-1]):
            contour = np.vstack([contour, contour[0:1]])
        contour_as_polygon = Polygon([(x, y) for x, y in contour[:, 0]])
        try:
            intersection = contour_as_polygon.intersection(defect_polygon)
            intersection_area = intersection.area
        except:
            intersection_area = 0
        contour_area = contour_as_polygon.area
        if intersection_area > 0 and intersection_area >= 0.8 * contour_area:
            overlapping_contours.append(contour)

    # Extract characteristics per overlapping contour:
    characteristics = []
    lengths = []
    for contour in overlapping_contours:

        # Get fitted bounding box
        rect = cv2.minAreaRect(contour)
        box = cv2.boxPoints(rect)
        box = np.int0(box)
        width = max(rect[1][0], 1)
        height = max(rect[1][1], 1)
        
        # Calculate aspect ratio
        aspect_ratio = float(max(width, height) / min(width, height))
        characteristics.append(aspect_ratio)
        lengths.append(max(width, height))
    
    # Extract number of contours with aspect ratio >= 2 and the average aspect ratio of these contours:
    number = 0
    avg_aspect_ratio_lengthy = 0
    for aspect_ratio in characteristics:
        if aspect_ratio >= 2.5:
            avg_aspect_ratio_lengthy += aspect_ratio
            number += 1
    avg_aspect_ratio_lengthy = max(avg_aspect_ratio_lengthy / max(number, 1), 1)

    # Extract the quotient (length of the lengthiest contour) / (length of the defect):
    rel_length = (max(lengths) / max(defect_width, defect_height)) if len(lengths) > 0 else 0

    return number, avg_aspect_ratio_lengthy, rel_length


# functions to evaluate / visualize

In [9]:
def get_overlapping_values(filtered_image, defect_polygon):
    # Generate polygon mask:
    mask = np.zeros(filtered_image.shape[:2], dtype=np.uint8)
    cv2.fillPoly(mask, [defect_polygon], 255)

    # Extract overlapping pixel values:
    overlapping_values = filtered_image[mask == 255]
    overlapping_values = overlapping_values.tolist()
    
    return overlapping_values

def get_relative_frequencies(values: list) -> dict:
    
    counts = Counter(values)
    total_count = len(values)
    relative_frequencies = {element: count / total_count for element, count in counts.items()}
    return relative_frequencies

In [10]:
def extract_reddish_feature(image, defect_polygon):

    overlapping_values = get_overlapping_values(image, defect_polygon)
    relative_frequencies = get_relative_frequencies(overlapping_values)
    try:
        quotient = relative_frequencies[255]
    except:
        quotient = 0
    return quotient

In [11]:
def extract_rough_feature(image, defect_polygon):

    overlapping_values = get_overlapping_values(image, defect_polygon)
    relative_frequencies = get_relative_frequencies(overlapping_values)
    try:
        quotient = relative_frequencies[0]
    except:
        quotient = 0
    return quotient

In [12]:
def extract_dark_feature(image, defect_polygon):

    overlapping_values = get_overlapping_values(image, defect_polygon)
    relative_frequencies = get_relative_frequencies(overlapping_values)
    try:
        quotient = relative_frequencies[255]
    except:
        quotient = 0
    return quotient

In [13]:
def extract_lengthy_feature(image, defect_polygon):
    overlapping_values = get_overlapping_values(image, defect_polygon)
    relative_frequencies = get_relative_frequencies(overlapping_values)
    try:
        quotient = relative_frequencies[255]
    except:
        quotient = 0
    return quotient

# main

In [24]:
n_pictures = 50
samples = []
no_defects = 0

for i in range(n_pictures):
    i = str(i).zfill(4)
    if int(i) % 10 == 0:
        print()
        print(f"Processing image {i}")

    #Bild einlesen
    image_path = f"data/dacl10k_v2_devphase/images/train/dacl10k_v2_train_{i}.jpg"
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    #Defekte einlesen
    annotations_path = f"data/dacl10k_v2_devphase/annotations/train/dacl10k_v2_train_{i}.json"
    with open(annotations_path, 'r') as file:
        annotations = json.load(file)
    defects = annotations['shapes']

    #Areas finden
    #print("Finding darker areas")
    darker_image = darker_image_filter(image)
    #print("Finding reddish areas")
    reddish_image = reddish_image_filter(image)
    #print("Finding rough areas")
    rough_image = rough_image_filter(image)
    #print("Finding lengthy areas")
    lengthy_image = lengthy_image_filter(image)
    edges_for_lengthy_feature = lengthy_image_filter_edges(image)

    for k in range(len(defects)):
        label = defects[k]['label']
        if label in ["Rust", "Graffiti", "Drainage", "Wetspot", "ExposedRebars", "Crack"]:
            no_defects += 1
            #print("defectpolygon: ", no_defects)
            #plot_picture_with_polygons(image, rough_polygons)
            defect_polygon = np.array(defects[k]['points'], dtype = np.int32)
            #print("Calculating darker quotient")
            darker_quotient = extract_dark_feature(darker_image, defect_polygon)
            #print("Calculating reddish quotient")
            reddish_quotient = extract_reddish_feature(reddish_image, defect_polygon)
            #print("Calculating rough quotient")
            rough_quotient = extract_rough_feature(rough_image, defect_polygon)
            #print("Calculating lengthy quotient")
            lengthy_quotient = extract_lengthy_feature(lengthy_image, defect_polygon)
            number_lengthy_objects, avg_aspect_ratio, rel_length = extract_lengthy_features_1(edges_for_lengthy_feature, defect_polygon)

            temp_dict = {'label': label, 'darker': darker_quotient, 'reddish': reddish_quotient, 'rough': rough_quotient, 'lengthy': lengthy_quotient, 
                         'lengthy_1': number_lengthy_objects, 'lengthy_2': avg_aspect_ratio, 'lengthy_3': rel_length}
            samples.append(temp_dict)

samples = pd.DataFrame(samples)


Processing image 0000


  box = np.int0(box)
  box = np.int0(box)



Processing image 0010

Processing image 0020

Processing image 0030

Processing image 0040


In [25]:
graffiti = samples[samples['label'] == 'Graffiti']
rust = samples[samples['label'] == 'Rust']
drainage = samples[samples['label'] == 'Drainage']
wetspot = samples[samples['label'] == 'Wetspot']
exposedrebars = samples[samples['label'] == 'ExposedRebars']
crack = samples[samples['label'] == 'Crack']

print("Grafitti:")
print(graffiti.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))
print()

print("Rust:")
print(rust.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))
print()

print("Drainage:")
print(drainage.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))
print()

print("Wetspot:")
print(wetspot.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))
print()

print("ExposedRebars:")
print(exposedrebars.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))
print()

print("Crack:")
print(crack.loc[:,["darker", "reddish", "rough", "lengthy", "lengthy_1", "lengthy_2", "lengthy_3"]].mean(axis = 0))

#samples.to_csv('samples.csv', index=False)


Grafitti:
darker       0.333082
reddish      0.142308
rough        0.168847
lengthy      0.276332
lengthy_1    1.826389
lengthy_2    2.440175
lengthy_3    0.221588
dtype: float64

Rust:
darker       0.279984
reddish      0.459263
rough        0.056051
lengthy      0.316078
lengthy_1    0.195652
lengthy_2    1.409282
lengthy_3    0.116220
dtype: float64

Drainage:
darker       0.530238
reddish      0.066249
rough        0.123530
lengthy      0.465275
lengthy_1    0.200000
lengthy_2    1.323928
lengthy_3    0.096764
dtype: float64

Wetspot:
darker       0.820379
reddish      0.089808
rough        0.047576
lengthy      0.255191
lengthy_1    1.090909
lengthy_2    3.933449
lengthy_3    0.117216
dtype: float64

ExposedRebars:
darker       0.402304
reddish      0.427987
rough        0.136835
lengthy      0.235946
lengthy_1    0.285714
lengthy_2    1.342857
lengthy_3    0.361696
dtype: float64

Crack:
darker       0.158233
reddish      0.310179
rough        0.087749
lengthy      0.400276
lengt

## Create datasets and classifier

In [53]:
def create_train_test_data(samples, target_label, features):

    # Create rust labels:
    target_class_df = samples.copy()
    target_class_df["target_label"] = target_class_df["label"].apply(lambda x: 1 if x == target_label else 0)

    # Apply train test split:
    x = target_class_df.loc[:, features]
    y = target_class_df.loc[:, "target_label"]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

    return x_train, x_test, y_train, y_test


def run_classifier(samples, target_label, features):

    # Generate train test dataset: 
    x_train, x_test, y_train, y_test = create_train_test_data(samples, target_label, features)

    # Initialize the random forest classifier: 
    clf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the classifier:
    clf.fit(x_train, y_train)

    # Test the classifier: 
    y_pred = clf.predict(x_test)

    # Evaluate the classification:
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    print(f'Accuracy: {accuracy}')
    print(f'Classification Report:\n{report}')



## Apply classification for different defect classes

**Rust classification**

In [72]:
# Run classifier for rust: 
run_classifier(samples, "Rust", ["reddish", "rough", "darker"])


Accuracy: 0.6937062937062937
Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.56      0.60       296
           1       0.72      0.79      0.75       419

    accuracy                           0.69       715
   macro avg       0.68      0.67      0.68       715
weighted avg       0.69      0.69      0.69       715



**Wetspot classification**

In [73]:
# Rund classifier for wetspots:
run_classifier(samples, "Wetspot", ["darker"])


Accuracy: 0.8909090909090909
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       672
           1       0.11      0.12      0.11        43

    accuracy                           0.89       715
   macro avg       0.53      0.53      0.53       715
weighted avg       0.89      0.89      0.89       715



**Graffiti classification**

In [74]:
# Run classifier for Graffiti:
run_classifier(samples, "Rust", ["reddish", "rough", "darker"])

Accuracy: 0.6937062937062937
Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.56      0.60       296
           1       0.72      0.79      0.75       419

    accuracy                           0.69       715
   macro avg       0.68      0.67      0.68       715
weighted avg       0.69      0.69      0.69       715



In [75]:
# to do:
# implement blue, yellow, green and shiny filter -> Samuel
# implement colourfullness measure -> Samuel
# implement circle detector -> Samuel
# in rough filter take lbp values in polygon as feature -> Samuel
# rough filter take entropy as feature -> Samuel
# darkness gradient for wetspots -> Felix
# test hough lines detector for lengthy feature -> Felix
# Straightness feature -> Felix


# function dump / not used

In [None]:
#function to calculate the overlapping percentage of a defect polygon with feature polygons
def calculate_overlapping_percentage(defect_polygon, feature_polygons):
    overlapping_quotient = []
    defect_polygon = Polygon(defect_polygon)
    overlapping_area = 0
    for feature_polygon in feature_polygons:
        if len(feature_polygon) > 3:
            feature_polygon = Polygon(feature_polygon)
            try:
                overlapping_area += defect_polygon.intersection(feature_polygon).area
            except:
                pass
    quotient = overlapping_area / defect_polygon.area
    return round(quotient*100, 4)

In [None]:
#find darker areas in an image
def find_darker_areas(image, threshold=100):
    # Convert image to grayscale:
    grey_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Find dark areas:
    dark_areas = grey_image < threshold
    dark_areas = dark_areas.astype(np.uint8)
    blurred = cv2.blur(dark_areas, (15, 15), 0)
    #plt.imshow(blurred)
    contours, _ = cv2.findContours(blurred, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = []
    for contour in contours:
        points = []
        for point in contour:
            points.append(*point.tolist())
        polygons.append(points)

    return blurred, polygons

In [None]:
# obsolete function to find reddish areas in an image
def find_reddish_areas(image):

    def color_detector(image, lower_bounds: list, upper_bounds: list) -> list:
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        mask = np.zeros(hsv_image.shape[:2], dtype=np.uint8)
        for lower_bound, upper_bound in zip(lower_bounds, upper_bounds):
            lower_bound = np.array(lower_bound)
            upper_bound = np.array(upper_bound)
            mask += cv2.inRange(hsv_image, lower_bound, upper_bound)
        return mask

    rusty_lower_bounds = [[0, 40, 50], [170, 40, 50]]
    rusty_upper_bounds = [[10, 255, 200], [180, 255, 200]]
    blurred = cv2.blur(image, (30, 30), 0)
    reddish_areas = color_detector(blurred, rusty_lower_bounds, rusty_upper_bounds)
    plt.imshow(reddish_areas)
    contours, _ = cv2.findContours(reddish_areas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    polygons = []
    for contour in contours:
        points = []
        for point in contour:
            points.append(*point.tolist())
        polygons.append(points)

    return image, polygons 

In [None]:
def find_rough_areas(image, threshold=8):

    #read in image as greyscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    #apply local binary pattern
    lbp = local_binary_pattern(image, P=8, R=2, method='uniform')

    lbp_uint8 = np.uint8((lbp / lbp.max())*255)
    
    blurred = cv2.blur(lbp_uint8, (5, 5), 0)
    mean = np.mean(blurred)
    std = np.std(blurred)
    median = np.median(blurred)
    #apply thresholding
    _, binary_image = cv2.threshold(blurred, mean - std, 255, cv2.THRESH_BINARY) 

    binary_blurred = cv2.blur(binary_image, (50, 50), 0)
    mean = np.mean(binary_blurred)
    std = np.std(binary_blurred)
    #apply thresholding
    _, binary_image = cv2.threshold(binary_blurred, mean - 1.5 * std, 255, cv2.THRESH_BINARY) # normlaize and convert to uint8, then blur
    plt.imshow(binary_image)
    print(binary_image)
    np.histogram(binary_image, bins=10, range=None, density=None, weights=None)
    
    #plt.imshow(binary_image)
    
    # Uncomment and adjust the contour finding and polygon processing as needed
    
    contours_outer, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    #contours_tree, _ = cv2.findContours(binary_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    contours, _ = cv2.findContours(binary_image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    #contours_ccomp, _ = cv2.findContours(binary_image, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)

    polygons = []
    for contour in contours[0:-1]:
        points = []
        for point in contour:
            points.append(*point.tolist())
        polygons.append(points)

    return lbp, polygons

image_path = f"data/dacl10k_v2_devphase/images/train/dacl10k_v2_train_0001.jpg"
image = cv2.imread(image_path, cv2.IMREAD_COLOR)
dark_areas, dark_polygons = find_rough_areas(image)
#plot_picture_with_polygons(image, dark_polygons)
print(len(dark_polygons))