## Main Program for classification

# imports

In [1]:
# imports

import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.feature import local_binary_pattern
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import json
from shapely.geometry import Polygon, box
from shapely.affinity import scale, affine_transform
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, recall_score
import time
from sklearn.metrics import confusion_matrix

# main

In [2]:
samples = pd.read_csv("samples_0_6935.csv", sep = ",")

In [3]:
samples.drop(columns = ["Unnamed: 0", "image_number", "defect_number"], inplace = True)

In [4]:
samples.dropna(inplace = True)
samples.to_csv("samples_0_6935_filtered.csv")

In [5]:
graffiti = samples[samples['label'] == 'Graffiti']
rust = samples[samples['label'] == 'Rust']
drainage = samples[samples['label'] == 'Drainage']
wetspot = samples[samples['label'] == 'Wetspot']
exposedrebars = samples[samples['label'] == 'ExposedRebars']
crack = samples[samples['label'] == 'Crack']

print("Grafitti:")
print(graffiti.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))
print()

print("Rust:")
print(rust.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))
print()

print("Drainage:")
print(drainage.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))
print()

print("Wetspot:")
print(wetspot.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))
print()

print("ExposedRebars:")
print(exposedrebars.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))
print()

print("Crack:")
print(crack.loc[:,["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                      "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                      "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                      "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                      "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                      "hu_moment_7"]].mean(axis = 0))



Grafitti:
darker                       0.241468
gradient                     0.980432
reddish                      0.109998
metallic                     0.224997
colorful                     0.252521
black                        0.064317
black_thin                   0.081922
dominating_color             3.763704
color_bin_1                  0.232969
color_bin_2                  0.216852
color_bin_3                  0.055611
color_bin_4                  0.047960
color_bin_5                  0.089522
color_bin_6                  0.202269
color_bin_7                  0.061535
color_bin_8                  0.023928
color_bin_9                  0.069355
color_entropy                1.554542
rough                        0.203933
dominating_texture           7.115487
texture_0                    0.063485
texture_1                    0.076174
texture_2                    0.059424
texture_3                    0.093232
texture_4                    0.163839
texture_5                    0.112621
te

## Create datasets and classifier

In [6]:
def create_train_test_data(samples, target_label, features):

    # Create rust labels:
    target_class_df = samples.copy()
    target_class_df["target_label"] = target_class_df["label"].apply(lambda x: 1 if x == target_label else 0)

    # Apply train test split:
    x = target_class_df.loc[:, features]
    y = target_class_df.loc[:, "target_label"]
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

    return x_train, x_test, y_train, y_test


def run_classifier(samples, target_label, features):

    # Generate train test dataset: 
    x_train, x_test, y_train, y_test = create_train_test_data(samples, target_label, features)

    # Initialize the random forest classifier: 
    clf = RandomForestClassifier(n_estimators=100, random_state=42)

    # Train the classifier:
    clf.fit(x_train, y_train)

    # Test the classifier: 
    y_pred = clf.predict(x_test)
    importances = clf.feature_importances_

    feature_importances = pd.DataFrame({'Feature': x_train.columns, 'Importance': importances})
    feature_importances = feature_importances.sort_values(by='Importance', ascending=False)

    # Evaluate the classification:
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)

    confusion_mat = confusion_matrix(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average=None)[1]

    # Print the confusion matrix
    """
    print("Confusion Matrix:")
    print(confusion_mat)
    print(f'Accuracy: {accuracy}')
    print(f'Classification Report:\n{report}')
    print(feature_importances)
    print(f'Recall: {recall}')
    """
    return recall
    


## Apply classification for different defect classes

In [22]:
starting_features = ["reddish", "rough"]

all_features = ["darker", "gradient", "reddish", "metallic", "colorful", "black", "black_thin", "dominating_color", "color_bin_1", "color_bin_2", 
                "color_bin_3", "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_8", "color_bin_9", "color_entropy", 
                "rough", "dominating_texture", "texture_0", "texture_1", "texture_2", "texture_3", "texture_4", "texture_5", "texture_6", 
                "texture_7", "texture_8", "texture_9", "rough_entropy", "lengthy", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", 
                "in_shape", "roundness", "hu_moment_1", "hu_moment_2", "hu_moment_3", "hu_moment_4", "hu_moment_5", "hu_moment_6", 
                "hu_moment_7"]

current_rec = run_classifier(samples, "Rust", starting_features)
info = {"Round 0": {"current_feature_list": starting_features, "recall": current_rec}}
current_features = starting_features

for epoch in range(10):

    # Initialize lists for current epoch
    non_current_features = [i for i in all_features if i not in current_features]
    temp_recs = {}

    # Test leaving out each current feature
    for feature in current_features:
        temp_features = current_features.copy()
        temp_features.remove(feature)
        rec = run_classifier(samples, "Rust", temp_features)
        temp_recs[feature] = rec

    # Test adding each non-current feature
    for feature in non_current_features:
        temp_features = non_current_features.copy()
        temp_features.append(feature)
        rec = run_classifier(samples, "Rust", temp_features)
        temp_recs[feature] = rec
    
    # Perform action with the hightest resulting recall
    print(f"Round {epoch}: ", info[f"Round {epoch}"])
    best_new_rec = max(temp_recs.values())
    if best_new_rec > current_rec:
        current_rec = best_new_rec
        feature_for_action = max(temp_recs, key=temp_recs.get)
        if feature_for_action in current_features:
            current_features.remove(feature_for_action)
        else:
            current_features.append(feature_for_action)
    else:
        print("No more improvements possible.")
        break

    # Update info
    info[f"Round {epoch + 1}"] = {"current_feature_list": current_features, "recall": best_new_rec}

    

Round 0:  {'current_feature_list': ['reddish', 'rough'], 'recall': 0.6851424172440339}
Round 1:  {'current_feature_list': ['reddish', 'rough', 'texture_2'], 'recall': 0.7957403130613292}
No more improvements possible.


In [None]:
['gradient', 'reddish', 'metallic', 'dominating_color', 'color_bin_2', 'color_bin_5', 'color_bin_7', 'color_bin_8', 'color_bin_9', 'rough', 'dominating_texture', 'texture_2', 'texture_3', 'texture_6', 'texture_7', 'rel_length', 'in_shape', 'hu_moment_2', 'hu_moment_4', 'hu_moment_5']
['gradient', 'reddish', 'metallic', 'dominating_color', 'color_bin_2', 'color_bin_5', 'color_bin_7', 'color_bin_8', 'color_bin_9', 'rough', 'dominating_texture', 'texture_2', 'texture_3', 'texture_6', 'texture_7', 'in_shape', 'hu_moment_2', 'hu_moment_4', 'hu_moment_5']
['gradient', 'reddish', 'metallic', 'dominating_color', 'color_bin_2', 'color_bin_5', 'color_bin_7', 'color_bin_9', 'rough', 'dominating_texture', 'texture_2', 'texture_3', 'texture_6', 'texture_7', 'in_shape', 'hu_moment_2', 'hu_moment_4', 'hu_moment_5']
['gradient', 'reddish', 'metallic', 'dominating_color', 'color_bin_2', 'color_bin_5', 'color_bin_7', 'color_bin_9', 'rough', 'dominating_texture', 'texture_2', 'texture_3', 'texture_6', 'texture_7', 'in_shape', 'hu_moment_4', 'hu_moment_5']



**Rust classification**

In [16]:
# Run classifier for rust: 
run_classifier(samples, "Rust", ["gradient", "reddish", "metallic", "dominating_color", "color_bin_2", "color_bin_5", "color_bin_7", 
                                 "color_bin_8", "color_bin_9", "rough", "dominating_texture", "texture_2", "texture_3", "texture_6", 
                                 "texture_7", "rel_length", "in_shape", "hu_moment_2", "hu_moment_4", "hu_moment_5"])

0.8049781883500128

**Wetspot classification**

In [9]:
# Rund classifier for wetspots:
run_classifier(samples, "Wetspot", ["darker", "metallic", "colorful", "black", "color_bin_2", "color_bin_3", "color_bin_4", 
                                    "color_bin_5", "color_bin_8", "rough", "texture_2", "texture_3", "texture_4", 
                                    "texture_8", "number_lengthy_objects", "lengthy_aspect_ratio", "rel_length", "in_shape", 
                                    "hu_moment_4", "hu_moment_5"])


0.1571753986332574

**Exposed Rebars**

In [10]:
# Rund classifier for exposed rebars:
run_classifier(samples, "ExposedRebars", ['darker',
 'colorful',
 'dominating_color',
 'color_bin_1',
 'color_bin_2',
 'color_bin_9',
 'color_entropy',
 'texture_1',
 'texture_4',
 'texture_5',
 'texture_6',
 'texture_7',
 'texture_9',
 'lengthy',
 'number_lengthy_objects',
 'lengthy_aspect_ratio',
 'in_shape',
 'roundness',
 'hu_moment_1',
 'hu_moment_4'])

0.030710172744721688

**Graffiti classification**

In [20]:
# Run classifier for Graffiti:
run_classifier(samples, "Graffiti", ["reddish", "metallic", "colorful", "black_thin", "dominating_color", "color_bin_2",
                      "color_bin_4", "color_bin_5", "color_bin_6", "color_bin_7", "color_bin_9",
                      "dominating_texture", "texture_1", "texture_2",
                      "texture_7", "texture_9", "rel_length",
                      "in_shape", "roundness", "hu_moment_5", "hu_moment_6"])

0.25512104283054005

**Drainage classification**

In [23]:
# Run classifier for Graffiti:
run_classifier(samples, "Drainage", ["reddish", "black", "color_bin_5", "color_bin_7", "color_bin_9", "color_entropy", 
                      "rough", "texture_0", "texture_2", "texture_3", "texture_6", 
                      "texture_8", "rough_entropy", "rel_length", 
                      "in_shape", "roundness", "hu_moment_2", "hu_moment_4", "hu_moment_5", "hu_moment_6"])

0.05804749340369393

**Crack**

In [22]:
# Run classifier for Crack:
run_classifier(samples, "Crack", ['darker',
 'gradient',
 'reddish',
 'colorful',
 'black',
 'dominating_color',
 'color_bin_2',
 'color_bin_3',
 'rough',
 'texture_0',
 'texture_3',
 'texture_5',
 'texture_6',
 'texture_8',
 'texture_9',
 'lengthy',
 'number_lengthy_objects',
 'lengthy_aspect_ratio',
 'hu_moment_1',
 'hu_moment_2',
 'hu_moment_3'])

0.5149171270718232