### Importing Libraries

In [1]:
import numpy as np
import pandas as pd 
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from pathlib import Path
from sklearn.model_selection import train_test_split

import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.metrics import confusion_matrix, classification_report

from keras.models import model_from_json

### Displaying Sample Cracked and Non-Cracked Images

In [2]:
# Load one cracked (positive) and one uncracked (negative) sample image.
pos_sample = plt.imread('../input/surface-crack-detection/Positive/00069.jpg')
neg_sample = plt.imread('../input/surface-crack-detection/Negative/00420.jpg')

# plt.subplots creates its own figure; a preceding plt.figure() would only
# leave an extra, empty figure in the cell output.
fig, ax = plt.subplots(1, 2, figsize=(12, 12), sharey=True)
ax[0].imshow(pos_sample)
ax[0].set_title("Positive sample")
ax[1].imshow(neg_sample)
ax[1].set_title("Negative sample")

plt.show()

### Displaying Threshold, Canny, and Contour Images

In [3]:
import cv2

plt.style.use("dark_background")
figure, axis = plt.subplots(nrows=1, ncols=3, figsize=(12, 12))

# Read the sample as a single-channel grayscale image. cv2.imread returns None
# instead of raising when the file cannot be read, so check explicitly.
Gray_Img = cv2.imread("../input/surface-crack-detection/Positive/00420.jpg", cv2.IMREAD_GRAYSCALE)
if Gray_Img is None:
    raise FileNotFoundError("Could not read ../input/surface-crack-detection/Positive/00420.jpg")

# The image has one channel, so the grayscale->RGB conversion is the valid one
# (COLOR_BGR2RGB requires a 3- or 4-channel input). The RGB copy is only needed
# so the contours can be drawn in colour.
Reading_Img = cv2.cvtColor(Gray_Img, cv2.COLOR_GRAY2RGB)

# Inverse binary threshold -> Canny edges -> outer contours of those edges.
_, Threshold_Img = cv2.threshold(Gray_Img, 150, 255, cv2.THRESH_BINARY_INV)
Canny_Img = cv2.Canny(Threshold_Img, 90, 100)
contours, _ = cv2.findContours(Canny_Img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# drawContours draws in place; draw on a copy so Reading_Img stays unmodified.
Draw_Contours = cv2.drawContours(Reading_Img.copy(), contours, -1, (255, 0, 0), 1)

axis[0].imshow(Threshold_Img, cmap="gray")
axis[0].set_title("Threshold")
axis[1].imshow(Canny_Img, cmap="gray")
axis[1].set_title("Canny")
axis[2].imshow(Draw_Contours)
axis[2].set_title("Contours")

print("Visualizing Sample Image in Threshold, Canny & Contours\n=====================================================\n")

### Setting the path to the dataset

In [4]:
# Folders holding the cracked (Positive) and uncracked (Negative) images.
pos_dir, neg_dir = (
    Path('../input/surface-crack-detection/Positive'),
    Path('../input/surface-crack-detection/Negative'),
)
print('Done!')

### Function to create Image-Label Dataframe

In [5]:
def generate_df(image_dir, label):
    """Build a (Filepath, Label) dataframe for every ``*.jpg`` in a folder.

    Parameters
    ----------
    image_dir : pathlib.Path
        Folder that is globbed (non-recursively) for ``*.jpg`` files.
    label : str
        Value written into the ``Label`` column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per image with the string columns ``Filepath`` and ``Label``.
        The frame is empty when the folder contains no ``*.jpg`` files.
    """
    # Path.glob yields files in filesystem order, which differs between machines.
    # Sorting makes the row order -- and therefore any later seeded shuffle or
    # split of these rows -- reproducible.
    filepaths = pd.Series(sorted(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)
    labels = pd.Series(label, name='Label', index=filepaths.index)
    return pd.concat([filepaths, labels], axis=1)
print('Done!')

### Passing the image directories into the function and concatenating the returned dataframes

In [6]:
# One dataframe per class folder.
pos_df = generate_df(pos_dir, "POSITIVE")
neg_df = generate_df(neg_dir, "NEGATIVE")

# Stack both classes, shuffle the rows with a fixed seed and renumber the index.
all_df = pd.concat([pos_df, neg_df], axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)
print('Done!')

# A bare expression is only rendered when it is the last statement of a cell,
# so the preview goes at the end; head() keeps the output short.
all_df.head()

### Splitting the data into test and train

In [7]:
# Use at most 40000 rows. DataFrame.sample raises ValueError when asked for
# more rows than exist (sampling without replacement), so cap the request at
# the number of rows actually available.
train_df, test_df = train_test_split(
    all_df.sample(min(40000, len(all_df)), random_state=1),
    train_size=0.7,  # 70% train / 30% test
    shuffle=True,
    random_state=1
)
print('Done!')

### Pre-processing the images

In [8]:
# Both generators rescale pixel values by 1/255. The training generator also
# sets validation_split so it can serve 'training' and 'validation' subsets.
keras_image = tf.keras.preprocessing.image

train_gen = keras_image.ImageDataGenerator(rescale=1./255, validation_split=0.2)
test_gen = keras_image.ImageDataGenerator(rescale=1./255)
print('Done!')

In [9]:
# Loading options that are identical for the train, validation and test iterators.
shared_flow_options = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(128,128),  # image size
    color_mode='rgb',
    class_mode='binary',    # binary classification
    batch_size=32,
    seed=42,
)

# Training and validation iterators both read train_df through train_gen and
# differ only in the requested subset.
train_data = train_gen.flow_from_dataframe(
    train_df, shuffle=True, subset='training', **shared_flow_options
)
val_data = train_gen.flow_from_dataframe(
    train_df, shuffle=True, subset='validation', **shared_flow_options
)

# The test iterator is not shuffled.
test_data = test_gen.flow_from_dataframe(
    test_df, shuffle=False, **shared_flow_options
)

print('Done!')

### Creating various CNN models

In [10]:
# Baseline network: one conv/pool stage followed by a single sigmoid unit.
basic_model = tf.keras.Sequential(name='basic_model')
basic_model.add(layers.InputLayer(input_shape=[128,128,3]))
basic_model.add(layers.Conv2D(16, 3, padding='same', activation='relu'))
basic_model.add(layers.MaxPool2D())
basic_model.add(layers.Flatten())
basic_model.add(layers.Dense(1, activation='sigmoid'))
print('Done!')

In [11]:
# Two conv/pool stages (16 then 32 filters) followed by a single sigmoid unit.
wider_layers = [
    layers.InputLayer(input_shape=[128,128,3]),
    layers.Conv2D(16, (3,3), padding='same', activation='relu'),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    layers.MaxPool2D(pool_size=(2,2)),
    layers.Flatten(),
    layers.Dense(1, activation='sigmoid'),
]
wider_model = tf.keras.Sequential(wider_layers, name='wider_model')
print('Done!')

In [12]:
# Three conv/pool stages with 16, 32 and 64 filters, then a single sigmoid unit.
deeper_model = tf.keras.Sequential(name='deeper_model')
deeper_model.add(layers.InputLayer(input_shape=[128,128,3]))
for stage_filters in (16, 32, 64):
    deeper_model.add(
        layers.Conv2D(filters=stage_filters, kernel_size=(3,3), activation='relu', padding='same')
    )
    deeper_model.add(layers.MaxPool2D())
deeper_model.add(layers.Flatten())
deeper_model.add(layers.Dense(1, activation='sigmoid'))
print('Done!')

In [13]:
# Two conv/pool stages (8 then 16 filters); global average pooling replaces
# the Flatten layer before the sigmoid output unit.
global_avg_layers = [
    layers.InputLayer(input_shape=[128,128,3]),
    layers.Conv2D(8, (3,3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.Conv2D(16, (3,3), padding='same', activation='relu'),
    layers.MaxPool2D(),
    layers.GlobalAveragePooling2D(),
    layers.Dense(1, activation='sigmoid'),
]
global_avg_model = tf.keras.Sequential(global_avg_layers, name='global_avg_model')
print('Done!')

In [14]:
# Two conv/pool stages, then a 4-unit dense layer with 20% dropout before the
# sigmoid output unit.
dropout_model = tf.keras.Sequential(name='dropout_model')
dropout_model.add(layers.InputLayer(input_shape=[128,128,3]))
dropout_model.add(layers.Conv2D(16, (3,3), padding='same', activation='relu'))
dropout_model.add(layers.MaxPool2D())
dropout_model.add(layers.Conv2D(32, (3,3), padding='same', activation='relu'))
dropout_model.add(layers.MaxPool2D())
dropout_model.add(layers.Flatten())
dropout_model.add(layers.Dense(4, activation='relu'))
dropout_model.add(layers.Dropout(0.2))
dropout_model.add(layers.Dense(1, activation='sigmoid'))
print('Done!')

In [15]:
# Two conv -> batch-norm -> pool stages (8 then 16 filters), then a single
# sigmoid output unit.
batch_normal_model = tf.keras.Sequential(name='batch_normal_model')
batch_normal_model.add(layers.InputLayer(input_shape=[128,128,3]))
for stage_filters in (8, 16):
    batch_normal_model.add(
        layers.Conv2D(filters=stage_filters, kernel_size=(3,3), activation='relu', padding='same')
    )
    batch_normal_model.add(layers.BatchNormalization())
    batch_normal_model.add(layers.MaxPool2D())
batch_normal_model.add(layers.Flatten())
batch_normal_model.add(layers.Dense(1, activation='sigmoid'))
print('Done!')

### Creating functions to:
#### 1. Compile
#### 2. Fit
#### 3. Train
#### 4. Save
#### 5. Load
#### 6. Visualize results of the model


In [16]:
def compile_model(model, optimizer='adam'):
    """Compile ``model`` for binary classification and print its summary.

    The model is compiled with the given optimizer, binary cross-entropy loss
    and the ``binary_accuracy`` metric. Nothing is returned.
    """
    compile_options = {
        'optimizer': optimizer,
        'loss': 'binary_crossentropy',
        'metrics': ['binary_accuracy'],
    }
    model.compile(**compile_options)
    model.summary()
    
def fit_model(model, train_data, val_data, epochs=100, patience=3):
    """Fit ``model`` with early stopping on the validation loss.

    Parameters
    ----------
    model
        A compiled model exposing ``fit``.
    train_data, val_data
        Training data and validation data forwarded to ``model.fit``.
    epochs : int, default 100
        Upper bound on the number of training epochs.
    patience : int, default 3
        Number of epochs without ``val_loss`` improvement tolerated before
        training stops.

    Returns
    -------
    The value returned by ``model.fit`` (the training history).
    """
    # Stop once val_loss has not improved for `patience` epochs and roll the
    # model back to the weights of its best epoch.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True
    )
    history = model.fit(
        train_data,
        validation_data=val_data,
        epochs=epochs,
        callbacks=[early_stopping]
    )
    return history

def plot_history(history):
    """Plot loss and accuracy curves (training vs. validation) from ``history``.

    ``history.history`` is loaded into a dataframe and two line plots are
    drawn: one for the loss columns, one for the binary-accuracy columns.
    """
    curves = pd.DataFrame(history.history)
    column_groups = (
        ['loss', 'val_loss'],
        ['binary_accuracy', 'val_binary_accuracy'],
    )
    for group in column_groups:
        curves.loc[:, group].plot()
    
def save_model(model):
    """Persist ``model`` as ``<name>.json`` (architecture) and ``<name>.h5`` (weights).

    Both files are written relative to the current working directory and a
    confirmation line is printed afterwards.
    """
    serialized = model.to_json()
    base_name = model.name
    with open(base_name + ".json", "w") as handle:
        handle.write(serialized)
    model.save_weights(base_name + ".h5")
    print(base_name + " is saved")

def load_model(model):
    """Rebuild a model from ``<model.name>.json`` and ``<model.name>.h5``.

    Parameters
    ----------
    model
        Only ``model.name`` is read; it selects the files to load from the
        current working directory.

    Returns
    -------
    The model created by ``model_from_json`` with the stored weights loaded.
    """
    # The context manager closes the file even if reading fails.
    with open(model.name + ".json", "r") as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(model.name + ".h5")
    print(model.name + " is loaded")
    return loaded_model

def train_model(model, optimizer='adam', train_data=train_data, val_data=val_data):
    """Compile ``model``, fit it and plot its training curves.

    Parameters
    ----------
    model
        Model to train.
    optimizer : default 'adam'
        Optimizer forwarded to ``compile_model``.
    train_data, val_data
        Data forwarded to ``fit_model``. The defaults are the notebook-level
        ``train_data``/``val_data`` objects as they exist when this cell is
        executed; re-run this cell after recreating them.

    Returns
    -------
    The history returned by ``fit_model``, so callers can inspect or compare
    runs instead of only seeing the plots.
    """
    compile_model(model, optimizer)
    history = fit_model(model, train_data, val_data)
    plot_history(history)
    return history

print('Done!')

### Training the models

In [17]:
# Compile, fit and plot the training curves of basic_model.
train_model(basic_model)
print('Done!')

In [18]:
# Compile, fit and plot the training curves of wider_model.
train_model(wider_model)
print('Done!')

In [19]:
# Compile, fit and plot the training curves of deeper_model.
train_model(deeper_model)
print('Done!')

In [20]:
# Compile, fit and plot the training curves of global_avg_model.
train_model(global_avg_model)
print('Done!')

In [21]:
# Compile, fit and plot the training curves of batch_normal_model.
train_model(batch_normal_model)
print('Done!')

In [22]:
# Compile, fit and plot the training curves of dropout_model.
train_model(dropout_model)
print('Done!')

### Functions to evaluate the models

In [23]:
def evaluate_model(model, test_data=test_data):
    """Print the name, test loss and test accuracy of ``model``.

    ``model.evaluate`` is run silently on ``test_data``; its first two results
    are reported as loss and accuracy (as a percentage), followed by a
    separator line.
    """
    scores = model.evaluate(test_data, verbose=0)
    separator = "-"*20

    print("Model name: ", model.name)
    print("Test Loss: {:.5f}".format(scores[0]))
    print("Test Accuracy: {:.3f}%".format(scores[1]*100))
    print(separator)


def evaluate_model_cc(model, test_data=test_data):
    """Report loss/accuracy, a confusion-matrix heatmap and a classification report.

    Parameters
    ----------
    model
        Trained model exposing ``evaluate`` and ``predict``.
    test_data
        Iterator passed to ``evaluate``/``predict``; its ``labels`` attribute
        supplies the ground truth. The row order of ``labels`` must match the
        order in which batches are produced, so the iterator must not shuffle.
    """
    results = model.evaluate(test_data, verbose=0)
    loss = results[0]
    acc = results[1]

    print("Test Loss: {:.5f}".format(loss))
    print("Test Accuracy: {:.3f}%".format(acc*100))

    # NOTE(review): assumes the iterator encodes NEGATIVE as 0 and POSITIVE as 1
    # -- confirm against test_data.class_indices.
    class_ids = [0, 1]
    class_names = ["NEGATIVE", "POSITIVE"]

    # reshape(-1) instead of np.squeeze: squeeze would collapse the prediction
    # for a single sample to a 0-d array, which the metrics below reject.
    y_pred = (model.predict(test_data) >= 0.5).astype(np.int64).reshape(-1)

    # Passing labels explicitly keeps the matrix 2x2 (matching the tick labels)
    # and keeps target_names valid even when one class is absent.
    cm = confusion_matrix(test_data.labels, y_pred, labels=class_ids)
    clr = classification_report(test_data.labels, y_pred, labels=class_ids, target_names=class_names)

    plt.figure(figsize=(6,6))
    sns.heatmap(cm, annot=True, fmt='g', vmin=0, cmap='Blues', cbar=False)
    plt.xticks(ticks=np.arange(2) + 0.5, labels=class_names)
    plt.yticks(ticks=np.arange(2) + 0.5, labels=class_names)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.title("Confusion Matrix")
    plt.show()

    print("Classification Report:\n")
    print("-"*60)
    print(clr)

print('Done!')

### Evaluating all the models

In [24]:
# Test metrics, confusion matrix and classification report for batch_normal_model.
evaluate_model_cc(batch_normal_model)

In [25]:
# Test metrics, confusion matrix and classification report for global_avg_model.
evaluate_model_cc(global_avg_model)

In [26]:
# Test metrics, confusion matrix and classification report for basic_model.
evaluate_model_cc(basic_model)

In [27]:
# Test metrics, confusion matrix and classification report for deeper_model.
evaluate_model_cc(deeper_model)

In [28]:
# Test metrics, confusion matrix and classification report for wider_model.
evaluate_model_cc(wider_model)

In [29]:
# Test metrics, confusion matrix and classification report for dropout_model.
evaluate_model_cc(dropout_model)

### Prediction using the model with the highest accuracy

In [32]:
# Load one image at the model's input size and scale it by 1/255, matching the
# rescaling used by the data generators.
image = tf.keras.preprocessing.image.load_img("../input/surface-crack-detection/Positive/00069.jpg", target_size=(128,128))
input_arr = np.array([tf.keras.preprocessing.image.img_to_array(image)]).astype('float32') / 255
predictions = batch_normal_model.predict(input_arr)

plt.figure()
plt.imshow(image)

# One image in, one sigmoid unit out: take that single score as a plain float
# rather than relying on the truthiness of a one-element array.
if float(predictions[0][0]) >= 0.5:
    plt.title("Crack Detected")
else:
    plt.title("No Crack Detected")

plt.style.use("dark_background")
figure, axis = plt.subplots(nrows=1, ncols=3, figsize=(12, 12))

# Read the same image as single-channel grayscale. cv2.imread returns None
# instead of raising when the file cannot be read, so check explicitly.
Gray_Img = cv2.imread("../input/surface-crack-detection/Positive/00069.jpg", cv2.IMREAD_GRAYSCALE)
if Gray_Img is None:
    raise FileNotFoundError("Could not read ../input/surface-crack-detection/Positive/00069.jpg")

# The image has one channel, so the grayscale->RGB conversion is the valid one
# (COLOR_BGR2RGB requires a 3- or 4-channel input). The RGB copy is only needed
# so the contours can be drawn in colour.
Reading_Img = cv2.cvtColor(Gray_Img, cv2.COLOR_GRAY2RGB)

# Inverse binary threshold -> Canny edges -> outer contours of those edges.
_, Threshold_Img = cv2.threshold(Gray_Img, 150, 255, cv2.THRESH_BINARY_INV)
Canny_Img = cv2.Canny(Threshold_Img, 90, 100)
contours, _ = cv2.findContours(Canny_Img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# drawContours draws in place; draw on a copy so Reading_Img stays unmodified.
Draw_Contours = cv2.drawContours(Reading_Img.copy(), contours, -1, (255, 0, 0), 1)

axis[0].imshow(Threshold_Img, cmap="gray")
axis[0].set_title("Threshold")
axis[1].imshow(Canny_Img, cmap="gray")
axis[1].set_title("Canny")
axis[2].imshow(Draw_Contours)
axis[2].set_title("Contours")

print("Visualizing Sample Image in Threshold, Canny & Contours\n=====================================================\n")

### Saving the various models

In [33]:
# Save every trained model (architecture + weights) to its own HDF5 file.
basic_model.save("basic_model.h5")
wider_model.save("wider_model2.h5")
deeper_model.save("deeper_model.h5")
global_avg_model.save("global_avg_model.h5")
batch_normal_model.save("batch_normal_model.h5")
# Previously batch_normal_model was written a second time under a misspelled
# dropout file name, so dropout_model itself was never saved.
dropout_model.save("dropout_model.h5")
print("Done")