# Imports

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob as glob
from pathlib import Path

from sklearn.model_selection import train_test_split
import tensorflow as tf

# Getting Images

In [3]:
# Image folders, given relative to the notebook's working directory.
# `noncrack_dir` feeds the NONCRACK_IMAGE rows and `processed_crackdir`
# feeds the CRACK_IMAGE rows of the frames built further down.
noncrack_dir = Path("../data/external/Non-crack")
processed_crackdir = Path("../data/processed/pro_crack_img")

def generate_df(image_dir, label):
    """List the ``*.jpg`` files directly inside a folder as a labelled DataFrame.

    Parameters
    ----------
    image_dir : pathlib.Path or str
        Folder to scan. Only its immediate children are matched (the glob is
        not recursive) and only names ending in ``.jpg``.
    label : object
        Value written into every row of the ``Label`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``Filepath`` (paths converted to ``str``) and ``Label``,
        one row per matched file, ordered by path. Empty if nothing matches.
    """
    # Path.glob yields entries in arbitrary, filesystem-dependent order.
    # Sorting pins the row order so that seeded shuffles/samples applied to
    # the result select the same files on every machine.
    image_paths = sorted(str(p) for p in Path(image_dir).glob('*.jpg'))

    filepaths = pd.Series(image_paths, name='Filepath').astype(str)
    # A scalar is broadcast over the index, giving every file the same label.
    labels = pd.Series(label, name='Label', index=filepaths.index)
    return pd.concat([filepaths, labels], axis=1)

In [4]:
# One labelled frame per class.
crack_df = generate_df(processed_crackdir, label="CRACK_IMAGE")
noncrack_df = generate_df(noncrack_dir, label="NONCRACK_IMAGE")

# Stack both classes, shuffle every row with a fixed seed, and renumber the
# index so it runs 0..n-1 in the shuffled order.
total_df = (
    pd.concat([crack_df, noncrack_df], axis=0)
    .sample(frac=1.0, random_state=1)
    .reset_index(drop=True)
)

total_df

Unnamed: 0,Filepath,Label
0,..\data\external\Non-crack\08623.jpg,NONCRACK_IMAGE
1,..\data\external\Non-crack\05028.jpg,NONCRACK_IMAGE
2,..\data\external\Non-crack\15926.jpg,NONCRACK_IMAGE
3,..\data\external\Non-crack\15710.jpg,NONCRACK_IMAGE
4,..\data\external\Non-crack\17857.jpg,NONCRACK_IMAGE
...,...,...
20005,..\data\external\Non-crack\10946.jpg,NONCRACK_IMAGE
20006,..\data\external\Non-crack\17280.jpg,NONCRACK_IMAGE
20007,..\data\external\Non-crack\05183.jpg,NONCRACK_IMAGE
20008,..\data\external\Non-crack\12163.jpg,NONCRACK_IMAGE


# Test-Train split

In [5]:
# Work on a seeded random subset of 6000 rows rather than the full frame,
# then hold out 30% of that subset for testing (70% / 30% split).
train_df, test_df = train_test_split(
    total_df.sample(n=6000, random_state=1),
    train_size=0.7,
    random_state=1,
    shuffle=True,
)

# Loading/Adding Images

In [6]:
# Rescale-only generator (no validation split configured).
test_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

# Same 1/255 pixel rescaling, plus a 20% validation split that is selected
# through the `subset` argument when an iterator is created from it.
train_gen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
)

In [7]:
# Settings shared by all three iterators: where to find paths and labels in
# the frame, the image size/colour mode fed to the network, a binary target,
# and the batch size. Keeping them in one place stops the three calls from
# drifting apart.
flow_options = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(120, 120),
    color_mode='rgb',
    class_mode='binary',
    batch_size=24,
    seed=42,
)

# Training and validation iterators both read `train_df` through `train_gen`;
# `subset` picks which side of its validation_split each one receives.
train_data = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    subset='training',
    **flow_options
)

val_data = train_gen.flow_from_dataframe(
    train_df,
    shuffle=True,
    subset='validation',
    **flow_options
)

# The held-out frame goes through `test_gen` (previously `train_gen` was used
# here, leaving `test_gen` unused). No subset is requested, so every row of
# `test_df` is served; shuffle is off so batch order follows the frame order.
test_data = test_gen.flow_from_dataframe(
    test_df,
    shuffle=False,
    **flow_options
)

Found 3360 validated image filenames belonging to 2 classes.
Found 840 validated image filenames belonging to 2 classes.
Found 1800 validated image filenames belonging to 2 classes.


# Training

In [None]:
# Small CNN for binary classification of 120x120 RGB images:
# two Conv2D + MaxPool stages, global average pooling, one sigmoid unit.
inputs = tf.keras.Input(shape=(120, 120, 3))
x = tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation='relu')(inputs)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
x = tf.keras.layers.Conv2D(filters=24, kernel_size=(3, 3), activation='relu')(x)
x = tf.keras.layers.MaxPool2D(pool_size=(2, 2))(x)
# Collapse each feature map to a single value before the classifier head.
x = tf.keras.layers.GlobalAveragePooling2D()(x)
# Single sigmoid output to pair with the binary cross-entropy loss below.
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the cell output.
model.summary()

None


In [9]:
# Stop once val_loss has gone 3 epochs without improving, and roll the model
# back to the weights from its best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# 100 epochs is only an upper bound; the callback above normally ends
# training earlier.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=100,
    callbacks=[early_stopping],
)

  self._warn_if_super_not_called()


Epoch 1/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 594ms/step - accuracy: 0.9998 - loss: 0.1231 - val_accuracy: 0.9976 - val_loss: 0.0056
Epoch 2/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 94ms/step - accuracy: 1.0000 - loss: 5.7223e-04 - val_accuracy: 0.9976 - val_loss: 0.0056
Epoch 3/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 90ms/step - accuracy: 0.9996 - loss: 0.0011 - val_accuracy: 0.9976 - val_loss: 0.0053
Epoch 4/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 91ms/step - accuracy: 0.9999 - loss: 2.7091e-04 - val_accuracy: 0.9976 - val_loss: 0.0047
Epoch 5/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 92ms/step - accuracy: 0.9993 - loss: 0.0012 - val_accuracy: 0.9976 - val_loss: 0.0040
Epoch 6/100
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 90ms/step - accuracy: 0.9998 - loss: 3.6162e-04 - val_accuracy: 0.9976 - val_loss: 0.00

KeyboardInterrupt: 

In [None]:
# NOTE: `history` is only bound if model.fit() returned normally; if training
# was interrupted, re-run the fit cell before this one.
print(history.history.keys())

# `plt` is matplotlib.pyplot, which has no `line` function (that call shape
# belongs to plotly.express), so draw the curves with matplotlib's Axes API.
fig, ax = plt.subplots()
for metric in ('loss', 'val_loss'):
    # Each entry is a per-epoch list; the x axis is the epoch index.
    ax.plot(history.history[metric], label=metric)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training and Validation Loss Over Time")
ax.legend()
plt.show()