<a href="https://colab.research.google.com/github/SamuelK87/Machine-vision-based-defect-detection-in-welding-process/blob/master/Weld_def_10cls_small_VGGnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
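# USAGE (original command-line form, kept for reference; the argparse block
# further down is commented out, so the dataset and output paths are
# hard-coded in this notebook cell)
# python train.py --dataset dataset --model pokedex.model --labelbin lb.pickle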

# set the matplotlib backend so figures can be saved in the background
import matplotlib

matplotlib.use("Agg")
import pandas as pd
# import the necessary packages
from keras.models import Sequential
from sklearn.metrics import classification_report
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
from keras.callbacks import ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
#from pyimagesearch.smallervggnet import SmallerVGGNet
import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os

#######################################################################

class SmallerVGGNet:
    """Factory for a compact VGG-style convolutional image classifier."""

    @staticmethod
    def build(width, height, depth, classes):
        """Assemble the (uncompiled) Keras ``Sequential`` network.

        Args:
            width: width of the input images.
            height: height of the input images.
            depth: number of channels of the input images.
            classes: number of units in the final softmax layer.

        Returns:
            The constructed ``Sequential`` model.
        """
        # choose the input layout and the batch-norm axis that match the
        # backend's image data format ("channels last" unless told otherwise)
        if K.image_data_format() == "channels_first":
            inputShape, chanDim = (depth, height, width), 1
        else:
            inputShape, chanDim = (height, width, depth), -1

        def conv_relu_bn(filters, **conv_kwargs):
            # one 3x3 same-padded CONV => RELU => BN unit
            return [
                Conv2D(filters, (3, 3), padding="same", **conv_kwargs),
                Activation("relu"),
                BatchNormalization(axis=chanDim),
            ]

        stack = []

        # CONV => RELU => POOL (this first unit carries the input shape)
        stack += conv_relu_bn(32, input_shape=inputShape)
        stack += [MaxPooling2D(pool_size=(3, 3)), Dropout(0.25)]

        # (CONV => RELU) * 2 => POOL, once with 64 and once with 128 filters
        for filters in (64, 128):
            stack += conv_relu_bn(filters)
            stack += conv_relu_bn(filters)
            stack += [MaxPooling2D(pool_size=(2, 2)), Dropout(0.25)]

        # first (and only) set of FC => RELU layers, then the softmax classifier
        stack += [
            Flatten(),
            Dense(1024),
            Activation("relu"),
            BatchNormalization(),
            Dropout(0.5),
            Dense(classes),
            Activation("softmax"),
        ]

        # layers were created in network order; add them in that same order
        model = Sequential()
        for layer in stack:
            model.add(layer)

        # return the constructed network architecture
        return model


##############################################################################




# construct the argument parse and parse the arguments
#ap = argparse.ArgumentParser()
#ap.add_argument("-d", "--dataset", required=True,
#                help="path to input dataset (i.e., directory of images)")
#ap.add_argument("-m", "--model", required=True,
#                help="path to output model")
#ap.add_argument("-l", "--labelbin", required=True,
#                help="path to output label binarizer")
#ap.add_argument("-p", "--plot", type=str, default="plot.png",
#                help="path to output accuracy/loss plot")
#args = vars(ap.parse_args())

# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions; IMAGE_DIMS is (height, width, channels)
EPOCHS = 400
INIT_LR = 1e-3
BS = 32
IMAGE_DIMS = (96, 96, 3)

# initialize the data and labels
data = []
labels = []

# grab the image paths and shuffle them with a fixed seed so that the
# ordering is reproducible from run to run
print("[INFO] loading images...")
imagePaths = sorted(paths.list_images('/content/drive/My Drive/Colab Notebooks/weld_dataset_10cls (1)'))
random.seed(42)
random.shuffle(imagePaths)

# loop over the input images
for imagePath in imagePaths:
    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising; skip such files instead of crashing inside cv2.resize
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARN] skipping unreadable image: {}".format(imagePath))
        continue

    # pre-process the image and store it in the data list
    # (cv2.resize takes its target size as (width, height))
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = img_to_array(image)
    data.append(image)

    # the class label is the name of the directory containing the image
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# fail early with a clear message instead of a confusing error further down
if not data:
    raise RuntimeError("no readable images were found in the dataset directory")

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# report the in-memory size in binary megabytes (1 MB = 1024 * 1024 bytes)
print("[INFO] data matrix: {:.2f}MB".format(
    data.nbytes / (1024 * 1024.0)))

# binarize the labels; the column order of the result follows lb.classes_
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
                                                  labels, test_size=0.2, random_state=42)

# account for skew in the labeled data: weight every class by how much
# rarer it is than the most frequent class in the training split.
# Keras documents `class_weight` as a dict mapping class index -> weight,
# so build that mapping explicitly instead of passing a bare numpy array
# (NOTE(review): how a raw array is treated appears to vary by Keras version).
classTotals = trainY.sum(axis=0)
classWeight = {
    classIdx: float(classTotals.max() / total)
    for classIdx, total in enumerate(classTotals)
}

# construct the image generator for data augmentation: small shifts, shear,
# zoom and flips in both directions, with no random rotation.
# rotation_range is a range in degrees, so spell "no rotation" as 0 rather
# than relying on the boolean False being treated as zero.
aug = ImageDataGenerator(rotation_range=0, width_shift_range=0.1,
                         height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
                         horizontal_flip=True, vertical_flip=True, fill_mode="nearest")

# build the network for the configured image size and one output unit per
# class known to the label binarizer
print("[INFO] compiling model...")
model = SmallerVGGNet.build(
    width=IMAGE_DIMS[1],
    height=IMAGE_DIMS[0],
    depth=IMAGE_DIMS[2],
    classes=len(lb.classes_),
)

# Adam with the initial learning rate and a decay of INIT_LR / EPOCHS
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# checkpoint: write a new file (named after the epoch and its validation
# accuracy) each time val_accuracy improves on the best value seen so far
filepath="/content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:{epoch:03d}-val_acc:{val_accuracy:.3f}.hdf5"
#filepath="/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls/best.hdf5"

# make sure the target directory exists before training starts, so the first
# checkpoint save cannot fail on a missing folder
os.makedirs(os.path.dirname(filepath), exist_ok=True)

checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1,
                             save_best_only=True, mode='max')
callbacks_list = [checkpoint]

# train the network on augmented batches drawn from the training split,
# validating on the untouched test split after every epoch
print("[INFO] training network...")
trainBatches = aug.flow(trainX, trainY, batch_size=BS)
H = model.fit_generator(
    trainBatches,
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS,
    validation_data=(testX, testY),
    class_weight=classWeight,
    callbacks=callbacks_list,
    verbose=1,
)

# make sure the results directory exists before anything is written to it
os.makedirs('/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls', exist_ok=True)

# save the model to disk
print("[INFO] serializing network...")
model.save('/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls/Wld_10cls.model')

# save the label binarizer to disk; the context manager closes the file
# even if pickling or writing raises
print("[INFO] serializing label binarizer...")
with open('/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls/lb.pickle', "wb") as f:
    f.write(pickle.dumps(lb))

# save the training history
# convert the history.history dict to a pandas DataFrame:
hist_df = pd.DataFrame(H.history)
hist_csv_file = '/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls/history.csv'
# pandas asks for file objects handed to to_csv to be opened with newline=''
# so that no extra blank lines are written on platforms that translate newlines
with open(hist_csv_file, mode='w', newline='') as f:
    hist_df.to_csv(f)


# make predictions on the testing set
print("[INFO] evaluating network...")
predProbs = model.predict(testX, batch_size=BS)

# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predProbs, axis=1)

# show a nicely formatted classification report; passing the full list of
# class indices keeps target_names aligned even when some class is absent
# from both the test labels and the predictions (otherwise the number of
# names would not match the number of classes found and the call would fail)
print(classification_report(testY.argmax(axis=1), predIdxs,
                            labels=np.arange(len(lb.classes_)),
                            target_names=lb.classes_))



# plot the training loss and accuracy
plt.style.use("ggplot")
plt.figure()

# take the x-axis length from the recorded history rather than from EPOCHS,
# so the plot still works if fewer epochs were recorded than were configured
N = len(H.history["loss"])
epochAxis = np.arange(0, N)
for historyKey, legendLabel in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epochAxis, H.history[historyKey], label=legendLabel)

plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper right")
plt.savefig('/content/drive/My Drive/Colab Notebooks/Results/weld_def_10cls/plot.png',dpi=300)

[INFO] loading images...
[INFO] data matrix: 194.62MB
[INFO] compiling model...
[INFO] training network...
Epoch 1/400

Epoch 00001: val_accuracy improved from -inf to 0.12155, saving model to /content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:001-val_acc:0.122.hdf5
Epoch 2/400

Epoch 00002: val_accuracy improved from 0.12155 to 0.16022, saving model to /content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:002-val_acc:0.160.hdf5
Epoch 3/400

Epoch 00003: val_accuracy improved from 0.16022 to 0.18785, saving model to /content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:003-val_acc:0.188.hdf5
Epoch 4/400

Epoch 00004: val_accuracy improved from 0.18785 to 0.19337, saving model to /content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:004-val_acc:0.193.hdf5
Epoch 5/400

Epoch 00005: val_accuracy improved from 0.19337 to 0.20442, saving model to /content/drive/My Drive/Colab Notebooks/Results/checkpoint/epochs:005-val_acc:0.204.hdf5
Epoch 

  _warn_prf(average, modifier, msg_start, len(result))


                          precision    recall  f1-score   support

        Cluster porosity       0.91      0.77      0.83        13
               No defect       1.00      1.00      1.00        21
            cap undercut       0.41      0.88      0.56        17
lack of root penetration       1.00      0.35      0.52        17
      longitudinal crack       0.95      1.00      0.97        36
     parallel slag lines       1.00      1.00      1.00        14
           root undercut       0.57      0.73      0.64        11
       silica inclusions       0.00      0.00      0.00        11
               slag line       0.64      0.90      0.75        10
        transverse crack       0.92      0.77      0.84        31

                accuracy                           0.79       181
               macro avg       0.74      0.74      0.71       181
            weighted avg       0.81      0.79      0.77       181

