# Using an AI Model to Detect Damaged Packages in In-Line Quality Inspection


### Import Libraries

In [None]:
!pip install -r requirements.txt

In [None]:
import os
from collections import Counter

import cv2
import numpy as np
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, Rescaling


In [None]:

# Build the paths of the four image folders located under ./data
# (intact/damaged crossed with side/top).
root_dir = os.path.join(os.getcwd(), "data")
(
    intact_side_dir,
    intact_top_dir,
    damaged_side_dir,
    damaged_top_dir,
) = (
    os.path.join(root_dir, subfolder)
    for subfolder in ('intact/side/', 'intact/top/', 'damaged/side/', 'damaged/top/')
)


# Load Images Function
def load_images(dir):
    """Read every JPG/PNG image found directly inside a directory.

    Args:
        dir: Path of the directory to scan (sub-directories are not visited).

    Returns:
        list: The decoded images, in sorted filename order. Files whose
        extension is not .jpg/.png (compared case-insensitively) and files
        that ``cv2.imread`` cannot decode are skipped.
    """
    images = []
    # os.listdir returns entries in arbitrary order; sorting makes the
    # resulting dataset order reproducible between runs and machines.
    for file in sorted(os.listdir(dir)):
        # Lower-case the name so that '.JPG' / '.PNG' files are not skipped.
        if not file.lower().endswith(('.jpg', '.png')):
            continue
        img = cv2.imread(os.path.join(dir, file))
        # cv2.imread returns None instead of raising when a file cannot be read.
        if img is not None:
            images.append(img)
    return images

# Load the images from different categories
damaged_top_images = load_images(damaged_top_dir)
intact_side_images = load_images(intact_side_dir)
damaged_side_images = load_images(damaged_side_dir)
intact_top_images = load_images(intact_top_dir)

# Creating labels for the damaged (1.0) and intact (0.0) images
damaged_labels = np.ones(len(damaged_side_images) + len(damaged_top_images))
intact_labels = np.zeros(len(intact_side_images) + len(intact_top_images))

# Concatenating the images and labels.
# The image groups MUST be listed in the same order as the labels (all damaged
# first, then all intact); any other order pairs images with the wrong label.
image_groups = (damaged_side_images, damaged_top_images, intact_side_images, intact_top_images)
# An empty list has a different number of dimensions than a stack of images,
# so empty categories are left out of the concatenation.
image_data = np.concatenate([group for group in image_groups if len(group) > 0])
image_labels = np.concatenate((damaged_labels, intact_labels))

# Images and labels are paired by position, so their counts have to agree
assert len(image_data) == len(image_labels), "image/label count mismatch"

# Making sure that the data is loaded correctly by printing the shape of the loaded data
print("Shape of images:", image_data.shape)
print("Shape of labels:", image_labels.shape)

In [None]:
# Seeded random generator: the shuffle, and therefore the train/validation/test
# split derived from it, is identical on every fresh run. Change SEED to get a
# different ordering.
SEED = 42
rng = np.random.default_rng(SEED)

# Generating a random permutation of the sample indices
indices = rng.permutation(len(image_data))

# Applying the same permutation to both arrays keeps every image paired with its label
image_data = image_data[indices]
image_labels = image_labels[indices]

Shuffling the data and label arrays with the same index permutation keeps every image paired with its label while removing any ordering by category. This ensures that the model sees a diverse range of samples during each training phase and does not learn patterns based on the order of the samples in the dataset, which leads to a more robust and generalizable model.

In [None]:


# Split the data into training (70%), validation (15%) and testing (the remainder) sets
num_samples = len(image_data)
num_train = int(num_samples * 0.7)
num_val = int(num_samples * 0.15)
num_test = num_samples - num_train - num_val

# Consecutive, non-overlapping index ranges along the first axis
train_part = slice(None, num_train)
val_part = slice(num_train, num_train + num_val)
test_part = slice(num_train + num_val, None)

train_data, train_labels = image_data[train_part], image_labels[train_part]
val_data, val_labels = image_data[val_part], image_labels[val_part]
test_data, test_labels = image_data[test_part], image_labels[test_part]

print("Number of training samples:", len(train_data))
print("Number of validation samples:", len(val_data))
print("Number of testing samples:", len(test_data))


In the code above, the data is divided into three sets: training (70%), validation (15%), and testing (the remaining ~15%).

**Training set:**
-  The part of the dataset used to train the machine learning model.

**Validation Set:**
- It is used during the training process to evaluate the performance of the model and fine-tune hyperparameters.
- It helps in monitoring the model's performance on unseen data and detecting overfitting or underfitting.
- The validation set influences decisions about the model architecture, such as choosing the number of layers, units, or dropout rates.
- Typically, the validation set is used multiple times during the training process, and adjustments to the model are made based on its performance on this set.

**Test Set:**
- Used only once after the model has been trained and validated.
- It provides an unbiased evaluation of the model's performance on unseen data, serving as a final check for the model.
- The test set should be completely separate from the training and validation sets, ensuring that the model's performance is evaluated on data that it has never seen before.


In [None]:
# Report how many samples of each label value ended up in the training,
# validation, and testing sets (to check whether damaged and intact packages are balanced)
for split_name, split_labels in (
    ("training:", train_labels),
    ("validation:", val_labels),
    ("testing:", test_labels),
):
    unique_labels, label_counts = np.unique(split_labels, return_counts=True)
    print(split_name, dict(zip(unique_labels, label_counts)))


> If the sets are not balanced between intact (0.0) and damaged (1.0), re-run the shuffling cell above and then split the data again, so that the model can be trained effectively.

In [None]:
# Model Architecture Definition

nn = Sequential()
# Declare the expected image shape with an explicit Input object rather than
# the `input_shape=` argument of the first layer.
nn.add(Input(shape=(540, 960, 3)))
# Images read with cv2.imread hold 8-bit values (0-255); scale them to [0, 1]
# inside the model so that training, validation, testing and any later
# prediction all receive exactly the same preprocessing.
nn.add(Rescaling(1.0 / 255))
# Two convolution + max-pooling stages extract image features
nn.add(Conv2D(filters=64, kernel_size=3, activation='relu'))
nn.add(MaxPooling2D(pool_size=2))
nn.add(Conv2D(filters=128, kernel_size=3, activation='relu'))
nn.add(MaxPooling2D(pool_size=2))
# Classifier head: a single sigmoid unit matching the binary 0/1 labels
nn.add(Flatten())
nn.add(Dense(64, activation='relu'))
nn.add(Dense(1, activation='sigmoid'))

# Compiling the Model
nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Printing a summary of the model architecture, including the number of parameters in each layer and the total number of trainable parameters.
nn.summary()

In [None]:
# Training the Model
# The validation set is evaluated after every epoch; the returned object is kept in `history`.

history = nn.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
    validation_data=(val_data, val_labels),
)

In [None]:
# Evaluate the trained model on the held-out test set
test_loss, test_accuracy = nn.evaluate(test_data, test_labels)
# Report both metrics rounded to four decimal places
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_accuracy:.4f}")