# Semantic Segmentation Model To Detect Stones In The Image 
## A breif documentation regarding this model has been provided alongside this .ipynb file. Please refer to that documentation for better understanding.

### Import the necessary libraries
    While importing segmentation_models module, you can encounter "keras.engine not found" error, to fix that please use "os.environ['SM_FRAMEWORK'] = 'tf.keras' "
    The respective version of each library are given alongside the import statements.


In [9]:
# ther versions have been mentioned alongside the import statements
import os
import numpy as np # 1.25.2
import cv2 # 4.9.0
import random
import albumentations as A # 1.4.1
os.environ["SM_FRAMEWORK"] = "tf.keras"
import segmentation_models as sm # 1.0.1
from segmentation_models import get_preprocessing
import sklearn #1.4.1.post1
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt # 3.7.2
from keras.callbacks import EarlyStopping
import keras # 3.0.5



In [2]:
# The dataset contains the images and labels provided in the coding challenge
datasetPath = './Dataset'

### Data Preperation

In [3]:
# adding the path to images and labels in respective lists

imagesList = []
labelsList = []

for file in os.listdir(datasetPath):
    # image file end with ".JPG" where as label files end with ".jpg" extension
    if file.endswith('.JPG'):
        imagesList.append(os.path.join(datasetPath, file))
        label = file.split('.')[0] + '_label.jpg'
        labelsList.append(os.path.join(datasetPath, label))

In [4]:
# checking the type of content in the lists

print("List containing path of images is : ", imagesList)
print("Type of content in image list is : ", type(imagesList[0]))

print("List containign path of labels is : ", labelsList)
print("Type of content in label list is : ",type(labelsList[0]))

List containing path of images is :  ['./Dataset/Image14.JPG', './Dataset/Image15.JPG', './Dataset/Image01.JPG', './Dataset/Image17.JPG', './Dataset/Image03.JPG', './Dataset/Image02.JPG', './Dataset/Image16.JPG', './Dataset/Image12.JPG', './Dataset/Image06.JPG', './Dataset/Image07.JPG', './Dataset/Image13.JPG', './Dataset/Image05.JPG', './Dataset/Image11.JPG', './Dataset/Image10.JPG', './Dataset/Image04.JPG', './Dataset/Image09.JPG', './Dataset/Image08.JPG', './Dataset/Image20.JPG', './Dataset/Image18.JPG', './Dataset/Image19.JPG']
Type of content in image list is :  <class 'str'>
List containign path of labels is :  ['./Dataset/Image14_label.jpg', './Dataset/Image15_label.jpg', './Dataset/Image01_label.jpg', './Dataset/Image17_label.jpg', './Dataset/Image03_label.jpg', './Dataset/Image02_label.jpg', './Dataset/Image16_label.jpg', './Dataset/Image12_label.jpg', './Dataset/Image06_label.jpg', './Dataset/Image07_label.jpg', './Dataset/Image13_label.jpg', './Dataset/Image05_label.jpg', '.

### Data Augmentation
We are making use of the albumentations library to generate augmented images since originally our dataset contains only 20 input images and 20 labels.
##### Transformations used for augmenting image : 
                                        1. Rotation (probability : 50%)
                                        2. Horizontal flipping (probability : 50%)
                                        3. Vertical fliping (probability : 50%) 
                                        

In [None]:
transform = A.Compose([
    A.Rotate(limit=30, p=0.5),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
])

# directories to store generated images and labels
transformed_images_dir = "./Data/Images"
transformed_labels_dir = "./Data/Labels"

images_to_generate = 300
i = 1
while i <= images_to_generate:
    #randomly choose an image from available dataset to perform transformation
    number = random.randint(0, len(imagesList)-1)
    image = imagesList[number]
    label = labelsList[number]
    originalImage = cv2.imread(image)
    originalImage = cv2.cvtColor(originalImage, cv2.COLOR_BGR2RGB)
    originalLabel = cv2.imread(label)
    originalLabel = cv2.cvtColor(originalLabel, cv2.COLOR_BGR2RGB)

    transformed = transform( image=originalImage , mask=originalLabel)
    transformed_image = transformed['image']
    transformed_label = transformed['mask']

    transformed_image_path = os.path.join(transformed_images_dir, f'Image{i}_.JPG')
    transformed_label_path = os.path.join(transformed_labels_dir, f'Image{i}_label_.jpg')

    # save the augmented image and label to respective directories
    cv2.imwrite(transformed_image_path, transformed_image)
    cv2.imwrite(transformed_label_path, transformed_label)

    i=i+1

In [7]:
transformed_images_dir = "./Data/Images"
transformed_labels_dir = "./Data/Labels"

# list to store the augmented images and labels
images = []
labels = []

for file in os.listdir('./Data/Images'):
    #choose a image and find it's respective label
    img_path = './Data/Images/' + file
    l = file.split('_')[0] +'_label_.jpg'
    lbl_path = './Data/Labels/' + l

    # input image is RGB
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img , cv2.COLOR_BGR2RGB)
    img = cv2.resize(img , (256 , 256))
    images.append(img)

    # label is GRAYSCALE
    lbl = cv2.imread(lbl_path)
    lbl = cv2.cvtColor(lbl, cv2.COLOR_BGR2GRAY)
    lbl = cv2.resize(lbl , (256 , 256))
    labels.append(lbl)

# convert list to numpy array to be able to be used by the deep learning model
images = np.array(images)
labels = np.array(labels)




In [8]:
# verifying the shapes of the input and label arrays
print("---- Images Numpy Array ----")
print("Shape of array : ", images.shape)
print("Shape of every image : ", images[3].shape)
print("\n")

print("---- Labels Numpy Array ----")
print("Shape of array : ", labels.shape)
print("Shape of every label : ", labels[3].shape)

---- Images Numpy Array ----
Shape of array :  (300, 256, 256, 3)
Shape of every image :  (256, 256, 3)


---- Labels Numpy Array ----
Shape of array :  (300, 256, 256)
Shape of every label :  (256, 256)


In [9]:
# adding channel dimension to the grayscale labels so that these are useable in the model
labels = np.expand_dims( labels , axis=-1)
print(labels.shape)
print(labels[1].shape)

(300, 256, 256, 1)
(256, 256, 1)


### Model Prerequisites
    We will be using the resnet18 backbone for creating our model coupled with the Unet architecture.

In [17]:
BACKBONE = 'resnet18'

In [18]:
preprocess_input = get_preprocessing(BACKBONE)

x= images
y= labels

# converting the pixel value to be in the range of 0 to 1 and of float32 type
x = x.astype(np.float32)/255.0
y = y.astype(np.float32)/255.0

# splitting the dataset into training and testing part, getting a 80-20 split
x_train, x_val, y_train, y_val = train_test_split(x, y , test_size=0.2, random_state=42)
x_train = preprocess_input(x_train)
x_val = preprocess_input(x_val)


In [19]:
print("---- Checking Shapes ----")
print("x_train : ", x_train.shape)
print("x_val : ", x_val.shape)
print("y_train : ", y_train.shape)
print("y_val : ", y_val.shape)

---- Checking Shapes ----
x_train :  (240, 256, 256, 3)
x_val :  (60, 256, 256, 3)
y_train :  (240, 256, 256, 1)
y_val :  (60, 256, 256, 1)


### Model Creation
    We will use the Unet architecture provided by the semantic_models library.
    Since we only want to segment stones from the image, we will use classes = 1, and a sigmiod activation function.
    In additon to this , the encoder_weights are set to None therefore we will calulate weights for both the encoder and decoder part ourself.
    

In [23]:
model = sm.Unet(BACKBONE, classes = 1, activation='sigmoid' ,encoder_weights=None)
model.compile(
    'Adam',
    loss=sm.losses.binary_crossentropy,
    metrics=[sm.metrics.iou_score],
)

In [25]:
# early stopping help in stopping the training process before all the epochs have completed in case there is no signifiant improvement in loss
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    verbose=1,
    mode='min'
)

# calculating the weights and creating the model
model.fit(
    x = x_train,
    y = y_train,
    batch_size = 4,
    epochs = 50,
    validation_data = (x_val, y_val),
    callbacks = [early_stopping]
)


Epoch 1/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 887ms/step - iou_score: 0.2641 - loss: 0.0575 - val_iou_score: 0.1426 - val_loss: 0.0836
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 906ms/step - iou_score: 0.4598 - loss: 0.0280 - val_iou_score: 0.3433 - val_loss: 0.0374
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 912ms/step - iou_score: 0.5678 - loss: 0.0208 - val_iou_score: 0.5033 - val_loss: 0.0236
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 915ms/step - iou_score: 0.6301 - loss: 0.0175 - val_iou_score: 0.5923 - val_loss: 0.0193
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 910ms/step - iou_score: 0.6594 - loss: 0.0159 - val_iou_score: 0.6372 - val_loss: 0.0169
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 913ms/step - iou_score: 0.6863 - loss: 0.0145 - val_iou_score: 0.6640 - val_loss: 0.0150
Epoch 7/50



In [26]:
# saving the model for future predictions
model.save('stone_seg_resnet18_encoderNone.keras')

### Prediction

In [None]:
model =keras.models.load_model('./stone_seg_resnet18_encoderNone.keras', compile=False)

test_img = cv2.imread('./Dataset/Image01.JPG')
test_img = cv2.cvtColor(test_img, cv2.COLOR_BGR2RGB)
test_img = cv2.resize(test_img, (256 , 256))
test_img = test_img.astype(np.float32)/255.0
test_img = np.expand_dims(test_img, axis=0)

prediction = model.predict(test_img)
pred_img = np.squeeze(prediction , axis=0)
plt.imshow(pred_img , cmap='gray')