## **THEORY**

**The architecture used**

<img src="u-net-architecture.png"/>

## **CODE**

**Importing all the libraries**

Versions of the libraries used <br>
Name----------------------->version <br>
Python---------------------->3.11.3 <br>
Tensorflow------------------>2.12.0 <br>
cv2------------------------->4.7.0 <br>
Numpy----------------------->1.23.5 <br>
Keras----------------------->2.12.0 <br>
Matplotlib------------------>3.7.1 <br>
sklearn--------------------->1.2.2 <br>


In [None]:
import glob
import os
import random

import tensorflow as tf
import cv2
import numpy as np
from matplotlib import pyplot as plt
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, concatenate, Conv2DTranspose, BatchNormalization, Dropout, Lambda
from keras import backend as K
from tensorflow import keras
import sklearn
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

In [None]:
# Side length, in pixels, of the square tiles. It sets the model input shape
# and the reshape target used when plotting images and masks.
patch_size = 720 #every picture is 720x720

## **Taking the images from the files and appending them into arrays**

In [None]:
# Load every PNG tile under "input_2018" as a float32 array scaled to [0, 1].
# `jade` records the source path of each tile, index-aligned with `train_images`.
train_images = []
jade = []
counter1 = 0
for directory_path in glob.glob("input_2018"):
    # glob returns paths in a filesystem-dependent order. Images and masks are
    # paired purely by list index, so iterate in sorted filename order to make the
    # order reproducible; the mask list has to be ordered the same way.
    for img_path in sorted(glob.glob(os.path.join(directory_path, "*.png"))):
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # channels are in BGR order
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            raise IOError(f"cv2.imread could not read image: {img_path}")
        jade.append(img_path)
        train_images.append(img.astype('float32') / 255.)
        counter1 += 1
# Convert list to array for machine learning processing
train_images = np.array(train_images)


In [None]:
# NOTE(review): hard-coded absolute path. The relative "mask" glob used later is
# resolved against this directory, whereas "input_2018" was globbed before this
# chdir, so re-running the image-loading cell afterwards looks in a different place.
os.chdir(r'C:\Users\user\Desktop\only_building_attempt_2\fefe')

In [None]:
# Show every collected image path (this can be a long listing).
print(jade)

In [None]:
# Number of image paths collected.
print(len(jade))

In [None]:
# Keep the 12 characters that precede the last 4 characters of each path; these
# strings are later used as output file names.
# NOTE(review): presumably this is the file stem with ".png" stripped, which only
# holds if every stem is exactly 12 characters long - confirm against the data.
modified_list = [s[-16:-4] for s in jade]

In [None]:
# Spot-check one derived name (requires at least 12 images to have been loaded).
print(modified_list[11])

In [None]:
# Load every PNG mask under "mask" as a 3-channel uint8 array (raw pixel values kept).
train_masks = []
counter2 = 0
for directory_path in glob.glob("mask"):
    # glob returns paths in a filesystem-dependent order. Masks are matched to images
    # purely by list index, so iterate in sorted filename order to make the order
    # reproducible; the image list has to be ordered the same way.
    for mask_path in sorted(glob.glob(os.path.join(directory_path, "*.png"))):
        mask = cv2.imread(mask_path, cv2.IMREAD_COLOR)
        if mask is None:
            # cv2.imread signals an unreadable/corrupt file by returning None; storing
            # it would silently turn train_masks into an object array.
            raise IOError(f"cv2.imread could not read mask: {mask_path}")
        train_masks.append(mask)
        counter2 += 1
# Convert list to array for machine learning processing
train_masks = np.array(train_masks)

In [None]:
# The two counts must match, because images and masks are paired by index.
print(counter1,counter2) #sanity check if the images and mask appended are equal

In [None]:
# Distinct pixel values present anywhere in the loaded masks.
ar1 = np.unique(train_masks)  

In [None]:
# Print the distinct mask values next to the legend that explains them.
print(ar1)
print("""The following pixel values represents the following class
85: buildings
170: background
255: Null""")

**Sanity check for seeing if the images correspond to the correct masks**

In [None]:
# Show one randomly chosen image next to its mask to confirm the pairing by index.
# random.randint includes BOTH endpoints, so the upper bound must be len - 1;
# using len(train_images) could select an index one past the end.
image_number = random.randint(0, len(train_images) - 1)
print(image_number)
plt.figure(figsize=(12, 6))
plt.subplot(121)
# cv2.imread stores channels as BGR while imshow expects RGB: flip the channel axis
# for display only (the training data itself is left untouched).
plt.imshow(np.reshape(train_images[image_number], (patch_size, patch_size, 3))[..., ::-1])
plt.subplot(122)
plt.imshow(np.reshape(train_masks[image_number], (patch_size, patch_size,3)))
plt.show()

## **Converting the masks into integer-coded labels for the machine learning algorithm**

In [None]:
# Mask colours (identical value in all three channels) that identify each class.
building = np.array([85,85,85])       # building pixels
unlabeled = np.array([170,170,170])   # background pixels
null= np.array([255,255,255])         # null pixels

In [None]:
def rgb_to_2D_label(label):
    """
    Convert a 3-channel colour mask into a 2D map of integer class ids.

    A pixel whose three channels equal `null` becomes 2, `unlabeled` becomes 0 and
    `building` becomes 1. Pixels matching none of these colours stay 0.

    Parameters
    ----------
    label : np.ndarray
        Mask of shape (height, width, 3).

    Returns
    -------
    np.ndarray
        uint8 array of shape (height, width) holding the class ids.
    """
    # Allocate the single-channel result directly; everything starts as class 0.
    label_seg = np.zeros(label.shape[:-1], dtype=np.uint8)
    for colour, class_id in ((null, 2), (unlabeled, 0), (building, 1)):
        # np.all over the channel axis is True where all three channels match.
        label_seg[np.all(label == colour, axis=-1)] = class_id
    return label_seg


In [None]:
# Convert every colour mask to an integer class map, then append a trailing channel
# axis so the result has shape (n_masks, height, width, 1).
labels = np.array([rgb_to_2D_label(train_masks[i]) for i in range(train_masks.shape[0])])
labels = np.expand_dims(labels, axis=3)

In [None]:
# Distinct class ids present after the conversion.
ar2 = np.unique(labels)

In [None]:
# Display the class ids found above.
print(ar2)

## **Removing the images which we don't need**

**Removing the images with Null values**

In [None]:
# Split the label maps into those free of null pixels (kept, as float32) and those
# containing at least one null pixel (their indices are recorded for removal).
test = []
bin = []  # NOTE: shadows the builtin bin(); the name is kept because later cells read it
counter3 = 0
for i, label_map in enumerate(labels):
    # Class id 2 marks null pixels.
    if np.any(label_map == 2):
        bin.append(i)  # remember the index so the matching image can be dropped too
        counter3 += 1
    else:
        test.append(label_map.astype('float32'))
labels1 = np.array(test)

In [None]:
# Drop the images whose label map contained null pixels, using the indices in `bin`.
test2 = []
counter5 = 0
# Set membership is O(1); testing against the list made the loop quadratic.
dropped_indices = set(bin)
for i in range(len(train_images)):
    if i in dropped_indices:
        counter5 += 1  # count the discarded images
    else:
        test2.append(train_images[i])
train_images1 = np.array(test2)

In [None]:
# Kept labels, dropped indices and kept images. The first and last numbers must be
# equal for images and labels to stay index-aligned.
print(len(labels1))
print(len(bin))
# The original printed the builtin `len` function object instead of a length.
print(len(train_images1))

In [None]:
# Show one randomly chosen image next to its label map after the null-tile removal.
# `len(train_images1-1)` subtracted 1 from every pixel and still returned the full
# length; randint includes both endpoints, so the bound must be len(...) - 1.
image_number = random.randint(0, len(train_images1) - 1)
print(image_number)
plt.figure(figsize=(12, 6))
plt.subplot(121)
# cv2.imread stores channels as BGR while imshow expects RGB: flip for display only.
plt.imshow(np.reshape(train_images1[image_number], (patch_size, patch_size, 3))[..., ::-1])
plt.subplot(122)
plt.imshow(np.reshape(labels1[image_number], (patch_size, patch_size,1)))
plt.show()

In [None]:
# Class id 2 (null) should no longer appear here.
print(np.unique(labels1))

**Removing the images with only background**

In [None]:
#Only use this if you want pictures with both buildings and background
# Keep the label maps that contain at least one building pixel (class id 1) and
# record the indices of the rest so the matching images can be dropped as well.
bin = []  # NOTE: shadows the builtin bin(); the name is kept because later cells read it
test = []
for i, label_map in enumerate(labels1):
    if np.any(label_map == 1):
        test.append(label_map.astype('float32'))
    else:
        bin.append(i)
        counter3 += 1  # keeps counting on top of the null-tile total
labels2 = np.array(test)

In [None]:
# Kept label maps, dropped indices, and the class ids that remain.
print(len(test))
print(len(bin))
ar2 = np.unique(labels2)
print(ar2)#doing some sanity checks to make sure all the data is getting used

In [None]:
# Drop the images whose label map has no building pixels, using the indices in `bin`.
test2 = []
counter5 = 0
# Set membership is O(1); testing against the list made the loop quadratic.
dropped_indices = set(bin)
for i in range(len(train_images1)):
    if i in dropped_indices:
        counter5 += 1  # count the discarded images
    else:
        test2.append(train_images1[i])
train_images2 = np.array(test2)


In [None]:
# Images kept, images dropped in the previous cell, and the element type of the kept array.
print(len(train_images2))
print(counter5)
train_images2.dtype

In [None]:
# The leading dimensions must match: one label map per image.
print(train_images2.shape,labels2.shape)

In [None]:
# Class ids remaining in the final label set.
print(np.unique(labels2))

**Another sanity check to make sure that the images and labels still line up after removing the images with null values and the background-only images**

In [None]:
# Show one randomly chosen image next to its label map after both filtering steps.
# `len(train_images2-1)` subtracted 1 from every pixel and still returned the full
# length; randint includes both endpoints, so the bound must be len(...) - 1.
image_number = random.randint(0, len(train_images2) - 1)
print(image_number)
plt.figure(figsize=(12, 6))
plt.subplot(121)
# cv2.imread stores channels as BGR while imshow expects RGB: flip for display only.
plt.imshow(np.reshape(train_images2[image_number], (patch_size, patch_size, 3))[..., ::-1])
plt.subplot(122)
plt.imshow(np.reshape(labels2[image_number], (patch_size, patch_size,1)))
plt.show()

## **Splitting the data**

In [None]:
# Hold out 20% of the filtered tiles for validation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(train_images2, labels2, test_size = 0.20, random_state = 42)

In [None]:
# Number of training images before augmentation.
len(X_train)

**Creating digital negatives**

In [None]:
# Augment the training set with the digital negative (1 - pixel value) of every
# training image, so the model cannot rely on colour alone.
# Pixel values are in [0, 1], so the negative is computed for the whole array at once
# instead of image by image.
digital_negative = 1 - X_train
# NOTE: this cell is not idempotent - every re-run doubles X_train again, and the
# labels are only duplicated by the next cell, so run both exactly once and in order.
X_train = np.concatenate((X_train,digital_negative))
print(len(X_train))

In [None]:
# Each digital negative shows the same scene as its source image, so the labels are
# simply repeated in the same order that the negatives were appended to X_train.
# NOTE: re-running this cell doubles y_train again; it must be run exactly once per
# run of the cell that creates the negatives.
y_train = np.concatenate((y_train,y_train))
print(len(y_train))

## **Assigning sample weights to compensate for the class imbalance (the background is in the majority)**

In [None]:
def add_sample_weights(i,label):
    """
    Build a per-pixel weight map that balances the two classes of one label map.

    Each class is weighted in inverse proportion to its pixel count and the two
    weights are normalised to sum to 1, so the rarer class gets the larger weight.

    Parameters
    ----------
    i : int
        Index of the sample. Unused; kept so existing calls keep working.
    label : np.ndarray
        Label map whose zero entries are class 0 and non-zero entries are class 1.
        Its values are cast to int32 and used as indices into the two class weights.

    Returns
    -------
    tf.Tensor
        Weights with the same shape as `label`.
    """
    # Take the pixel count from the label itself rather than hard-coding 720*720,
    # so the function stays correct for any tile size.
    total_count = int(np.size(label))
    one_count = int(np.count_nonzero(label))
    zero_count = total_count - one_count

    if one_count == 0 or zero_count == 0:
        # Only one class is present: there is nothing to balance, and the inverse
        # frequency below would divide by zero. Fall back to equal weights.
        class_weights = tf.constant([0.5, 0.5])
    else:
        # The constant only keeps the intermediate values at a convenient magnitude;
        # it cancels out in the normalisation below.
        c = 1000000
        x = c/(2*zero_count)
        y = c/(2*one_count)
        # The weights for each class, with the constraint that:
        #     sum(class_weights) == 1.0
        class_weights = tf.constant([x,y])
        class_weights = class_weights/tf.reduce_sum(class_weights)

    # Create an image of `sample_weights` by using the label at each pixel as an
    # index into the `class weights`.
    sample_weights = tf.gather(class_weights, indices=tf.cast(label, tf.int32))

    return sample_weights

In [None]:
# One weight map per training sample, stacked into a single array that is
# index-aligned with X_train / y_train.
sample_weights = np.array(
    [add_sample_weights(i, y_train[i]) for i in range(X_train.shape[0])]
)

In [None]:
# Must equal the number of training samples.
print(sample_weights.shape[0])#sanity check

In [None]:
# Inspect the weight map of the second training sample.
sample_weights[1]

## **Making the Deep learning Model**

**Defining the Convolutional Blocks**

In [None]:
def down_block(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    """
    Encoder stage: two ReLU convolutions followed by 2x2 max pooling with stride 2.

    Returns a pair (features, pooled): the convolution output before pooling, which
    the caller uses as the skip connection, and the pooled tensor for the next stage.
    """
    conv_options = dict(padding=padding, strides=strides, activation="relu")
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(x)
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(features)
    pooled = keras.layers.MaxPool2D((2, 2), (2, 2))(features)
    return features, pooled

def up_block(x, skip, filters, kernel_size=(3, 3), padding="same", strides=1):
    """
    Decoder stage: upsample `x` by a factor of 2, concatenate it with the encoder
    tensor `skip`, then apply two ReLU convolutions.

    Returns the output of the second convolution.
    """
    conv_options = dict(padding=padding, strides=strides, activation="relu")
    upsampled = keras.layers.UpSampling2D((2, 2))(x)
    merged = keras.layers.Concatenate()([upsampled, skip])
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(merged)
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(features)
    return features

def bottleneck(x, filters, kernel_size=(3, 3), padding="same", strides=1):
    """
    Stage between encoder and decoder: two ReLU convolutions, with no pooling or upsampling.

    Returns the output of the second convolution.
    """
    conv_options = dict(padding=padding, strides=strides, activation="relu")
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(x)
    features = keras.layers.Conv2D(filters, kernel_size, **conv_options)(features)
    return features

**Defining the architecture**

In [None]:
def UNet():
    """
    Assemble the U-Net: four encoder stages, a bottleneck and four decoder stages,
    ending in a 1x1 sigmoid convolution that outputs one channel per pixel.

    The input shape is (patch_size, patch_size, 3). Every encoder stage halves the
    spatial size and every decoder stage doubles it again, so patch_size has to be
    divisible by 16 for the upsampled tensors to match their skip connections
    (with patch_size = 720: 720 -> 360 -> 180 -> 90 -> 45 and back up).

    Returns
    -------
    keras.models.Model
        The uncompiled model.
    """
    f = [16, 32, 64, 128, 256]
    inputs = keras.layers.Input((patch_size, patch_size, 3))

    # Encoder: keep each stage's pre-pooling output for the matching decoder stage.
    skips = []
    x = inputs
    for n_filters in f[:-1]:
        skip, x = down_block(x, n_filters)
        skips.append(skip)

    x = bottleneck(x, f[-1])

    # Decoder: walk back up, pairing each stage with the deepest unused skip tensor.
    for n_filters, skip in zip(reversed(f[:-1]), reversed(skips)):
        x = up_block(x, skip, n_filters)

    outputs = keras.layers.Conv2D(1, (1, 1), padding="same", activation="sigmoid")(x)
    return keras.models.Model(inputs, outputs)

**Compiling the model**

In [None]:
# Build the network and compile it with Adam (learning rate 1e-5), binary focal
# cross-entropy as the loss and accuracy as the reported metric.
model = UNet()
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    optimizer=adam_optimizer,
    loss="binary_focal_crossentropy",
    metrics=["acc"],
)
model.summary()

## **Training the model**

In [None]:
# Train for 30 epochs with per-pixel sample weights; the held-out split is used as
# validation data.
# NOTE(review): with shuffle=False the batches follow the order of X_train, i.e. all
# original images first and all digital negatives afterwards - confirm this is intended.
history1 = model.fit(X_train, y_train, 
                    batch_size = 16, 
                    verbose=1, 
                    epochs= 30,
                    validation_data=(X_test, y_test), 
                    shuffle=False,
                    sample_weight=sample_weights)

**Plotting the model performance**

In [None]:
# Training vs. validation loss per epoch.
history = history1
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)
for curve, line_style, legend_text in (
    (loss, 'y', 'Training loss'),
    (val_loss, 'r', 'Validation loss'),
):
    plt.plot(epochs, curve, line_style, label=legend_text)
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
history = history1
acc = history.history['acc']
val_acc = history.history['val_acc']
# Derive the epoch axis from this cell's own data; the original used `loss`, which
# only exists if the loss-plot cell was executed first.
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'y', label='Training Accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation Accuracy')
plt.title('Training and validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

**Saving and loading the model**

In [None]:
#model.save(os.path.join('models','inverse_model_unet.h5'))

In [None]:
# Replace the in-memory model with the one stored on disk.
# NOTE(review): the matching model.save call is commented out, so this file has to
# exist already, and the weights trained in this session are discarded - confirm intended.
model = load_model(os.path.join('models','inverse_model_unet.h5'))

**Making predictions using the model**

In [None]:
# Predict on every loaded tile (the unfiltered `train_images`), so `result` is indexed
# in load order, like `jade` / `modified_list`, and NOT like X_train / X_test.
result = model.predict(train_images)

In [None]:
# Shape of the stacked predictions.
result.shape

In [None]:
# Switch to the output folder so that the relative file names used when saving land there.
# NOTE(review): hard-coded absolute path; every relative path after this cell is
# resolved against this directory.
directory = r'C:\Users\user\Desktop\only_building_attempt_2\output_2018'
os.chdir(directory)

In [None]:
# Render every prediction and save it as "<name>.png" in the current directory,
# where <name> is the entry of `modified_list` at the same index.
my_dpi = 144
for i in range(result.shape[0]):
    gun = (result[i]*255).astype(int)  # scale the sigmoid output to 0..255
    fig = plt.figure(figsize=(936/my_dpi, 936/my_dpi), dpi=my_dpi)
    plt.imshow(gun)
    plt.axis('off')
    a = modified_list[i]
    plt.savefig(f'{a}.png', bbox_inches='tight', pad_inches = 0)
    # Close the figure once it is written: otherwise one open figure per prediction
    # accumulates in memory for the whole loop.
    plt.close(fig)

**Testing the model**

In [None]:
# Pick one random held-out sample and compare the image, its mask, the model
# prediction and the edges extracted from the prediction and from the mask.
test_img_number = random.randint(0, len(X_test)-1)

test_img = X_test[test_img_number]
ground_truth=y_test[test_img_number]
# `result` was computed from `train_images` (every loaded tile, in load order), so
# result[test_img_number] belongs to a different tile than X_test[test_img_number].
# Predict on the selected test image itself so all panels show the same tile.
predicted_img = model.predict(np.expand_dims(test_img, axis=0))[0]
kernel_size = (8,8)  # Adjust the kernel size as desired
kernel = np.ones(kernel_size, dtype=np.uint8)

# Apply erosion to the prediction, then take Sobel gradients of the eroded map
eroded_image = cv2.erode(predicted_img, kernel, iterations=1)
sobel_x = cv2.Sobel(eroded_image*255, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(eroded_image*255, cv2.CV_64F, 0, 1, ksize=3)

# Calculate the magnitude and convert to uint8
gradient_magnitude = cv2.magnitude(sobel_x, sobel_y)
gradient_magnitude = cv2.convertScaleAbs(gradient_magnitude)

# Apply a threshold to obtain the edges
threshold_value = 10  # Adjust the threshold value as desired
edges = cv2.threshold(gradient_magnitude, threshold_value, 255, cv2.THRESH_BINARY)[1]

# Same pipeline on the ground-truth mask. The original computed the eroded mask but
# then ran Sobel on the prediction, so the "from mask" panel never showed the mask.
eroded_image1 = cv2.erode(ground_truth, kernel, iterations=1)
sobel_x = cv2.Sobel(eroded_image1*255, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(eroded_image1*255, cv2.CV_64F, 0, 1, ksize=3)

# Calculate the magnitude and convert to uint8
gradient_magnitude = cv2.magnitude(sobel_x, sobel_y)
gradient_magnitude = cv2.convertScaleAbs(gradient_magnitude)

# Apply a threshold to obtain the edges
threshold_value = 29 # Adjust the threshold value as desired
edges2 = cv2.threshold(gradient_magnitude, threshold_value, 255, cv2.THRESH_BINARY)[1]


print(test_img_number)
plt.figure(figsize=(12, 8))
plt.subplot(231)
plt.title('Testing Image')
# cv2.imread stores channels as BGR while imshow expects RGB: flip for display only.
plt.imshow(test_img[..., ::-1])
plt.subplot(232)
plt.title('Testing Label')
plt.imshow(ground_truth)
plt.subplot(234)
plt.title('Prediction on test image')
plt.imshow(predicted_img)
plt.subplot(235)
plt.title('Prediction after morphological functions')
plt.imshow(eroded_image)
plt.subplot(236)
plt.title('Edge in the buildings')
plt.imshow(edges)
plt.subplot(233)
plt.title('Edge in the building from mask')
plt.imshow(edges2)

plt.show()