<a href="https://colab.research.google.com/github/Sharondafrin/FB-Likes-Prediction/blob/main/Table_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
#import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import os
import xml.etree.ElementTree as ET
from PIL import Image

#tensorflow libraries
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input, Concatenate, UpSampling2D

In [14]:
from google.colab import drive
drive.mount('/content/drive' , force_remount=True)

Mounted at /content/drive


In [15]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
#length of bmp files we have
len(glob.glob('/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data/*.bmp'))

494

In [17]:
#length of xml files we have
len(glob.glob('/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data/*.xml'))

494

In [18]:
#changing bmp image to jpg
IMAGE_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data'
for image in glob.glob('/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data/*.bmp'):
    im = Image.open(image)
    image_name = os.path.basename(image)
    save_image_path = os.path.join(IMAGE_PATH, image_name.replace('bmp', 'jpg'))
    im.save(save_image_path)

In [19]:
len(glob.glob('/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data/*.jpg'))

494

## Data Preperation

In [None]:
def sameTable(ymin_1, ymin_2, ymax_1, ymax_2):
    min_diff = abs(ymin_1 - ymin_2)
    max_diff = abs(ymax_1 - ymax_2)

    if min_diff <= 5 and max_diff <=5:
        return True
    elif min_diff <= 4 and max_diff <=7:
        return True
    elif min_diff <= 7 and max_diff <=4:
        return True
    return False

In [None]:
if __name__ == "__main__":
    directory = r'/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data'
    final_col_directory = r'/content/drive/MyDrive/Colab Notebooks/dataset/Columns Mask'
    final_table_directory = r'/content/drive/MyDrive/Colab Notebooks/dataset/Table Mask'

    for file in os.listdir(directory):
        filename = os.fsdecode(file)
        # Find all the xml files
        if filename.endswith(".xml"):
            filename = filename[:-4]

            # Parse xml file
            tree = ET.parse(directory + '\\' + filename + '.xml')
            root = tree.getroot()
            size = root.find('size')
            
            # Parse width
            width = int(size.find('width').text)
            height = int(size.find('height').text)

            # Create grayscale image array
            col_mask = np.zeros((height, width), dtype=np.int32)
            table_mask = np.zeros((height, width), dtype = np.int32)

            got_first_column = False
            i=0
            table_xmin = 10000
            table_xmax = 0

            table_ymin = 10000
            table_ymax = 0

            for column in root.findall('object'):
                bndbox = column.find('bndbox')
                xmin = int(bndbox.find('xmin').text)
                ymin = int(bndbox.find('ymin').text)
                xmax = int(bndbox.find('xmax').text)
                ymax = int(bndbox.find('ymax').text)

                col_mask[ymin:ymax, xmin:xmax] = 255

                if got_first_column:
                    if sameTable(prev_ymin, ymin, prev_ymax, ymax) == False:
                        i+=1
                        got_first_column = False
                        table_mask[table_ymin:table_ymax, table_xmin:table_xmax] = 255

                        table_xmin = 10000
                        table_xmax = 0

                        table_ymin = 10000
                        table_ymax = 0

                if got_first_column == False:
                    got_first_column = True
                    first_xmin = xmin

                prev_ymin = ymin
                prev_ymax = ymax

                table_xmin = min(xmin, table_xmin)
                table_xmax = max(xmax, table_xmax)

                table_ymin = min(ymin, table_ymin)
                table_ymax = max(ymax, table_ymax)

            table_mask[table_ymin:table_ymax, table_xmin:table_xmax] = 255

            im = Image.fromarray(col_mask.astype(np.uint8),'L')
            im.save(final_col_directory + "\\" + filename + ".jpeg")

            im = Image.fromarray(table_mask.astype(np.uint8),'L')
            im.save(final_table_directory + "\\" + filename + ".jpeg")

FileNotFoundError: ignored

## Training the data

In [20]:
img_height, img_width = 256, 256
def normalize(input_image):

    input_image = tf.cast(input_image, tf.float32) / 255.0

    return input_image

def decode_img(img):

    img = tf.image.decode_jpeg(img)

    return tf.image.resize(img, [img_height, img_width])

def decode_mask_img(img):

    img = tf.image.decode_jpeg(img, channels=1)

    return tf.image.resize(img, [img_height, img_width])

def create_mask(pred_mask1, pred_mask2):

    pred_mask1 = tf.argmax(pred_mask1, axis=-1)
    pred_mask1 = pred_mask1[..., tf.newaxis]
    pred_mask2 = tf.argmax(pred_mask2, axis=-1)
    pred_mask2 = pred_mask2[..., tf.newaxis]

    return pred_mask1[0], pred_mask2[0]

def display(display_list):

    plt.figure(figsize=(15, 15))

    title = ['Input Image', 'Table Mask', 'Column Mask']

    for i in range(len(display_list)):
        plt.subplot(1, len(display_list), i+1)
        plt.title(title[i])
        plt.imshow(tf.keras.preprocessing.image.array_to_img(display_list[i]))
        plt.axis('off')
        plt.show()

    return None

def process_path(file_path):

    file_path = tf.strings.regex_replace(file_path, '.xml', '.jpg')
    #print(file_path)
    mask_file_path = tf.strings.regex_replace(file_path, '.jpg', '.jpeg')
    print(mask_file_path)
    table_mask_file_path = tf.strings.regex_replace(mask_file_path, 'Marmot_data', 'Table Mask')
    column_mask_file_path = tf.strings.regex_replace(mask_file_path, 'Marmot_data', 'Columns Mask')
    img = normalize(decode_img(tf.io.read_file(file_path)))
    table_mask = normalize(decode_mask_img(tf.io.read_file(table_mask_file_path)))
    column_mask = normalize(decode_mask_img(tf.io.read_file(column_mask_file_path)))

    return img, {"table_output" : table_mask, "column_output" : column_mask}

class TableNet:

    @staticmethod
    def build_table_decoder(inputs, pool3, pool4):
        x = Conv2D(512, (1, 1), activation = 'relu', name='conv7_table')(inputs)
        x = UpSampling2D(size=(2, 2))(x)

        concatenated = Concatenate()([x, pool4])

        x = UpSampling2D(size=(2,2))(concatenated)

        concatenated = Concatenate()([x, pool3])

        x = UpSampling2D(size=(2,2))(concatenated)
        x = UpSampling2D(size=(2,2))(x)

        last = tf.keras.layers.Conv2DTranspose(3, 3, strides=2, padding='same', name='table_output')

        x = last(x)

        return x

    @staticmethod
    def build_column_decoder(inputs, pool3, pool4):

        x = Conv2D(512, (1, 1), activation = 'relu', name='block7_conv1_column')(inputs)
        x = Dropout(0.8, name='block7_dropout_column')(x)

        x = Conv2D(512, (1, 1), activation = 'relu', name='block8_conv1_column')(x)
        x = UpSampling2D(size=(2, 2))(x)

        concatenated = Concatenate()([x, pool4])

        x = UpSampling2D(size=(2,2))(concatenated)

        concatenated = Concatenate()([x, pool3])

        x = UpSampling2D(size=(2,2))(concatenated)
        x = UpSampling2D(size=(2,2))(x)

        last = tf.keras.layers.Conv2DTranspose(3, 3, strides=2, padding='same', name='column_output')

        x = last(x)

        return x

    @staticmethod
    def vgg_base(inputs):
        base_model = tf.keras.applications.vgg19.VGG19(
            input_shape=[256, 256, 3],
            include_top=False, weights='imagenet')

        layer_names = ['block3_pool', 'block4_pool', 'block5_pool']
        layers = [base_model.get_layer(name).output for name in layer_names]

        pool_layers_model = Model(inputs=base_model.input, outputs=layers, name='VGG-19')
        pool_layers_model.trainable = False

        return pool_layers_model(inputs)

    @staticmethod
    def build():
        inputShape = (256, 256, 3)

        inputs = Input(shape=inputShape, name='input')

        pool_layers = TableNet.vgg_base(inputs)

        x = Conv2D(512, (1, 1), activation = 'relu', name='block6_conv1')(pool_layers[2])
        x = Dropout(0.8, name='block6_dropout1')(x)
        x = Conv2D(512, (1, 1), activation = 'relu', name='block6_conv2')(x)
        x = Dropout(0.8, name = 'block6_dropout2')(x)

        table_mask = TableNet.build_table_decoder(x, pool_layers[0], pool_layers[1])
        column_mask = TableNet.build_column_decoder(x, pool_layers[0], pool_layers[1])

        model = Model(
                        inputs=inputs,
                        outputs=[table_mask, column_mask],
                        name="tablenet")

        return model

def main():

    list_ds = tf.data.Dataset.list_files(os.path.join(train_data,'*xml'), shuffle=False)

    DATASET_SIZE = len(list(list_ds))
    train_size = int(0.9 * DATASET_SIZE)
    test_size = int(0.1 * DATASET_SIZE)

    train = list_ds.take(train_size)
    test = list_ds.skip(train_size)

    TRAIN_LENGTH = len(list(train))
    BATCH_SIZE = 1
    BUFFER_SIZE = 10
    STEPS_PER_EPOCH = TRAIN_LENGTH // BATCH_SIZE

    train = train.shuffle(BUFFER_SIZE)

    train = train.map(process_path, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    test = test.map(process_path)

    train_dataset = train.batch(BATCH_SIZE).repeat().prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    test_dataset = test.batch(BATCH_SIZE)

    losses = {
        "table_output": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        "column_output": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    }

    lossWeights = {"table_output": 1.0, "column_output": 1.0}

    model = TableNet.build()

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, epsilon=1e-08),
                  loss=losses,
                  metrics=['accuracy'],
                  loss_weights=lossWeights)

    model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
            filepath="/content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_{epoch}",
            save_best_only=True,
            monitor="val_loss",
            verbose=1,
        )

    early_stoping = tf.keras.callbacks.EarlyStopping(
            monitor="val_loss",
            patience=5,
            verbose=1,
        )

    EPOCHS = 400
    VAL_SUBSPLITS = 5
    VALIDATION_STEPS = test_size//BATCH_SIZE//VAL_SUBSPLITS


    model_history = model.fit(train_dataset, epochs=EPOCHS,
                              steps_per_epoch=STEPS_PER_EPOCH,
                              validation_steps=VALIDATION_STEPS,
                              validation_data=test_dataset,
                              callbacks=[model_checkpoint])

    model.save('/content/drive/MyDrive/Colab Notebooks/Model_output/model_tes')

if __name__ == "__main__":

    train_data = '/content/drive/MyDrive/Colab Notebooks/dataset/Marmot_data'

    main()


Tensor("StaticRegexReplace_1:0", shape=(), dtype=string)
Tensor("StaticRegexReplace_1:0", shape=(), dtype=string)
Epoch 1/400
Epoch 1: val_loss improved from inf to 0.69599, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_1




Epoch 2/400
Epoch 2: val_loss improved from 0.69599 to 0.49438, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_2




Epoch 3/400
Epoch 3: val_loss did not improve from 0.49438
Epoch 4/400
Epoch 4: val_loss did not improve from 0.49438
Epoch 5/400
Epoch 5: val_loss improved from 0.49438 to 0.39195, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_5




Epoch 6/400
Epoch 6: val_loss improved from 0.39195 to 0.38955, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_6




Epoch 7/400
Epoch 7: val_loss did not improve from 0.38955
Epoch 8/400
Epoch 8: val_loss did not improve from 0.38955
Epoch 9/400
Epoch 9: val_loss did not improve from 0.38955
Epoch 10/400
Epoch 10: val_loss did not improve from 0.38955
Epoch 11/400
Epoch 11: val_loss improved from 0.38955 to 0.35774, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_11




Epoch 12/400
Epoch 12: val_loss did not improve from 0.35774
Epoch 13/400
Epoch 13: val_loss improved from 0.35774 to 0.32597, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_13




Epoch 14/400
Epoch 14: val_loss did not improve from 0.32597
Epoch 15/400
Epoch 15: val_loss did not improve from 0.32597
Epoch 16/400
Epoch 16: val_loss did not improve from 0.32597
Epoch 17/400
Epoch 17: val_loss did not improve from 0.32597
Epoch 18/400
Epoch 18: val_loss did not improve from 0.32597
Epoch 19/400
Epoch 19: val_loss improved from 0.32597 to 0.31105, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_19




Epoch 20/400
Epoch 20: val_loss did not improve from 0.31105
Epoch 21/400
Epoch 21: val_loss did not improve from 0.31105
Epoch 22/400
Epoch 22: val_loss did not improve from 0.31105
Epoch 23/400
Epoch 23: val_loss did not improve from 0.31105
Epoch 24/400
Epoch 24: val_loss did not improve from 0.31105
Epoch 25/400
Epoch 25: val_loss did not improve from 0.31105
Epoch 26/400
Epoch 26: val_loss did not improve from 0.31105
Epoch 27/400
Epoch 27: val_loss did not improve from 0.31105
Epoch 28/400
Epoch 28: val_loss did not improve from 0.31105
Epoch 29/400
Epoch 29: val_loss did not improve from 0.31105
Epoch 30/400
Epoch 30: val_loss did not improve from 0.31105
Epoch 31/400
Epoch 31: val_loss did not improve from 0.31105
Epoch 32/400
Epoch 32: val_loss did not improve from 0.31105
Epoch 33/400
Epoch 33: val_loss did not improve from 0.31105
Epoch 34/400
Epoch 34: val_loss did not improve from 0.31105
Epoch 35/400
Epoch 35: val_loss did not improve from 0.31105
Epoch 36/400
Epoch 36: v



Epoch 114/400
Epoch 114: val_loss did not improve from 0.30841
Epoch 115/400
Epoch 115: val_loss did not improve from 0.30841
Epoch 116/400
Epoch 116: val_loss did not improve from 0.30841
Epoch 117/400
Epoch 117: val_loss did not improve from 0.30841
Epoch 118/400
Epoch 118: val_loss did not improve from 0.30841
Epoch 119/400
Epoch 119: val_loss did not improve from 0.30841
Epoch 120/400
Epoch 120: val_loss did not improve from 0.30841
Epoch 121/400
Epoch 121: val_loss improved from 0.30841 to 0.30288, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_121




Epoch 122/400
Epoch 122: val_loss did not improve from 0.30288
Epoch 123/400
Epoch 123: val_loss improved from 0.30288 to 0.28473, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_123




Epoch 124/400
Epoch 124: val_loss did not improve from 0.28473
Epoch 125/400
Epoch 125: val_loss did not improve from 0.28473
Epoch 126/400
Epoch 126: val_loss did not improve from 0.28473
Epoch 127/400
Epoch 127: val_loss did not improve from 0.28473
Epoch 128/400
Epoch 128: val_loss did not improve from 0.28473
Epoch 129/400
Epoch 129: val_loss did not improve from 0.28473
Epoch 130/400
Epoch 130: val_loss did not improve from 0.28473
Epoch 131/400
Epoch 131: val_loss did not improve from 0.28473
Epoch 132/400
Epoch 132: val_loss did not improve from 0.28473
Epoch 133/400
Epoch 133: val_loss did not improve from 0.28473
Epoch 134/400
Epoch 134: val_loss did not improve from 0.28473
Epoch 135/400
Epoch 135: val_loss did not improve from 0.28473
Epoch 136/400
Epoch 136: val_loss did not improve from 0.28473
Epoch 137/400
Epoch 137: val_loss improved from 0.28473 to 0.28468, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_137




Epoch 138/400
Epoch 138: val_loss did not improve from 0.28468
Epoch 139/400
Epoch 139: val_loss improved from 0.28468 to 0.28183, saving model to /content/drive/MyDrive/Colab Notebooks/Model_output/mymodel_139




Epoch 140/400
Epoch 140: val_loss did not improve from 0.28183
Epoch 141/400
Epoch 141: val_loss did not improve from 0.28183
Epoch 142/400
Epoch 142: val_loss did not improve from 0.28183
Epoch 143/400
Epoch 143: val_loss did not improve from 0.28183
Epoch 144/400
Epoch 144: val_loss did not improve from 0.28183
Epoch 145/400
Epoch 145: val_loss did not improve from 0.28183
Epoch 146/400
Epoch 146: val_loss did not improve from 0.28183
Epoch 147/400
Epoch 147: val_loss did not improve from 0.28183
Epoch 148/400
Epoch 148: val_loss did not improve from 0.28183
Epoch 149/400
Epoch 149: val_loss did not improve from 0.28183
Epoch 150/400
Epoch 150: val_loss did not improve from 0.28183
Epoch 151/400
Epoch 151: val_loss did not improve from 0.28183
Epoch 152/400
Epoch 152: val_loss did not improve from 0.28183
Epoch 153/400
Epoch 153: val_loss did not improve from 0.28183
Epoch 154/400
Epoch 154: val_loss did not improve from 0.28183
Epoch 155/400
Epoch 155: val_loss did not improve from 



Epoch 186/400
Epoch 186: val_loss did not improve from 0.26125
Epoch 187/400
Epoch 187: val_loss did not improve from 0.26125
Epoch 188/400
Epoch 188: val_loss did not improve from 0.26125
Epoch 189/400
Epoch 189: val_loss did not improve from 0.26125
Epoch 190/400
Epoch 190: val_loss did not improve from 0.26125
Epoch 191/400
Epoch 191: val_loss did not improve from 0.26125
Epoch 192/400
Epoch 192: val_loss did not improve from 0.26125
Epoch 193/400
Epoch 193: val_loss did not improve from 0.26125
Epoch 194/400
Epoch 194: val_loss did not improve from 0.26125
Epoch 195/400
Epoch 195: val_loss did not improve from 0.26125
Epoch 196/400
Epoch 196: val_loss did not improve from 0.26125
Epoch 197/400
Epoch 197: val_loss did not improve from 0.26125
Epoch 198/400
Epoch 198: val_loss did not improve from 0.26125
Epoch 199/400
Epoch 199: val_loss did not improve from 0.26125
Epoch 200/400
Epoch 200: val_loss did not improve from 0.26125
Epoch 201/400
Epoch 201: val_loss did not improve from 



Epoch 222/400
Epoch 222: val_loss did not improve from 0.25264
Epoch 223/400
Epoch 223: val_loss did not improve from 0.25264
Epoch 224/400
Epoch 224: val_loss did not improve from 0.25264
Epoch 225/400
Epoch 225: val_loss did not improve from 0.25264
Epoch 226/400
Epoch 226: val_loss did not improve from 0.25264
Epoch 227/400
Epoch 227: val_loss did not improve from 0.25264
Epoch 228/400
Epoch 228: val_loss did not improve from 0.25264
Epoch 229/400
Epoch 229: val_loss did not improve from 0.25264
Epoch 230/400
Epoch 230: val_loss did not improve from 0.25264
Epoch 231/400
Epoch 231: val_loss did not improve from 0.25264
Epoch 232/400
Epoch 232: val_loss did not improve from 0.25264
Epoch 233/400
Epoch 233: val_loss did not improve from 0.25264
Epoch 234/400
Epoch 234: val_loss did not improve from 0.25264
Epoch 235/400
Epoch 235: val_loss did not improve from 0.25264
Epoch 236/400
Epoch 236: val_loss did not improve from 0.25264
Epoch 237/400
Epoch 237: val_loss did not improve from 



Epoch 247/400
Epoch 247: val_loss did not improve from 0.25006
Epoch 248/400
Epoch 248: val_loss did not improve from 0.25006
Epoch 249/400
Epoch 249: val_loss did not improve from 0.25006
Epoch 250/400
Epoch 250: val_loss did not improve from 0.25006
Epoch 251/400
Epoch 251: val_loss did not improve from 0.25006
Epoch 252/400
Epoch 252: val_loss did not improve from 0.25006
Epoch 253/400
Epoch 253: val_loss did not improve from 0.25006
Epoch 254/400
Epoch 254: val_loss did not improve from 0.25006
Epoch 255/400
Epoch 255: val_loss did not improve from 0.25006
Epoch 256/400
Epoch 256: val_loss did not improve from 0.25006
Epoch 257/400
Epoch 257: val_loss did not improve from 0.25006
Epoch 258/400
Epoch 258: val_loss did not improve from 0.25006
Epoch 259/400
Epoch 259: val_loss did not improve from 0.25006
Epoch 260/400
Epoch 260: val_loss did not improve from 0.25006
Epoch 261/400
Epoch 261: val_loss did not improve from 0.25006
Epoch 262/400
Epoch 262: val_loss did not improve from 

