In [3]:
from huggingface_hub import hf_hub_download
from PIL import Image
import os
from transformers import DetrImageProcessor, TableTransformerForObjectDetection
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from skimage.measure import label, regionprops
import numpy as np
from skimage.transform import resize
import warnings

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def count_files_in_folder(folder_path):
    if not os.path.exists(folder_path):
        raise FileNotFoundError("The folder does not exist.")

    file_count = 0

    for _, _, files in os.walk(folder_path):
        file_count += len(files)

    return file_count

In [5]:
folder_path ="png_ano_test"
num_files = count_files_in_folder(folder_path)
print("Number of files in the folder:", num_files)

Number of files in the folder: 580


In [6]:
folder_path ="test_img"
num_files = count_files_in_folder(folder_path)
print("Number of files in the folder:", num_files)

Number of files in the folder: 580


Since there is a imbalance in the number of masked files and actual images . We have to introduce blank png files in the png_ano_test folder accurately named.

In [7]:

def find_additional_files_ignore_extension(folder1_path, folder2_path):
    if not os.path.exists(folder1_path) or not os.path.exists(folder2_path):
        raise FileNotFoundError("One or both folders do not exist.")

    folder1_files = set([os.path.splitext(file)[0] for file in os.listdir(folder1_path)])
    folder2_files = set([os.path.splitext(file)[0] for file in os.listdir(folder2_path)])

    additional_files_in_folder1 = folder1_files - folder2_files

    return additional_files_in_folder1

In [8]:
folder1_path = "test_img"
folder2_path = "png_ano_test"

additional_files = find_additional_files_ignore_extension(folder1_path, folder2_path)
print("Additional files in folder 1 but not in folder 2 (ignoring extensions):")

print(len(additional_files))
for file in additional_files:
    print(file)

Additional files in folder 1 but not in folder 2 (ignoring extensions):
0


In [9]:
def create_blank_png_files(folder_path, filenames):
    if not os.path.exists(folder_path):
        raise FileNotFoundError("The folder does not exist.")

    for filename in filenames:
        file_path = os.path.join(folder_path, filename + ".png")
        blank_image = Image.new("RGB", (512, 512), color=(0, 0, 0))  # Create a 1x1 white image that is appropiately scaled
        blank_image.save(file_path)

In [10]:
if additional_files:
    create_blank_png_files(folder2_path, additional_files)
    print(f"Blank PNG files created in {folder2_path} for missing files.")
else:
    print("No missing files found in the second folder.")

No missing files found in the second folder.


## Training Dataset

In [11]:
dataset_path = 'train_img'  # Path to the dataset folder containing images
masked_dataset_path = 'png_ano_train'  # Path to the already masked dataset folder


In [12]:
# Load the pretrained model and feature extractor
model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-detection")
feature_extractor = DetrImageProcessor()

In [13]:
# Set threshold for detection
detection_threshold = 0.7

### IoU implementation

In [14]:
def IoUScore(predicted_array, ground_truth_array):
    intersection = np.logical_and(predicted_array, ground_truth_array)
    union = np.logical_or(predicted_array,ground_truth_array)

    intersection_area = np.sum(intersection)
    union_area = np.sum(union)

    if union_area==0:
        return 0
    else :
        return intersection_area/union_area

In [69]:
# Define evaluation metrics
y_true = []  # Ground truth labels
y_pred = []  # Predicted labels
counter = 0
for file_name in os.listdir(dataset_path):
    
    # Load the dataset image (JPG)
    image_path = os.path.join(dataset_path, file_name)
    image = Image.open(image_path).convert("RGB")
    
    # Load the corresponding masked image (PNG)
    masked_image_path = os.path.join(masked_dataset_path, file_name[:-4] + ".png")  # Assuming the filenames are consistent
    masked_image = Image.open(masked_image_path)
    
    # Preprocess the dataset image
    width, height = image.size
    encoding = feature_extractor(image, return_tensors="pt")
    
    # Perform table detection
    with torch.no_grad():
        outputs = model(**encoding)
    
    # Post-process the detection results
    width, height = image.size
    results = feature_extractor.post_process_object_detection(outputs, threshold=detection_threshold, target_sizes=[(height, width)])[0]
    
    # Convert the results to binary masks
    pred_mask = np.zeros((height, width), dtype=np.uint8)
    for box in results['boxes']:
        xmin, ymin, xmax, ymax = box.tolist()
        xmin, xmax = int(xmin), int(xmax)
        ymin, ymax = int(ymin), int(ymax)
        pred_mask[ymin:ymax, xmin:xmax] = 1
    
    
    # Convert the masked image to binary mask
    true_mask = np.array(masked_image)
    true_mask = true_mask*255
    temp_data = Image.fromarray(true_mask).convert("L")
    true_mask = np.array(temp_data)
    true_mask = np.where(true_mask == 0, 1, 0)
    # Compare the binary masks

    y_true.extend(true_mask.flatten())
    y_pred.extend(pred_mask.flatten())
    
    if counter%100 == 0:
        print(counter," images done....") 
    
    counter += 1

0  images done....
100  images done....
200  images done....
300  images done....
400  images done....
500  images done....
600  images done....
700  images done....
800  images done....
900  images done....
1000  images done....
1100  images done....
1200  images done....
1300  images done....
1400  images done....
1500  images done....


In [70]:
len(y_true), len(y_pred)

(393740288, 393740288)

In [73]:
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
iouscore = IoUScore(y_true,y_pred)


In [74]:
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")
print(f"IoU Score: {iouscore}")


Precision: 0.9604721881196078
Recall: 0.6736312502443701
F1 Score: 0.791876530846445
Accuracy: 0.9304003886947937
IoU Score: 0.655459935234315


In [15]:
all_values = np.empty(0)
keys = []
values = []
ioudict = {}

In [21]:
counter = 0
destination_path = "train_directory"
for file_name in os.listdir(dataset_path):
    
    y_true = []
    y_test = []
    # Load the dataset image (JPG)
    image_path = os.path.join(dataset_path, file_name)
    image = Image.open(image_path).convert("RGB")
    
    # Load the corresponding masked image (PNG)
    masked_image_path = os.path.join(masked_dataset_path, file_name[:-4] + ".png")  # Assuming the filenames are consistent
    masked_image = Image.open(masked_image_path)
    
    # Preprocess the dataset image
    width, height = image.size
    encoding = feature_extractor(image, return_tensors="pt")
    
    # Perform table detection
    with torch.no_grad():
        outputs = model(**encoding)
    
    # Post-process the detection results
    width, height = image.size
    results = feature_extractor.post_process_object_detection(outputs, threshold=detection_threshold, target_sizes=[(height, width)])[0]
    
    # Convert the results to binary masks
    pred_mask = np.zeros((height, width), dtype=np.uint8)
    for box in results['boxes']:
        xmin, ymin, xmax, ymax = box.tolist()
        xmin, xmax = int(xmin), int(xmax)
        ymin, ymax = int(ymin), int(ymax)
        pred_mask[ymin:ymax, xmin:xmax] = 1
    
    
    # Convert the masked image to binary mask
    true_mask = np.array(masked_image)
    true_mask = true_mask*255
    temp_data = Image.fromarray(true_mask).convert("L")
    true_mask = np.array(temp_data)
    true_mask = np.where(true_mask == 0, 1, 0)

    np.append(all_values,IoUScore(pred_mask,true_mask))
    if IoUScore(pred_mask,true_mask)<0.8:
        filename = os.path.basename(image_path)
        destination_file_path = os.path.join(destination_path, filename)
        with open(source_path, 'rb') as source_file, open(destination_file_path, 'wb') as dest_file:
            dest_file.write(source_file.read())
        keys.append(image_path)
        values.append(IoUScore(pred_mask,true_mask))
    
    if counter%100 == 0:
        print(counter," images done....") 
    
    counter += 1
    

0  images done....
100  images done....
200  images done....
300  images done....
400  images done....
500  images done....
600  images done....
700  images done....
800  images done....
900  images done....
1000  images done....
1100  images done....
1200  images done....
1300  images done....
1400  images done....
1500  images done....


In [None]:
for i in range(len(keys)):
    ioudict[keys[i]] = values[i]

len(ioudict),np.mean(all_values)

667 nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


## Test Dataset

In [22]:
dataset_path = 'test_img'  # Path to the dataset folder containing images
masked_dataset_path = 'png_ano_test'  # Path to the already masked dataset folder

In [23]:
# Define evaluation metrics

y_true_array = []  # ground truth value
y_pred_array = []  # predicted value

y_true = np.array(y_true_array)
y_pred = np.array(y_pred_array)

In [90]:
counter = 0
for file_name in os.listdir(dataset_path):
    
    # Load the dataset image (JPG)
    image_path = os.path.join(dataset_path, file_name)
    image = Image.open(image_path).convert("RGB")
    
    # Load the corresponding masked image (PNG)
    masked_image_path = os.path.join(masked_dataset_path, file_name[:-4] + ".png")  # Assuming the filenames are consistent
    masked_image = Image.open(masked_image_path)
    
    # Preprocess the dataset image
    width, height = image.size
    encoding = feature_extractor(image, return_tensors="pt")
    
    # Perform table detection
    with torch.no_grad():
        outputs = model(**encoding)
    
    # Post-process the detection results
    width, height = image.size
    results = feature_extractor.post_process_object_detection(outputs, threshold=detection_threshold, target_sizes=[(height, width)])[0]
    
    # Convert the results to binary masks
    pred_mask = np.zeros((height, width), dtype=np.uint8)
    for box in results['boxes']:
        xmin, ymin, xmax, ymax = box.tolist()
        xmin, xmax = int(xmin), int(xmax)
        ymin, ymax = int(ymin), int(ymax)
        pred_mask[ymin:ymax, xmin:xmax] = 1
    
    
    # Convert the masked image to binary mask
    true_mask = np.array(masked_image)
    true_mask = true_mask*255
    temp_data = Image.fromarray(true_mask).convert("L")
    true_mask = np.array(temp_data)
    true_mask = np.where(true_mask == 0, 1, 0)
    # Compare the binary masks

    y_true = np.append(y_true, true_mask.flatten())
    y_pred = np.append(y_pred, pred_mask.flatten())
    
    if counter%100 == 0:
        print(counter," images done....") 
    
    counter += 1

0  images done....
100  images done....
200  images done....
300  images done....
400  images done....
500  images done....


In [91]:
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
iouscore = IoUScore(y_true,y_pred)


In [92]:
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print(f"Accuracy: {accuracy}")
print(f"IoU Score: {iouscore}")

Precision: 0.9661867233931971
Recall: 0.6209830688522506
F1 Score: 0.7560446267417663
Accuracy: 0.913078025291706
IoU Score: 0.607774718446285


In [24]:
all_values = np.empty(0)
keys = []
values = []
ioudict = {}

In [25]:
counter = 0
destination_path = "test_directory"
for file_name in os.listdir(dataset_path):    
    y_true = []
    y_test = []
    # Load the dataset image (JPG)
    image_path = os.path.join(dataset_path, file_name)
    image = Image.open(image_path).convert("RGB")
    
    # Load the corresponding masked image (PNG)
    masked_image_path = os.path.join(masked_dataset_path, file_name[:-4] + ".png")  # Assuming the filenames are consistent
    masked_image = Image.open(masked_image_path)
    
    # Preprocess the dataset image
    width, height = image.size
    encoding = feature_extractor(image, return_tensors="pt")
    
    # Perform table detection
    with torch.no_grad():
        outputs = model(**encoding)
    
    # Post-process the detection results
    width, height = image.size
    results = feature_extractor.post_process_object_detection(outputs, threshold=detection_threshold, target_sizes=[(height, width)])[0]
    
    # Convert the results to binary masks
    pred_mask = np.zeros((height, width), dtype=np.uint8)
    for box in results['boxes']:
        xmin, ymin, xmax, ymax = box.tolist()
        xmin, xmax = int(xmin), int(xmax)
        ymin, ymax = int(ymin), int(ymax)
        pred_mask[ymin:ymax, xmin:xmax] = 1
    
    
    # Convert the masked image to binary mask
    true_mask = np.array(masked_image)
    true_mask = true_mask*255
    temp_data = Image.fromarray(true_mask).convert("L")
    true_mask = np.array(temp_data)
    true_mask = np.where(true_mask == 0, 1, 0)

    np.append(all_values,IoUScore(pred_mask,true_mask))
    if IoUScore(pred_mask,true_mask)<0.8:
        filename = os.path.basename(image_path)
        destination_file_path = os.path.join(destination_path, filename)
        with open(source_path, 'rb') as source_file, open(destination_file_path, 'wb') as dest_file:
            dest_file.write(source_file.read())        
        
        keys.append(image_path)
        values.append(IoUScore(pred_mask,true_mask))
    
    if counter%100 == 0:
        print(counter," images done....") 
    
    counter += 1
    

0  images done....
100  images done....
200  images done....
300  images done....
400  images done....
500  images done....


In [26]:
for i in range(len(keys)):
    ioudict[keys[i]] = values[i]

len(ioudict),np.mean(all_values)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


(263, nan)