In [None]:
import zipfile
import os

# Source archive on Google Drive and the folder it is unpacked into.
zip_path = "/content/drive/My Drive/archive.zip"  # Update if needed
extract_path = "/content/sample_data/ML_DATA"

# Ensure the destination exists; exist_ok makes re-running this cell harmless.
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive; the context manager closes the file
# handle once extraction finishes (or fails).
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(path=extract_path)

print("Extraction completed!")

Extraction completed!




In [None]:
from pdf2image import convert_from_path
import numpy as np

In [None]:
def pdf_to_images(pdf_path):
    """Render a PDF file into a list of page images.

    Thin wrapper that delegates to ``convert_from_path`` and returns its
    result unchanged (one image per page).
    """
    return convert_from_path(pdf_path)
def preprocess_image(img, target_size=(128, 128)):
    """Resize an image and turn it into a single-item batch of scaled pixels.

    Args:
        img: Object exposing ``resize(size)`` whose result ``np.array`` can
            convert (presumably a PIL image — confirm against callers).
        target_size: Size passed straight to ``img.resize``.

    Returns:
        The resized pixel data divided by 255.0, with a new leading axis of
        length 1 added in front.
    """
    scaled_pixels = np.array(img.resize(target_size)) / 255.0  # Normalize to [0, 1]
    batch = np.expand_dims(scaled_pixels, axis=0)
    return batch
from tensorflow.keras.models import load_model
# Load the saved Keras model (.h5 file) from the mounted Google Drive.
MODEL_PATH = '/content/drive/My Drive/table_detection_model.h5'
model = load_model(MODEL_PATH)



In [None]:
def predict_bboxes_from_pdf(model, pdf_path, target_size=(128, 128)):
    """Predict one bounding box per page of a PDF.

    Args:
        model: Object exposing ``predict(batch)``; the first row of its
            output is taken as the page's bounding box.
        pdf_path: Path of the PDF handed to ``convert_from_path``.
        target_size: Size each page is resized to before prediction.
            Defaults to ``(128, 128)``, the size previously hard-coded via
            ``preprocess_image``.

    Returns:
        A list with one predicted bounding box per page, in page order
        (empty when the PDF yields no pages).
    """
    # Render and preprocess inline instead of going through the module-level
    # helpers ``pdf_to_images`` / ``preprocess_image``: a later cell in this
    # notebook redefines both with different signatures (folder-writing and
    # path-based), which would make this function fail once that cell has run.
    pages = convert_from_path(pdf_path)
    bboxes = []

    for page in pages:
        pixels = np.array(page.resize(target_size)) / 255.0  # Normalize to [0, 1]
        batch = np.expand_dims(pixels, axis=0)  # Add batch dimension
        bboxes.append(model.predict(batch)[0])

    return bboxes

In [None]:
def calculate_iou(pred_bbox, true_bbox):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Args:
        pred_bbox: Four values unpacked as ``(x_min, y_min, x_max, y_max)``.
        true_bbox: Four values in the same layout.

    Returns:
        ``0.0`` when the boxes do not overlap (including boxes that merely
        touch along an edge); otherwise the overlap area divided by the area
        of the union.
    """
    px0, py0, px1, py1 = pred_bbox
    tx0, ty0, tx1, ty1 = true_bbox

    # Corners of the candidate overlap rectangle.
    ix0 = max(px0, tx0)
    iy0 = max(py0, ty0)
    ix1 = min(px1, tx1)
    iy1 = min(py1, ty1)

    # Guard clause: an empty or zero-width/height overlap means no intersection.
    if ix1 <= ix0 or iy1 <= iy0:
        return 0.0

    overlap_area = (ix1 - ix0) * (iy1 - iy0)

    # Union = both areas minus the part counted twice.
    area_pred = (px1 - px0) * (py1 - py0)
    area_true = (tx1 - tx0) * (ty1 - ty0)
    return overlap_area / (area_pred + area_true - overlap_area)


In [None]:
import os
import json
import numpy as np
import pandas as pd
from PIL import Image
import tensorflow as tf
from pdf2image import convert_from_path

# Load the trained model from its saved .h5 file on the mounted Google Drive.
MODEL_PATH = '/content/drive/My Drive/table_detection_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

# Convert PDF to images
def pdf_to_images(pdf_path, output_folder):
    """Render each page of a PDF and save it as a JPEG file.

    Args:
        pdf_path: Path of the PDF handed to ``convert_from_path``.
        output_folder: Directory that receives the files; created (including
            missing parents) when it does not exist yet.

    Returns:
        None. Pages are written as ``page_<n>.jpg`` (numbered from 1) and
        each saved path is printed.
    """
    images = convert_from_path(pdf_path)

    # exist_ok=True replaces the check-then-create pair, which could race with
    # another process creating the folder between the check and the call.
    os.makedirs(output_folder, exist_ok=True)

    for page_number, img in enumerate(images, start=1):
        img_path = os.path.join(output_folder, f'page_{page_number}.jpg')
        img.save(img_path, 'JPEG')
        print(f"Saved {img_path}")

# Preprocess image (resize and normalize)
def preprocess_image(img_path, img_size=(128, 128)):
    """Load an image file, resize it and return it as a single-item batch.

    Args:
        img_path: Path of the image file passed to ``Image.open``.
        img_size: Size passed straight to ``resize``.

    Returns:
        The resized pixel data divided by 255.0, with a new leading axis of
        length 1 added in front.
    """
    # Image.open keeps the underlying file open until the data is loaded (and
    # longer for multi-frame formats); the context manager releases the handle
    # deterministically. ``resize`` returns a separate image, so it stays
    # usable after the source has been closed.
    with Image.open(img_path) as img:
        resized = img.resize(img_size)

    img_array = np.array(resized) / 255.0  # Normalize to [0, 1]
    return np.expand_dims(img_array, axis=0)  # Add batch dimension

def _natural_sort_key(name):
    """Sort key ordering digit runs numerically, so page_2 precedes page_10."""
    import re  # local import: only this helper needs it

    # re.split with a capturing group alternates text / digits / text ..., so
    # odd positions are always the digit runs.
    parts = re.split(r'(\d+)', name)
    return [int(part) if index % 2 else part.lower() for index, part in enumerate(parts)]


# Predict bounding boxes for each image
def predict_bboxes(model, img_folder, output_csv,
                   image_extensions=('.jpg', '.jpeg', '.png', '.bmp',
                                     '.gif', '.tif', '.tiff', '.webp')):
    """Predict a bounding box for every image file in a folder and save a CSV.

    Args:
        model: Object exposing ``predict(batch)``; the first row of its
            output is stored as the image's bounding box.
        img_folder: Directory whose image files are processed.
        output_csv: Path of the CSV file to write (columns ``image``, ``bbox``).
        image_extensions: File extensions (case-insensitive) treated as
            images. Anything else in the folder is skipped.

    Returns:
        None. The results are written to ``output_csv`` and the path is printed.
    """
    allowed = tuple(ext.lower() for ext in image_extensions)

    # os.listdir returns every entry in arbitrary order. Restrict to regular
    # image files so that non-image entries (e.g. a results CSV written into
    # this same folder by an earlier run) are not fed to the image loader, and
    # sort so the CSV rows come out in a stable, page-number order.
    image_files = sorted(
        (
            entry for entry in os.listdir(img_folder)
            if entry.lower().endswith(allowed)
            and os.path.isfile(os.path.join(img_folder, entry))
        ),
        key=_natural_sort_key,
    )

    results = []
    for img_file in image_files:
        img_path = os.path.join(img_folder, img_file)

        # Preprocess the image
        img_array = preprocess_image(img_path)

        # Predict bounding box
        bbox = model.predict(img_array)[0]

        # Save results
        results.append({
            'image': img_file,
            'bbox': bbox.tolist()
        })

    # Save results to a CSV file; explicit columns keep the header even when
    # no image was found.
    df = pd.DataFrame(results, columns=['image', 'bbox'])
    df.to_csv(output_csv, index=False)
    print(f"Results saved to {output_csv}")

# Locations on the mounted Google Drive used by this run.
pdf_path = '/content/drive/My Drive/EX-99.pdf'  # Replace with your PDF file path
output_folder = '/content/drive/My Drive/Predected_Data'  # Folder to save the images
output_csv = '/content/drive/My Drive/Predected_Data/output_bboxes.csv'  # Path to save the bounding box predictions

# Step 1: render the PDF pages to image files in the output folder
pdf_to_images(pdf_path=pdf_path, output_folder=output_folder)

# Step 2: run the model over those images and write the predictions to the CSV
predict_bboxes(model=model, img_folder=output_folder, output_csv=output_csv)




Saved /content/drive/My Drive/Predected_Data/page_1.jpg
Saved /content/drive/My Drive/Predected_Data/page_2.jpg
Saved /content/drive/My Drive/Predected_Data/page_3.jpg
Saved /content/drive/My Drive/Predected_Data/page_4.jpg
Saved /content/drive/My Drive/Predected_Data/page_5.jpg
Saved /content/drive/My Drive/Predected_Data/page_6.jpg
Saved /content/drive/My Drive/Predected_Data/page_7.jpg
Saved /content/drive/My Drive/Predected_Data/page_8.jpg
Saved /content/drive/My Drive/Predected_Data/page_9.jpg
Saved /content/drive/My Drive/Predected_Data/page_10.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 207ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 78ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/