# In [None]:
import torch
from transformers import TableTransformerForObjectDetection, DetrImageProcessor
from PIL import Image
import pytesseract  # Tesseract OCR
import pandas as pd

def load_model():
    """Load the pretrained table-detection model and its image processor.

    Both objects are created with ``from_pretrained`` from the same
    "microsoft/table-transformer-detection" checkpoint.

    Returns:
        A ``(processor, model)`` tuple: the ``DetrImageProcessor`` first and
        the ``TableTransformerForObjectDetection`` second.
    """
    checkpoint = "microsoft/table-transformer-detection"
    image_processor = DetrImageProcessor.from_pretrained(checkpoint)
    detector = TableTransformerForObjectDetection.from_pretrained(checkpoint)
    return image_processor, detector

def load_image(image_path):
    """Open an image file and return it as a fully loaded RGB image.

    ``Image.open`` only reads the file header and keeps the file handle open
    until the pixel data is needed. Converting inside a ``with`` block forces
    the pixels to be read and releases the handle immediately. The RGB
    conversion also normalises grayscale, palette and RGBA inputs to the
    three-channel layout that the detection processor is normally fed.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A new in-memory ``PIL.Image.Image`` in "RGB" mode.

    Raises:
        Whatever ``Image.open`` raises for a missing or unreadable file
        (e.g. ``FileNotFoundError``).
    """
    with Image.open(image_path) as source:
        # convert() returns an independent, fully loaded copy, so it remains
        # valid after the underlying file is closed.
        return source.convert("RGB")

def detect_table(processor, model, image, threshold=0.9):
    """Run object detection on an image and return the post-processed result.

    Args:
        processor: Image processor; called to build the model inputs and used
            for ``post_process_object_detection``.
        model: Detection model, called with the processor's output unpacked
            as keyword arguments.
        image: Input image exposing ``size``; for a PIL image this is
            ``(width, height)``.
        threshold: Minimum confidence passed to the post-processing step.
            Defaults to 0.9.

    Returns:
        The first (and only) entry of the post-processed detections for this
        single image. Its ``'boxes'`` entry is what
        ``extract_text_from_cells`` iterates over.
    """
    inputs = processor(images=image, return_tensors="pt")

    # Pure inference: skip autograd bookkeeping so no graph is built and the
    # returned tensors are not attached to one.
    with torch.no_grad():
        outputs = model(**inputs)

    # The size is reversed so post-processing receives (height, width) and
    # can rescale boxes to the original image's pixel coordinates.
    target_sizes = torch.tensor([image.size[::-1]])
    detections = processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=threshold
    )
    return detections[0]

def extract_text_from_cells(image, results, ocr_config='--psm 7'):
    """OCR every detected box of an image and return the texts as a list.

    Each box in ``results['boxes']`` is cropped out of ``image`` and passed to
    Tesseract. The boxes are whatever the detector returned; this function
    does not check that they are individual table cells.

    Args:
        image: PIL image the boxes refer to.
        results: Mapping with a ``'boxes'`` entry; each box must provide
            ``tolist()`` yielding ``[xmin, ymin, xmax, ymax]`` in pixels.
        ocr_config: Extra Tesseract command-line options. The default
            ``'--psm 7'`` treats each crop as a single line of text.

    Returns:
        One stripped string per box, in the same order as the boxes. A box
        with no area inside the image yields an empty string so the output
        stays aligned with the boxes.
    """
    width, height = image.size
    cell_texts = []
    for box in results['boxes']:
        xmin, ymin, xmax, ymax = box.tolist()

        # Predicted boxes can extend past the image edges; clamp them so the
        # crop holds only real pixels instead of padding.
        left = min(max(round(xmin), 0), width)
        top = min(max(round(ymin), 0), height)
        right = min(max(round(xmax), 0), width)
        bottom = min(max(round(ymax), 0), height)

        # Nothing to OCR for an empty region; keep a placeholder entry.
        if right <= left or bottom <= top:
            cell_texts.append("")
            continue

        cropped_image = image.crop((left, top, right, bottom))
        extracted_text = pytesseract.image_to_string(cropped_image, config=ocr_config)
        cell_texts.append(extracted_text.strip())

    return cell_texts

def save_to_excel(cell_texts, excel_file_path):
    """Write the extracted texts to an Excel file as a single column.

    Args:
        cell_texts: List of extracted strings, one per row.
        excel_file_path: Destination path of the Excel file.

    The sheet has one column headed "Extracted Text" and no index column.
    A confirmation message is printed once the file has been written.
    """
    column_names = ["Extracted Text"]
    frame = pd.DataFrame(data=cell_texts, columns=column_names)
    frame.to_excel(excel_file_path, index=False)
    print(f"Extracted text saved to {excel_file_path}")

if __name__ == "__main__":
    # Script entry point: detect boxes in one image, OCR them, and export
    # the recognised text to an Excel file.

    # Set up the detector and its image processor
    image_processor, detector = load_model()

    # Input image location (change this to your image file)
    source_path = "path_to_your_image_file.jpg"  # Replace with actual image path
    source_image = load_image(source_path)

    # Detect boxes, then OCR the region inside each one
    detections = detect_table(image_processor, detector, source_image)
    texts = extract_text_from_cells(source_image, detections)

    # Export the recognised text
    output_path = "extracted_table_data.xlsx"  # Change the file name/path as needed
    save_to_excel(texts, output_path)
