In [None]:
import os
import cv2
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import (
    LayoutLMv3Processor, 
    LayoutLMv3ForSequenceClassification,
    LayoutLMv3Config,
    get_linear_schedule_with_warmup
)
from PIL import Image
import json
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import warnings
warnings.filterwarnings('ignore')

# Process-wide runtime switches, applied before any model or tokenizer is built.
torch.backends.cudnn.benchmark = True
os.environ.update({'TOKENIZERS_PARALLELISM': 'false'})

# One-off environment report.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

# Run configuration shared by the dataset, the loader factory and main().
# The position of a name inside DOC_TYPES is the class index used as its label.
CONFIG = dict(
    ROOT_DIR="/home/hasan/datasets/classify/test",
    DOC_TYPES=[
        "budget",
        "ID",
        "invoice",
        "form",
        "memo",
        "letter",
        "advertisement",
        "receipt",
        "scientific_report",
        "email",
        "scientific_publication",
        "handwritten",
        "news_article",
        "presentation",
        "resume",
        "questionnaire",
        "specification",
    ],
    BATCH_SIZE=2,
    MAX_LENGTH=512,
    EPOCHS=20,
    LEARNING_RATE=2e-5,
    USE_VISUAL=True,
    WARMUP_STEPS=500,
    PATIENCE=5,
    MODEL_NAME="microsoft/layoutlmv3-base",
)

class DocumentTypeDataset(Dataset):
    """Map ``(image_path, doc_type)`` samples to LayoutLMv3 model inputs.

    Every item is a dict of tensors produced by ``processor`` plus a
    ``labels`` tensor holding the class index of the sample's document type.
    Errors raised while loading or encoding a sample are caught; such samples
    yield an all-zero placeholder item that still carries the sample's label.

    Args:
        samples: Sequence of ``(image_path, doc_type)`` pairs.
        processor: Processor used to OCR and encode the pages.
        max_length: Sequence length used for padding and truncation.
        use_visual: When true, the page image is passed to the processor and
            placeholder items include ``pixel_values``.
    """

    # LayoutLMv3 expects box coordinates on a 0-1000 grid that is independent
    # of the image resolution (see the Hugging Face LayoutLMv3 documentation).
    BOX_SCALE = 1000
    # Upper bound on the number of OCR words forwarded to the processor.
    MAX_WORDS = 200
    # Longest allowed image side, in pixels, after loading.
    MAX_IMAGE_SIDE = 1000

    def __init__(self, samples, processor, max_length=512, use_visual=True):
        self.samples = samples
        self.processor = processor
        self.max_length = max_length
        self.use_visual = use_visual

        # Class index == position of the name in CONFIG['DOC_TYPES'].
        self.doc_type_mapping = {
            doc_type: idx for idx, doc_type in enumerate(CONFIG['DOC_TYPES'])
        }

        print(f"Document type mapping: {self.doc_type_mapping}")

    def __len__(self):
        return len(self.samples)

    @classmethod
    def _normalize_box(cls, bbox, image_width, image_height):
        """Clamp a pixel-space box to the image and rescale it to 0..BOX_SCALE."""
        x1, y1, x2, y2 = bbox

        # Clamp to the image and force a strictly positive extent.
        x1 = max(0, min(int(x1), image_width))
        y1 = max(0, min(int(y1), image_height))
        x2 = max(x1 + 1, min(int(x2), image_width))
        y2 = max(y1 + 1, min(int(y2), image_height))

        def rescale(value, extent):
            if extent <= 0:
                return 0
            scaled = int(cls.BOX_SCALE * value / extent)
            return max(0, min(cls.BOX_SCALE, scaled))

        return [
            rescale(x1, image_width),
            rescale(y1, image_height),
            rescale(x2, image_width),
            rescale(y2, image_height),
        ]

    def _placeholder_ocr(self, image_width, image_height):
        """Return the single empty word/box pair used when OCR yields nothing."""
        return [""], [self._normalize_box((0, 0, 100, 20), image_width, image_height)]

    def extract_text_and_boxes_with_processor(self, image):
        """Extract words and 0-1000 normalised boxes via ``processor.apply_ocr``.

        Args:
            image: Object exposing ``size`` as ``(width, height)``.

        Returns:
            ``(words, boxes)`` where ``boxes[i]`` is ``[x1, y1, x2, y2]`` for
            ``words[i]``. A single empty word with a small placeholder box is
            returned when OCR fails or finds no text.
        """
        image_width, image_height = image.size

        try:
            # NOTE(review): this expects `apply_ocr(image)` to return an
            # iterable of dicts with 'text' and 'bbox' (pixel coordinates).
            # The documented LayoutLMv3Processor exposes `apply_ocr` only as
            # an image-processor flag - confirm the processor passed in really
            # provides this method, otherwise every sample ends up with the
            # empty placeholder below.
            ocr_result = self.processor.apply_ocr(image)

            words = []
            boxes = []

            for item in ocr_result:
                if 'text' not in item or 'bbox' not in item:
                    continue
                text = item['text'].strip()
                bbox = item['bbox']
                if not text or len(bbox) != 4:
                    continue
                words.append(text)
                boxes.append(self._normalize_box(bbox, image_width, image_height))

            if not words:
                return self._placeholder_ocr(image_width, image_height)

            return words, boxes

        except Exception as e:
            print(f"OCR extraction failed: {e}")
            return self._placeholder_ocr(image_width, image_height)

    def preprocess_image(self, image_path):
        """Load ``image_path`` as RGB, downscaled so no side exceeds MAX_IMAGE_SIDE.

        A blank white 224x224 image is returned when the file is missing or
        cannot be read.
        """
        try:
            if not os.path.exists(image_path):
                print(f"File not found: {image_path}")
                return Image.new('RGB', (224, 224), color='white')

            # convert() returns a fully loaded copy, so the file handle can be
            # released as soon as the with-block exits.
            with Image.open(image_path) as handle:
                image = handle.convert('RGB')

            width, height = image.size
            if width > self.MAX_IMAGE_SIDE or height > self.MAX_IMAGE_SIDE:
                ratio = min(self.MAX_IMAGE_SIDE / width, self.MAX_IMAGE_SIDE / height)
                new_size = (int(width * ratio), int(height * ratio))
                image = image.resize(new_size, Image.Resampling.LANCZOS)

            return image

        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            return Image.new('RGB', (224, 224), color='white')

    def _encode(self, image, words, boxes):
        """Run the processor, retrying without boxes if it rejects them."""
        shared = dict(
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        try:
            if self.use_visual:
                return self.processor(images=image, text=words, boxes=boxes, **shared)
            return self.processor(text=words, boxes=boxes, **shared)
        except ValueError as ve:
            message = str(ve).lower()
            if "cannot provide bounding box" not in message and "apply_ocr" not in message:
                raise
            print(f"Bounding box error, using image-only processing: {ve}")
            if self.use_visual:
                return self.processor(images=image, **shared)
            return self.processor(text=" ".join(words), **shared)

    def _fallback_item(self, label):
        """Build an all-zero item of the expected shapes carrying ``label``."""
        fallback = {
            'input_ids': torch.zeros(self.max_length, dtype=torch.long),
            'attention_mask': torch.ones(self.max_length, dtype=torch.long),
            'bbox': torch.zeros((self.max_length, 4), dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.long),
        }

        if self.use_visual:
            fallback['pixel_values'] = torch.zeros((3, 224, 224), dtype=torch.float)

        return fallback

    def __getitem__(self, idx):
        """Return the encoded sample at ``idx`` as a dict of tensors.

        Any error is reported on stdout and answered with a placeholder item.
        The placeholder keeps the sample's real label whenever it could be
        determined, so a failed sample is not silently relabelled as class 0.
        """
        label = 0
        try:
            image_path, doc_type = self.samples[idx]
            if doc_type not in self.doc_type_mapping:
                print(f"Unknown document type '{doc_type}' for sample {idx}; using label 0")
            label = self.doc_type_mapping.get(doc_type, 0)

            image = self.preprocess_image(image_path)
            words, boxes = self.extract_text_and_boxes_with_processor(image)
            words = words[:self.MAX_WORDS]
            boxes = boxes[:self.MAX_WORDS]

            encoding = self._encode(image, words, boxes)

            result = {}
            for key, value in encoding.items():
                if value is None:
                    continue
                # The processor returns a batch of one; drop that leading axis.
                if isinstance(value, torch.Tensor) and value.dim() > 1 and value.size(0) == 1:
                    value = value.squeeze(0)
                result[key] = value

            result['labels'] = torch.tensor(label, dtype=torch.long)

            return result

        except Exception as e:
            print(f"Error processing sample {idx}: {e}")
            return self._fallback_item(label)

def collect_document_samples(root_dir, doc_types):
    """Collect ``(image_path, doc_type)`` pairs from a per-class directory tree.

    For every ``doc_type`` the following directories are scanned, in this
    order, when they exist: ``<root>/<type>/Real/Images``,
    ``<root>/<type>/Forged/Images``, ``<root>/<type>/Images`` and
    ``<root>/<type>`` itself. Files are matched by image extension
    (case-insensitive) and listed in sorted order so the result, and any
    seeded split derived from it, is reproducible across filesystems.

    Args:
        root_dir: Dataset root containing one sub-directory per document type.
        doc_types: Iterable of document type names to look for.

    Returns:
        List of ``(path, doc_type)`` tuples; empty if ``root_dir`` is missing.
    """
    samples = []

    if not os.path.exists(root_dir):
        print(f"Root directory does not exist: {root_dir}")
        return samples

    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.tif', '.tiff')
    counts = {}

    for doc_type in doc_types:
        doc_dir = os.path.join(root_dir, doc_type)
        if not os.path.exists(doc_dir):
            print(f"Document type directory not found: {doc_dir}")
            continue

        candidate_dirs = (
            os.path.join(doc_dir, "Real", "Images"),
            os.path.join(doc_dir, "Forged", "Images"),
            os.path.join(doc_dir, "Images"),
            doc_dir,
        )

        for img_dir in candidate_dirs:
            # isdir (rather than exists) keeps a stray regular file from
            # crashing os.listdir.
            if not os.path.isdir(img_dir):
                continue
            # os.listdir order is arbitrary; sort for determinism.
            for fname in sorted(os.listdir(img_dir)):
                if not fname.lower().endswith(image_suffixes):
                    continue
                path = os.path.join(img_dir, fname)
                if os.path.isfile(path):
                    samples.append((path, doc_type))
                    counts[doc_type] = counts.get(doc_type, 0) + 1

    print(f"Collected {len(samples)} samples")
    for doc_type in doc_types:
        count = counts.get(doc_type, 0)
        if count > 0:
            print(f"  {doc_type}: {count} samples")

    return samples

def _pad_to_common_shape(tensors):
    """Zero-pad same-rank tensors at the end of each axis to a common shape."""
    if len({t.dim() for t in tensors}) != 1:
        raise ValueError(
            f"Cannot pad tensors of different rank: {[tuple(t.shape) for t in tensors]}"
        )

    target = [max(sizes) for sizes in zip(*(t.shape for t in tensors))]

    padded = []
    for t in tensors:
        # F.pad takes (left, right) pairs starting from the LAST dimension.
        pad_sizes = []
        for size, wanted in zip(reversed(t.shape), reversed(target)):
            pad_sizes.extend([0, wanted - size])
        padded.append(torch.nn.functional.pad(t, pad_sizes) if any(pad_sizes) else t)
    return padded


def improved_collate_fn(batch, use_visual=True):
    """Collate a list of per-sample dicts into one batch dict.

    Tensors are stacked along a new leading batch axis. Tensors of the same
    rank but different sizes are zero-padded to a common shape first. A key
    missing from (or ``None`` in) some items is filled with zeros shaped like
    the first item that does provide it, so every stacked tensor has exactly
    ``len(batch)`` rows. Non-tensor values are returned as plain lists.

    Args:
        batch: List of dicts as produced by the dataset's ``__getitem__``.
        use_visual: Accepted for interface compatibility; not used here.

    Returns:
        Dict mapping each key seen in the batch to its collated value, in
        first-seen key order. An empty batch yields ``{}``.
    """
    if not batch:
        return {}

    # dict.fromkeys keeps first-seen order, unlike a set.
    keys = list(dict.fromkeys(key for item in batch for key in item))

    batch_dict = {}

    for key in keys:
        template = next(
            (item[key] for item in batch if key in item and item[key] is not None),
            None,
        )
        if template is None:
            continue

        values = []
        for item in batch:
            value = item[key] if key in item else None
            if value is None:
                # Fill gaps regardless of where in the batch they occur.
                if isinstance(template, torch.Tensor):
                    value = torch.zeros_like(template)
                else:
                    value = template
            values.append(value)

        if not isinstance(values[0], torch.Tensor):
            batch_dict[key] = values
            continue

        try:
            shapes = [v.shape for v in values]
            if len(set(shapes)) == 1:
                batch_dict[key] = torch.stack(values)
            else:
                print(f"Warning: Inconsistent shapes for {key}: {shapes}")
                batch_dict[key] = torch.stack(_pad_to_common_shape(values))
        except Exception as e:
            # Last-resort behaviour kept from the original implementation so
            # collation never raises. NOTE(review): this repeats the first
            # sample's tensor for the whole batch, which misaligns inputs and
            # labels - consider dropping the batch instead.
            print(f"Error stacking {key}: {e}")
            batch_dict[key] = values[0].unsqueeze(0).repeat(
                len(batch), *[1] * values[0].dim()
            )

    return batch_dict

def create_dataloaders(root_dir, doc_types, processor, batch_size=2, max_length=512, use_visual=True):
    """Build train/validation/test DataLoaders from a document image tree.

    Samples are collected with ``collect_document_samples``; classes with
    fewer than three samples are dropped (and reported). The remainder is
    split 70/15/15 with stratification, falling back to a seeded plain
    shuffle when stratification is not possible.

    Args:
        root_dir: Dataset root directory.
        doc_types: Document type names to collect.
        processor: Processor handed to each ``DocumentTypeDataset``.
        batch_size: Batch size for all three loaders.
        max_length: Sequence length handed to each dataset.
        use_visual: Forwarded to the datasets and the collate function.

    Returns:
        ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: If no samples are found or no class has enough samples.
    """
    all_samples = collect_document_samples(root_dir, doc_types)

    if len(all_samples) == 0:
        raise ValueError("No samples found! Check your data directory structure.")

    sample_counts = {}
    for _, doc_type in all_samples:
        sample_counts[doc_type] = sample_counts.get(doc_type, 0) + 1

    min_samples = 3
    valid_samples = [
        sample for sample in all_samples if sample_counts[sample[1]] >= min_samples
    ]

    # Make the filtering visible instead of silently losing classes.
    dropped = sorted(name for name, count in sample_counts.items() if count < min_samples)
    if dropped:
        print(f"Skipping classes with fewer than {min_samples} samples: {dropped}")

    if len(valid_samples) == 0:
        raise ValueError("No classes have enough samples for splitting!")

    print(f"Using {len(valid_samples)} samples from {len(set(s[1] for s in valid_samples))} classes")

    try:
        train_samples, temp_samples = train_test_split(
            valid_samples, test_size=0.3,
            stratify=[sample[1] for sample in valid_samples],
            random_state=42
        )

        val_samples, test_samples = train_test_split(
            temp_samples, test_size=0.5,
            stratify=[sample[1] for sample in temp_samples],
            random_state=42
        )
    except ValueError as e:
        print(f"Stratification failed: {e}")
        # A private RandomState(42) yields the same permutation as seeding the
        # global NumPy RNG with 42, without disturbing global random state.
        np.random.RandomState(42).shuffle(valid_samples)
        n_train = int(0.7 * len(valid_samples))
        n_val = int(0.15 * len(valid_samples))

        train_samples = valid_samples[:n_train]
        val_samples = valid_samples[n_train:n_train+n_val]
        test_samples = valid_samples[n_train+n_val:]

    print(f"Train: {len(train_samples)}, Val: {len(val_samples)}, Test: {len(test_samples)}")

    pin_memory = torch.cuda.is_available()

    def collate(batch):
        return improved_collate_fn(batch, use_visual)

    def build_loader(samples, shuffle):
        # One construction path keeps all three loaders configured alike
        # (the original test loader lacked pin_memory).
        dataset = DocumentTypeDataset(samples, processor, max_length, use_visual)
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=0,
            pin_memory=pin_memory,
            collate_fn=collate,
        )

    train_loader = build_loader(train_samples, True)
    val_loader = build_loader(val_samples, False)
    test_loader = build_loader(test_samples, False)

    return train_loader, val_loader, test_loader

class LayoutLMv3DocumentClassifier:
    """Fine-tuning wrapper around ``LayoutLMv3ForSequenceClassification``.

    Owns the processor, the model (moved to CUDA when available) and the
    train / evaluate / checkpoint helpers.

    Args:
        num_classes: Size of the classification head. NOTE: the default is 18
            while ``CONFIG['DOC_TYPES']`` in this file lists 17 types; pass
            the value explicitly, as ``main()`` does.
        model_name: Pretrained checkpoint name or path.
        use_visual: Stored on the instance and written into checkpoints.
    """

    def __init__(self, num_classes=18, model_name="microsoft/layoutlmv3-base", use_visual=True):
        self.num_classes = num_classes
        self.model_name = model_name
        self.use_visual = use_visual
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print(f"Initializing model on {self.device}")

        try:
            # apply_ocr=False: words and boxes are supplied by the dataset.
            self.processor = LayoutLMv3Processor.from_pretrained(model_name, apply_ocr=False)

            config = LayoutLMv3Config.from_pretrained(model_name)
            config.num_labels = num_classes

            self.model = LayoutLMv3ForSequenceClassification.from_pretrained(
                model_name,
                config=config,
                ignore_mismatched_sizes=True
            ).to(self.device)

            print(f"Model loaded successfully")
            print(f"Number of parameters: {sum(p.numel() for p in self.model.parameters()):,}")

        except Exception as e:
            print(f"Error initializing model: {e}")
            raise

    def _to_device(self, batch):
        """Move every tensor in ``batch`` to the model device; keep the rest."""
        return {
            key: value.to(self.device) if isinstance(value, torch.Tensor) else value
            for key, value in batch.items()
        }

    def train(self, train_loader, val_loader, epochs=10, learning_rate=2e-5,
              warmup_steps=500, patience=3,
              checkpoint_path="best_layoutlmv3_document_classifier_17types.pt"):
        """Fine-tune the model with AdamW, linear warmup and early stopping.

        Batches that raise or produce a NaN loss are skipped and excluded from
        the epoch's average loss. The model is checkpointed whenever the
        validation accuracy improves.

        Args:
            train_loader: Loader yielding dict batches that include ``labels``.
            val_loader: Loader used for per-epoch validation.
            epochs: Maximum number of epochs.
            learning_rate: Peak learning rate.
            warmup_steps: Number of scheduler warmup steps.
            patience: Epochs without validation-accuracy improvement before
                stopping early.
            checkpoint_path: Where the best model is saved.

        Returns:
            ``(train_losses, val_losses, train_accs, val_accs)``, one entry
            per completed epoch.
        """
        optimizer = torch.optim.AdamW(
            self.model.parameters(),
            lr=learning_rate,
            weight_decay=0.01,
            eps=1e-8
        )

        total_steps = len(train_loader) * epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=warmup_steps,
            num_training_steps=total_steps
        )

        train_losses = []
        val_losses = []
        train_accs = []
        val_accs = []

        best_val_acc = 0
        wait = 0

        print("Starting training...")

        for epoch in range(epochs):
            self.model.train()
            total_train_loss = 0.0
            completed_batches = 0
            train_predictions = []
            train_labels = []

            progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")

            for batch_idx, batch in enumerate(progress_bar):
                try:
                    batch = self._to_device(batch)

                    optimizer.zero_grad()

                    outputs = self.model(**batch)
                    loss = outputs.loss

                    if torch.isnan(loss):
                        print(f"NaN loss detected at batch {batch_idx}, skipping...")
                        continue

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                    optimizer.step()
                    scheduler.step()

                    total_train_loss += loss.item()
                    completed_batches += 1

                    preds = torch.argmax(outputs.logits, dim=-1)
                    train_predictions.extend(preds.cpu().numpy())
                    train_labels.extend(batch['labels'].cpu().numpy())

                    progress_bar.set_postfix({
                        'loss': f'{loss.item():.4f}',
                        'lr': f'{scheduler.get_last_lr()[0]:.2e}'
                    })

                except Exception as e:
                    print(f"Error in training batch {batch_idx}: {e}")
                    continue

            if len(train_loader) == 0:
                print("No valid batches in training loader!")
                break

            # Average over batches that actually contributed a loss; dividing
            # by len(train_loader) would under-report whenever batches were
            # skipped. NaN marks an epoch in which no batch completed.
            if completed_batches:
                avg_train_loss = total_train_loss / completed_batches
            else:
                avg_train_loss = float('nan')
            train_acc = accuracy_score(train_labels, train_predictions) if train_predictions else 0

            try:
                val_loss, val_acc = self.evaluate(val_loader)
            except Exception as e:
                print(f"Validation error: {e}")
                val_loss, val_acc = float('inf'), 0

            train_losses.append(avg_train_loss)
            val_losses.append(val_loss)
            train_accs.append(train_acc)
            val_accs.append(val_acc)

            print(f"Epoch {epoch+1}/{epochs}")
            print(f"Train Loss: {avg_train_loss:.4f}, Train Acc: {train_acc:.4f}")
            print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
            print("-" * 50)

            if val_acc > best_val_acc:
                best_val_acc = val_acc
                try:
                    self.save_model(checkpoint_path)
                except Exception as e:
                    print(f"Error saving model: {e}")
                wait = 0
            else:
                wait += 1
                if wait >= patience:
                    print("Early stopping triggered")
                    break

        try:
            self.plot_training_curves(train_losses, val_losses, train_accs, val_accs)
        except Exception as e:
            print(f"Error plotting training curves: {e}")

        return train_losses, val_losses, train_accs, val_accs

    def evaluate(self, dataloader):
        """Compute average loss and accuracy over ``dataloader``.

        The model is switched to eval mode and left there. Batches that raise
        or yield a NaN loss are skipped.

        Returns:
            ``(avg_loss, accuracy)``; ``(inf, 0)`` when no batch could be
            evaluated.
        """
        self.model.eval()
        total_loss = 0
        predictions = []
        labels = []
        valid_batches = 0

        with torch.no_grad():
            for batch in tqdm(dataloader, desc="Evaluating", leave=False):
                try:
                    batch = self._to_device(batch)

                    outputs = self.model(**batch)
                    loss = outputs.loss

                    if torch.isnan(loss):
                        continue

                    total_loss += loss.item()
                    valid_batches += 1

                    preds = torch.argmax(outputs.logits, dim=-1)
                    predictions.extend(preds.cpu().numpy())
                    labels.extend(batch['labels'].cpu().numpy())

                except Exception as e:
                    print(f"Error in evaluation batch: {e}")
                    continue

        if valid_batches == 0:
            return float('inf'), 0

        avg_loss = total_loss / valid_batches
        accuracy = accuracy_score(labels, predictions) if predictions else 0

        return avg_loss, accuracy

    def save_model(self, path):
        """Write the model weights and basic metadata to ``path``.

        Returns:
            ``True`` on success, ``False`` if saving failed (the error is
            printed rather than raised).
        """
        try:
            torch.save({
                'model_state_dict': self.model.state_dict(),
                'num_classes': self.num_classes,
                'model_name': self.model_name,
                'use_visual': self.use_visual
            }, path)
        except Exception as e:
            print(f"Error saving model: {e}")
            return False

        print(f"Model saved to {path}")
        return True

    def load_model(self, path):
        """Restore model weights from a checkpoint written by ``save_model``.

        Returns:
            ``True`` on success, ``False`` if loading failed (the error is
            printed rather than raised and the current weights are kept).
        """
        try:
            checkpoint = torch.load(path, map_location=self.device)
            self.model.load_state_dict(checkpoint['model_state_dict'])
        except Exception as e:
            print(f"Error loading model: {e}")
            return False

        print(f"Model loaded from {path}")
        return True

    def plot_training_curves(self, train_losses, val_losses, train_accs, val_accs):
        """Plot per-epoch loss and accuracy curves side by side."""
        try:
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

            ax1.plot(train_losses, label='Train Loss')
            ax1.plot(val_losses, label='Val Loss')
            ax1.set_title('Training and Validation Loss')
            ax1.set_xlabel('Epoch')
            ax1.set_ylabel('Loss')
            ax1.legend()
            ax1.grid(True)

            ax2.plot(train_accs, label='Train Accuracy')
            ax2.plot(val_accs, label='Val Accuracy')
            ax2.set_title('Training and Validation Accuracy')
            ax2.set_xlabel('Epoch')
            ax2.set_ylabel('Accuracy')
            ax2.legend()
            ax2.grid(True)

            plt.tight_layout()
            plt.show()
        except Exception as e:
            print(f"Error plotting curves: {e}")

def main():
    """Run the whole pipeline: build the model, build the loaders, train.

    Any exception is reported on stdout together with its traceback instead
    of propagating.
    """
    try:
        classifier = LayoutLMv3DocumentClassifier(
            num_classes=len(CONFIG['DOC_TYPES']),
            model_name=CONFIG['MODEL_NAME'],
            use_visual=CONFIG['USE_VISUAL'],
        )

        loaders = create_dataloaders(
            CONFIG['ROOT_DIR'],
            CONFIG['DOC_TYPES'],
            classifier.processor,
            CONFIG['BATCH_SIZE'],
            CONFIG['MAX_LENGTH'],
            CONFIG['USE_VISUAL'],
        )
        train_loader, val_loader, test_loader = loaders

        # Smoke test: pull one batch and describe what it contains.
        print("Testing data loading...")
        first_batch = next(iter(train_loader), None)
        if first_batch is not None:
            print(f"Batch keys: {first_batch.keys()}")
            for key, value in first_batch.items():
                described = value.shape if isinstance(value, torch.Tensor) else type(value)
                print(f"{key}: {described}")

        hyperparameters = {
            'epochs': CONFIG['EPOCHS'],
            'learning_rate': CONFIG['LEARNING_RATE'],
            'warmup_steps': CONFIG['WARMUP_STEPS'],
            'patience': CONFIG['PATIENCE'],
        }
        train_losses, val_losses, train_accs, val_accs = classifier.train(
            train_loader, val_loader, **hyperparameters
        )

        print("Training completed successfully!")

    except Exception as e:
        print(f"Error in main execution: {e}")
        import traceback
        traceback.print_exc()


if __name__ == "__main__":
    main()

2025-06-12 21:47:01.578652: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-12 21:47:01.933353: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-12 21:47:01.933666: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-12 21:47:01.997249: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-12 21:47:02.134040: I tensorflow/core/platform/cpu_feature_guar

PyTorch version: 2.7.1+cu126
CUDA available: True
CUDA device: NVIDIA GeForce RTX 4050 Laptop GPU
Initializing model on cuda


Some weights of LayoutLMv3ForSequenceClassification were not initialized from the model checkpoint at microsoft/layoutlmv3-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully
Number of parameters: 125,930,641
Collected 3244 samples
  ID: 688 samples
  receipt: 2556 samples
Using 3244 samples from 2 classes
Train: 2270, Val: 487, Test: 487
OCR reader initialized successfully
Document type mapping: {'budget': 0, 'ID': 1, 'invoice': 2, 'form': 3, 'memo': 4, 'letter': 5, 'advertisement': 6, 'receipt': 7, 'scientific_report': 8, 'email': 9, 'scientific_publication': 10, 'handwritten': 11, 'news_article': 12, 'presentation': 13, 'resume': 14, 'questionnaire': 15, 'specification': 16}
OCR reader initialized successfully
Document type mapping: {'budget': 0, 'ID': 1, 'invoice': 2, 'form': 3, 'memo': 4, 'letter': 5, 'advertisement': 6, 'receipt': 7, 'scientific_report': 8, 'email': 9, 'scientific_publication': 10, 'handwritten': 11, 'news_article': 12, 'presentation': 13, 'resume': 14, 'questionnaire': 15, 'specification': 16}
OCR reader initialized successfully
Document type mapping: {'budget': 0, 'ID': 1, 'invoice': 2, 'form': 3, 'memo': 4

Epoch 1/20:   0%|          | 0/1135 [00:00<?, ?it/s]

Error processing sample 863: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 915: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 372: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1321: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1382: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 737: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 2033: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 198: You cannot provide bounding boxes if you initialized the i

Error processing sample 1872: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 685: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 788: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1171: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1799: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 2042: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 212: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 450: You cannot provide bounding boxes if you initialized the 

Error processing sample 1455: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 408: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 2066: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 2053: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 776: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 942: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 890: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 534: You cannot provide bounding boxes if you initialized the i

Error processing sample 1924: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 641: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 840: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 616: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 591: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1990: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 759: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 879: You cannot provide bounding boxes if you initialized the im

Error processing sample 2054: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 61: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1641: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1112: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 836: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 631: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1902: You cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.
Error processing sample 1362: You cannot provide bounding boxes if you initialized the 