In [5]:
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

# Directory of the locally saved checkpoint, relative to the notebook's working directory.
local_model_path = "../model"
# Load the model and its processor from the local path.
# device_map="cpu" places the weights on the CPU; torch_dtype="auto" leaves the
# dtype choice to the library (presumably taken from the checkpoint config —
# TODO confirm against the transformers docs for the installed version).
# `model` and `processor` are module-level names that get_output_text reads.
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    local_model_path, torch_dtype="auto", device_map="cpu"
)
processor = AutoProcessor.from_pretrained(local_model_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


In [None]:
import matplotlib.pyplot as plt
import fitz  # PyMuPDF
from PIL import Image
import numpy as np
import pandas as pd
import os

In [None]:
def load_and_process_files(file_paths, resize_to=(512, 512), dpi=150):
    """Load PDFs and image files as a flat list of RGB PIL images.

    Args:
        file_paths: A single path (``str`` or ``os.PathLike``) or an iterable
            of paths. A path whose extension is ``.pdf`` (case-insensitive)
            is rendered page by page with PyMuPDF; any other path is opened
            with PIL and converted to RGB.
        resize_to: ``(width, height)`` handed to ``Image.resize`` for every
            image, or a falsy value (e.g. ``None``) to keep the original
            size. The aspect ratio is not preserved.
        dpi: Resolution passed to ``page.get_pixmap`` when rendering PDF pages.

    Returns:
        list: One image per PDF page / image file, in input order. An empty
        input yields an empty list.
    """
    if isinstance(file_paths, (str, os.PathLike)):
        file_paths = [file_paths]

    def _finalize(img):
        # Shared post-processing for PDF pages and standalone images.
        return img.resize(resize_to) if resize_to else img

    all_images = []

    for path in file_paths:
        ext = os.path.splitext(path)[1].lower()

        if ext == '.pdf':
            # The context manager closes the document even if rendering a page
            # raises; previously the handle was never closed.
            with fitz.open(path) as doc:
                for page in doc:
                    pix = page.get_pixmap(dpi=dpi)
                    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                    all_images.append(_finalize(img))
        else:
            # convert() returns an independent image, so the source file can be
            # released as soon as the conversion is done.
            with Image.open(path) as src:
                img = src.convert("RGB")
            all_images.append(_finalize(img))

    return all_images




In [8]:
def get_message(image_paths, prompt):
    """Build a single-turn user message with images followed by a text prompt.

    Args:
        image_paths: An iterable of image references (one ``"image"`` content
            entry is emitted per element, in order). A single string is
            treated as one path rather than being iterated character by
            character.
        prompt: Instruction text, appended after all image entries.

    Returns:
        list: A one-element list holding the user message dict, in the
        structure that ``get_output_text`` passes to
        ``processor.apply_chat_template`` and ``process_vision_info``.
    """
    if isinstance(image_paths, str):
        image_paths = [image_paths]

    # Images first, then the instruction, so the prompt refers back to them.
    content = [{"type": "image", "image": path} for path in image_paths]
    content.append({"type": "text", "text": prompt})

    return [{"role": "user", "content": content}]
   

In [9]:
def get_output_text(messages, max_new_tokens=180):
    """Run the module-level model on one conversation and decode its reply.

    Relies on the module-level ``processor`` and ``model`` objects.

    Args:
        messages: Chat messages as built by ``get_message``.
        max_new_tokens: Upper bound on generated tokens (default 180, the
            value that was previously hard-coded).

    Returns:
        list: Decoded strings from ``processor.batch_decode``, one per input
        sequence, containing only the newly generated tokens.
    """
    text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    # Keep the inputs on the same device as the weights so generation also
    # works when the model is not loaded on the CPU.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # generate() returns prompt + completion; drop the prompt prefix of each row.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=True
    )
    return output_text


In [10]:
import re

def get_data_from_output_text(output_text):
    """Parse ``key: value`` lines of a model reply into a dictionary.

    Args:
        output_text: The list returned by ``get_output_text`` (only its first
            element is parsed) or a bare string. An empty list, empty string
            or ``None`` yields an empty dict.

    Returns:
        dict: Cleaned key -> stripped value. Each line is split on its first
        colon. Lines without a colon, with an empty value, or whose key holds
        no letters are skipped. When a key repeats, the last value wins.
    """
    if not output_text:
        return {}
    text = output_text if isinstance(output_text, str) else output_text[0]

    data = {}
    for line in text.split('\n'):
        raw_key, sep, value = line.strip().partition(':')
        if not sep:
            continue
        value = value.strip()

        # Strip surrounding whitespace first: with a space before the colon
        # ("**Prénom** : x") the suffix pattern below would otherwise be
        # blocked by the trailing blank and leave the markup in the key.
        cleaned_key = raw_key.strip()
        # Drop list bullets, numbering and markup before the first letter ...
        cleaned_key = re.sub(r'^[^a-zA-ZÀ-ÿ]+', '', cleaned_key)
        # ... and trailing non-letter, non-space characters at the end.
        cleaned_key = re.sub(r'[^a-zA-ZÀ-ÿ\s]+$', '', cleaned_key)
        cleaned_key = cleaned_key.strip()

        if cleaned_key and value:
            data[cleaned_key] = value

    return data


In [None]:
def is_scanned_pdf(file_path):
    """Report whether a document has no extractable text on any page.

    Pages are checked in order and the scan stops at the first page whose
    text is non-empty after stripping whitespace.

    Returns:
        bool: False if some page contains text, True otherwise (a document
        with zero pages therefore also returns True).
    """
    with fitz.open(file_path) as pdf:
        has_text = any(pg.get_text().strip() for pg in pdf)
    # No text layer at all -> likely a scan.
    return not has_text
# Ad-hoc check of is_scanned_pdf on a sample file.
# NOTE(review): the path points at a PNG, not a PDF — confirm this is the intended input.
path = "../images/assur_B-07.png"
print(is_scanned_pdf(path)) 

True


In [11]:
# Image used for the "is the table filled in?" check.
FA_path = [
    "../images/FA_without.png",
   ]

# The prompt asks for a bare 1 (table holds values) or 0 (empty / placeholder only).
FA_prompt_classify = "If the table in this image contains values like dates, words, or numbers — respond with 1. If it contains only empty cells, placeholders or just a word (NEANT) writted with a grop of asterisks, respond with 0"
message_classify = get_message(FA_path, FA_prompt_classify)
# get_output_text returns a list of decoded strings; the answer is its first element.
FA_class = get_output_text(message_classify)
print(FA_class)


['1']


In [None]:
# Image to extract the identity fields from.
# NOTE(review): FA_class was computed for "../images/FA_without.png" in the
# classification cell, which is a different file from the one used here —
# confirm the warning below is meant to apply to this image.
FA_path = ["../Qwen2.5-VL-3B-Instruct/images/FA.jpg",]
FA_prompt = "From this image, Extract these informations: Prénom, Nom, CIN, date début validité, date fin validité "
message_FA = get_message(FA_path, FA_prompt)
# The model replies with free text, so tolerate surrounding whitespace or a
# trailing newline around the expected "1".
if FA_class[0].strip() == "1":
    print("Warning: FA may be not clean")
else:
    print("FA is clean")
FA_infos = get_output_text(message_FA)
print(FA_infos)

In [13]:
# Parse the extraction reply into a dict.
# Requires FA_infos from the FA extraction cell; running this cell before that
# one raises NameError (as the recorded output shows).
FA_data = get_data_from_output_text(FA_infos)
print(FA_data)

NameError: name 'FA_infos' is not defined

In [None]:
# Recto and verso images of the ID card. Forward slashes are used because they
# resolve on Windows as well as POSIX systems (the previous "\\" separators
# only worked on Windows) and match the other paths in this notebook.
image_paths = ['extracted_images/cin_recto.png', 'extracted_images/cin_verso.png']
cin_prompt = "From these images, Extract these informations : prénom, Nom, date de naissance, Nationalité, date d'expiration de la carte, CIN(numéro de la carte nationale), Sexe(M ou F)"
# Both images go into one user message so the model can read the two sides together.
message_cin = get_message(image_paths, cin_prompt)

In [32]:
# Run the model on the ID-card message; the result is a list of decoded strings.
cin_output = get_output_text(message_cin)
# Show the raw reply so the parsed dict can be checked against it.
for line in cin_output:
    print(line)
# Parse the "key : value" lines of the reply into a dict.
cin_data = get_data_from_output_text(cin_output)

Voici les informations extraites des documents :

Prénom : SOUFIANE
Nom : LAALIAOUI
Date de naissance : 07.03.2001
Nationalité : Marocaine
Date d'expiration de la carte : 02.04.2029
CIN (numéro de la carte nationale) : W451299
Adresse : DR OLD ALI LAMRAZIG SIDI EL AIDI SETTAT
Sexe : M (Masculin)


In [None]:
# Cross-check the identity fields parsed from the ID card (cin_data) against
# those parsed from the FA document (FA_data), then export the merged record.
if cin_data and FA_data:
    identity_fields = ("Prénom", "Nom de la famille", "CIN")
    # A difference in ANY field is a mismatch. The previous `A and B or C`
    # parsed as `(A and B) or C`, so a differing first name (or surname) on
    # its own was silently accepted.
    # NOTE(review): a key absent from both dicts compares equal (None == None);
    # confirm these names match the keys the parser actually produces.
    if any(cin_data.get(field) != FA_data.get(field) for field in identity_fields):
        print("Documents are mismatched.")
    else:
        # Combine the two dictionaries; on duplicate keys the FA value wins.
        combined_data = {**cin_data, **FA_data}
        print("Documents match. Combined data:")
        print(combined_data)

        # Save the combined data to an Excel file
        df = pd.DataFrame([combined_data])  # wrap in list to get a row
        df.to_excel("cin_FA_infos.xlsx", index=False)
else:
    print("One of the dictionaries is empty.")
