Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@
$signatureFieldFound = true;

$v = DictionaryHelper::resolveAttribute($fieldData, 'V');
if (!$v || !$v->ensure() instanceof PdfDictionary) {
if (!$v instanceof PdfDictionary) {
echo ' But not digital signed.<br /><br />';
continue;
}

echo ' Including a digital signature.<br />';

// This is the signature value
$signatureData = $v->ensure()->getValue('Contents')->ensure()->getValue();
$signatureData = PdfHexString::ensureType(DictionaryHelper::getValue($v, 'Contents'))->getValue();
$signatureData = rtrim($signatureData, "\0");

echo '<a href="https://lapo.it/asn1js/#' . PdfHexString::str2hex($signatureData) . '" ' .
Expand All @@ -58,14 +58,14 @@

echo '<br />';

$value = $v->ensure();
$signatureProperties = [];
foreach (['Name', 'Location', 'ContactInfo', 'Reason', 'M'] as $property) {
if (!$value->offsetExists($property)) {
$value = DictionaryHelper::getValue($v, $property);
if ($value === null) {
continue;
}

$propertyValue = $value->getValue($property)->ensure()->getValue();
$propertyValue = $value->getValue();
if ($property === 'M') {
$propertyValue = Date::stringToDateTime($propertyValue);
} else {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"name": "Filter Same Area Several Times",
"teaserText": "A demo that filters the same area several times to recognize a document type.",
"requires": [
"SetaPDF-Core", "SetaPDF-Extractor"
],
"faIcon": "&#xf0b0;",
"faIcon2": "&#xf002;"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<p>
If a filter is directly attached to a strategy, each text-item will be checked against it and will exist only once
in the result.<br/>
If e.g. two rectangle filters are combined in a multifilter but share the same or overlapping area, only one
instance will get the text-items. This can lead to unexpected results.
</p>
<p>
This demo shows a way to resolve all text-items once and then filter them several times afterward to identify a
document type. The rectangle filters use almost the same area.
</p>
66 changes: 66 additions & 0 deletions public/demos/2-Extractor/filter/same-area-several-times/script.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
<?php

use setasign\SetaPDF2\Core\Document;
use setasign\SetaPDF2\Core\Geometry\Rectangle;
use setasign\SetaPDF2\Extractor\Extractor;
use setasign\SetaPDF2\Extractor\Filter\RectangleFilter;
use setasign\SetaPDF2\Extractor\Strategy\ExactPlainStrategy;

// load and register the autoload function
// NOTE(review): $assetsDirectory and displayFiles() are presumably provided by bootstrap.php - confirm
require_once __DIR__ . '/../../../../../bootstrap.php';

// the sample documents offered for selection
$files = [
    $assetsDirectory . '/pdfs/camtown/Terms-and-Conditions.pdf',
    $assetsDirectory . '/pdfs/etown/Laboratory-Report.pdf',
    $assetsDirectory . '/pdfs/lenstown/Subscription-tekMag.pdf',
    $assetsDirectory . '/pdfs/Brand-Guide.pdf'
];

// displayFiles() hands back the path of the document to analyse
$path = displayFiles($files);

$document = Document::loadByFilename($path);
$extractor = new Extractor($document);

$strategy = new ExactPlainStrategy();
$extractor->setStrategy($strategy);

// a coarse filter attached to the strategy keeps the number of resolved text items small
$strategy->setFilter(new RectangleFilter(new Rectangle(0, 752, 596, 649)));

// one rectangle per known document type; the array key is the text expected inside that rectangle
$typeFilters = [
    'Laboratory Report' => new RectangleFilter(new Rectangle(36, 712, 240, 674)),
    'Terms and Conditions' => new RectangleFilter(new Rectangle(32, 716, 286, 671)),
    'Subscription tekMag' => new RectangleFilter(new Rectangle(31, 713, 262, 672))
];

// resolve the text items of the first page only once...
$page = $document->getCatalog()->getPages()->getPage(1);
$pageTextItems = $extractor->getTextItemsByPage($page);

// ...and run them through every individual filter until one yields exactly its expected text
$documentType = null;
foreach ($typeFilters as $expectedText => $typeFilter) {
    // the filter needs to know which page the text items were taken from
    $typeFilter->setPage($page);

    $filteredText = $strategy->getResultByTextItems($pageTextItems, $typeFilter);
    if ($filteredText !== $expectedText) {
        continue;
    }

    $documentType = $expectedText;
    break;
}

if ($documentType === 'Laboratory Report') {
    echo 'This document is a laboratory report!';
    // ...process it
} elseif ($documentType === 'Terms and Conditions') {
    echo 'This document shows Terms and Conditions!';
    // ...process it
} elseif ($documentType === 'Subscription tekMag') {
    echo 'This document is a subscription form to the tekMag!';
    // ...process it
} else {
    echo 'Sorry, but I cannot recognize the document type.';
}