Skip to content

Commit

Permalink
Merge pull request #14 from BobLd/nurminen-detect
Browse files (browse the repository at this point in the history)
Implement Simple Nurminen Detection Algorithm
  • Loading branch information
BobLd committed Sep 22, 2020
2 parents f8c2fe9 + e9584ea commit 32c6f8a
Show file tree
Hide file tree
Showing 12 changed files with 974 additions and 1,038 deletions.
3 changes: 2 additions & 1 deletion Tabula.Csv/Tabula.Csv.csproj
Expand Up @@ -4,7 +4,8 @@
<TargetFrameworks>netcoreapp3.1;netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<Description>Extract tables from PDF files (port of tabula-java using PdfPig). Csv and Tsv writers.</Description>
<PackageProjectUrl>https://github.com/BobLd/tabula-sharp</PackageProjectUrl>
<Version>0.1.0-alpha001</Version>
<Version>0.1.0-alpha002</Version>
<Authors>BobLd</Authors>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netcoreapp3.1|AnyCPU'">
Expand Down
4 changes: 3 additions & 1 deletion Tabula.Json/Tabula.Json.csproj
Expand Up @@ -4,7 +4,9 @@
<TargetFrameworks>netcoreapp3.1;netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<Description>Extract tables from PDF files (port of tabula-java using PdfPig). Json writer.</Description>
<PackageProjectUrl>https://github.com/BobLd/tabula-sharp</PackageProjectUrl>
<Version>0.1.0-alpha001</Version>
<Version>0.1.0-alpha002</Version>
<Company>BobLd</Company>
<Authors>BobLd</Authors>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Release|netcoreapp3.1|AnyCPU'">
Expand Down
33 changes: 33 additions & 0 deletions Tabula.Tests/TestsNurminenDetector.cs
@@ -0,0 +1,33 @@
using System;
using System.Collections.Generic;
using System.Text;
using Tabula.Detectors;
using Tabula.Extractors;
using UglyToad.PdfPig;
using Xunit;

namespace Tabula.Tests
{
/// <summary>
/// Tests exercising <see cref="SimpleNurminenDetectionAlgorithm"/>.
/// </summary>
public class TestsNurminenDetector
{
    /// <summary>
    /// Opens <c>test3.pdf</c>, extracts page 1, runs the detector on it and then
    /// runs a <see cref="BasicExtractionAlgorithm"/> over each detected region.
    /// </summary>
    /// <remarks>
    /// The test is currently skipped and contains no assertions: it only checks
    /// that the detect-then-extract pipeline can be driven end to end.
    /// </remarks>
    [Fact(Skip = "TO DO")]
    public void TestLinesToCells()
    {
        var parsingOptions = new ParsingOptions { ClipPaths = true };

        using (var pdf = PdfDocument.Open(@"test3.pdf", parsingOptions))
        {
            var extractor = new ObjectExtractor(pdf);
            var firstPage = extractor.Extract(1);

            var nurminen = new SimpleNurminenDetectionAlgorithm();
            var detectedRegions = nurminen.Detect(firstPage);

            foreach (var region in detectedRegions)
            {
                // A fresh extraction algorithm is created for every detected region.
                IExtractionAlgorithm basicExtractor = new BasicExtractionAlgorithm();

                // Restrict the page to the detected region before extracting.
                var regionArea = firstPage.GetArea(region.BoundingBox);

                // The result is not inspected yet (no assertions in this test).
                List<Table> extractedTables = basicExtractor.Extract(regionArea);
            }
        }
    }
}
}

0 comments on commit 32c6f8a

Please sign in to comment.