Skip to content

Commit

Permalink
Add "pdf to text" converter using "pdf.js" and FileReader (works well)
Browse files Browse the repository at this point in the history
  • Loading branch information
Wistaro committed Feb 17, 2018
1 parent b2791d4 commit b3d55a1
Show file tree
Hide file tree
Showing 8 changed files with 63,802 additions and 3 deletions.
Binary file added images/PDF.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 9 additions & 3 deletions index.html
@@ -1,3 +1,4 @@

<!DOCTYPE html>
<html lang="en">
<head>
Expand All @@ -9,7 +10,9 @@
<script src="FileSaver.min.js" async></script>
<script src="tivars_test.js" async></script>
<script src="generator.js?new=2"></script>

<script src="pdf2text/pdf.js"></script>
<script src="pdf2text/functions.js"></script>

<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
<link rel='stylesheet' href='https://fonts.googleapis.com/css?family=Geo' type='text/css'>
Expand All @@ -23,7 +26,7 @@
<div class="container">
<div class="jumbotron">
<div class="reduced">
<h3 id="topTitle" style="display:none">zText: convertisseur texte => 8xp</h3>
<h3 id="topTitle" style="display:none">zText: convertisseur pdf/texte => 8xp</h3>

<form id="convForm">
<section id="advancedCreator">
Expand All @@ -45,7 +48,9 @@ <h3 id="topTitle" style="display:none">zText: convertisseur texte => 8xp</h3>
</section>
<br>
<div class="hiddenWhenLargeViewer">
Tu souhaites organiser ton cours en plusieurs parties avec un menu ? <button id="toggleAdvanced" class="btn btn-success btn-xs">Clique-ici</button>
Tu souhaites organiser ton cours en plusieurs parties avec un menu ? <button id="toggleAdvanced" class="btn btn-success btn-xs">Clique-ici</button><br /><br />
Tu peux aussi charger un fichier PDF en rentrant l'adresse de celui-ci:
<input type="file" id="pdfFile" onchange="handlePdf(this.files)"/>
<br><br>
</div>
<div id="textareaWrapper">
Expand Down Expand Up @@ -107,6 +112,7 @@ <h4>Calculatrice cible<span class="hiddenWhenLargeViewer"> et aperçu</span> :</

<script>


function inIframe() {
try {
return window.self !== window.top;
Expand Down
59 changes: 59 additions & 0 deletions pdf2text/functions.js
@@ -0,0 +1,59 @@
/**
 * Extracts the text of one page of a PDF document.
 *
 * The `str` of every text item on the page is concatenated in order, each
 * followed by a single space (so a non-empty result ends with a space).
 *
 * @param {number} pageNum - Page number handed to `getPage` (the caller in this file passes 1-based numbers).
 * @param {Object} PDFDocumentInstance - Object exposing `getPage(pageNum)`, which must resolve to a page
 *     exposing `getTextContent()`.
 * @returns {Promise<string>} Resolves with the page text; rejects if the page or its
 *     text content cannot be retrieved.
 */
function getPageText(pageNum, PDFDocumentInstance) {
    // Chain the underlying promises instead of wrapping them in `new Promise`:
    // a failure in getPage()/getTextContent() now rejects the returned promise
    // instead of leaving it pending forever. Starting from Promise.resolve()
    // also turns a synchronous throw from getPage() into a rejection.
    return Promise.resolve().then(function () {
        return PDFDocumentInstance.getPage(pageNum);
    }).then(function (pdfPage) {
        // getTextContent() is what exposes the text items of the page.
        return pdfPage.getTextContent();
    }).then(function (textContent) {
        var textItems = textContent.items;
        var finalString = "";

        // Concatenate the string of each item, separated by spaces.
        for (var i = 0; i < textItems.length; i++) {
            finalString += textItems[i].str + " ";
        }

        return finalString;
    });
}

/**
 * Reads the first file of a file-input selection as a PDF and writes the
 * text of all its pages into the element with id `myText0`.
 *
 * The element first receives a header line, then the extracted text once
 * every page has been processed. Any read, parse or extraction failure is
 * reported through `alert`.
 *
 * @param {FileList|Array} data - Selected files (e.g. `input.files`); only the first entry is used.
 *     Nothing happens when it is missing or empty.
 */
function handlePdf(data) {
    var selectedFile = data && data[0];

    // No file chosen (e.g. the picker was cancelled): leave the page untouched
    // rather than letting readAsArrayBuffer throw on `undefined`.
    if (!selectedFile) {
        return;
    }

    var output = document.getElementById('myText0');

    // PDF text is untrusted, so it must never be parsed as HTML. Form controls
    // (textarea/input) are written through `value`; anything else through
    // `textContent`.
    var textProperty = ('value' in output) ? 'value' : 'textContent';

    output[textProperty] = "* Début du fichier PDF*\n";

    var fileReader = new FileReader();

    fileReader.onload = function () {
        var typedarray = new Uint8Array(fileReader.result);

        PDFJS.getDocument(typedarray).then(function (pdf) {
            var pagesPromises = [];

            // Pages are requested with 1-based numbers; `numPages` is the public page count.
            for (var pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
                pagesPromises.push(getPageText(pageNumber, pdf));
            }

            // Returned so that a failing page reaches the catch handler below.
            return Promise.all(pagesPromises);
        }).then(function (pagesText) {
            // Join explicitly: implicit array-to-string coercion would put
            // stray commas between pages.
            output[textProperty] += pagesText.join("\n");
        }).catch(function (reason) {
            alert(reason);
        });
    };

    fileReader.onerror = function () {
        alert(fileReader.error);
    };

    fileReader.readAsArrayBuffer(selectedFile);
}

0 comments on commit b3d55a1

Please sign in to comment.