Add "pdf to text" converter using "pdf.js" and FileReader (works well)

TI-Planet · Feb 17, 2018 · b3d55a1 · b3d55a1
1 parent b2791d4
commit b3d55a1
Show file tree

Hide file tree

Showing 8 changed files with 63,802 additions and 3 deletions.
diff --git a/images/PDF.png b/images/PDF.png
diff --git a/index.html b/index.html
@@ -1,3 +1,4 @@
+
 <!DOCTYPE html>
 <html lang="en">
 <head>
@@ -9,7 +10,9 @@
     <script src="FileSaver.min.js" async></script>
     <script src="tivars_test.js" async></script>
     <script src="generator.js?new=2"></script>
-
+      <script src="pdf2text/pdf.js"></script>
+    <script src="pdf2text/functions.js"></script>
+
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
     <link rel='stylesheet' href='https://fonts.googleapis.com/css?family=Geo' type='text/css'>
@@ -23,7 +26,7 @@
 <div class="container">
     <div class="jumbotron">
         <div class="reduced">
-            <h3 id="topTitle" style="display:none">zText: convertisseur texte => 8xp</h3>
+            <h3 id="topTitle" style="display:none">zText: convertisseur pdf/texte => 8xp</h3>
 
             <form id="convForm">
                 <section id="advancedCreator">
@@ -45,7 +48,9 @@ <h3 id="topTitle" style="display:none">zText: convertisseur texte => 8xp</h3>
                     </section>
                     <br>
                     <div class="hiddenWhenLargeViewer">
-                    Tu souhaites organiser ton cours en plusieurs parties avec un menu ? <button id="toggleAdvanced" class="btn btn-success btn-xs">Clique-ici</button>
+                    Tu souhaites organiser ton cours en plusieurs parties avec un menu ? <button id="toggleAdvanced" class="btn btn-success btn-xs">Clique-ici</button><br /><br />
+                    Tu peux aussi charger un fichier PDF en le sélectionnant ci-dessous :
+                    <input type="file" id="pdfFile" onchange="handlePdf(this.files)"/>
                     <br><br>
                     </div>
                     <div id="textareaWrapper">
@@ -107,6 +112,7 @@ <h4>Calculatrice cible<span class="hiddenWhenLargeViewer"> et aperçu</span> :</
 
 <script>
 
+
     function inIframe() {
         try {
             return window.self !== window.top;

diff --git a/pdf2text/functions.js b/pdf2text/functions.js
@@ -0,0 +1,59 @@
+function getPageText(pageNum, PDFDocumentInstance) {
+    // Return a Promise that is solved once the text of the page is retrieven
+    return new Promise(function (resolve, reject) {
+        PDFDocumentInstance.getPage(pageNum).then(function (pdfPage) {
+            // The main trick to obtain the text of the PDF page, use the getTextContent method
+            pdfPage.getTextContent().then(function (textContent) {
+                var textItems = textContent.items;
+                var finalString = "";
+
+                // Concatenate the string of the item to the final string
+                for (var i = 0; i < textItems.length; i++) {
+                    var item = textItems[i];
+
+                    finalString += item.str + " ";
+                }
+
+                // Solve promise with the text retrieven from the page
+                resolve(finalString);
+            });
+        });
+    });
+}
+
+function handlePdf(data){
+
+    document.getElementById('myText0').innerHTML = "* Début du fichier PDF*\n";
+
+    var fileList = this.files;
+    var MyFile = data[0];
+    var fileReader = new FileReader();
+
+    fileReader.onload = function() {
+
+        var typedarray = new Uint8Array(this.result);
+
+         PDFJS.getDocument(typedarray).then(function(pdf) {
+
+            var pdfDocument = pdf;
+            var pagesPromises = [];
+
+        for (var i = 0; i < pdf.pdfInfo.numPages; i++) {
+            (function (pageNumber) {
+                pagesPromises.push(getPageText(pageNumber, pdfDocument));
+            })(i + 1);
+        }
+
+        Promise.all(pagesPromises).then(function (pagesText) {
+             document.getElementById('myText0').innerHTML += pagesText; //fill textarea with extracted text
+        });
+
+        delete data;
+
+        }, function (reason) {
+            alert(reason);
+        });
+    };
+
+    fileReader.readAsArrayBuffer(MyFile);
+}