### Name :  1-1-CisPdf2CsvByIndiv :
### Function :  Extract text content from CIS PDF files 
### Source Data :  高雄市議會1屆1次至2屆8次市政總質詢

In [1]:
# -*- coding: utf-8 -*-
"""
Created on May 13 2020
@author: johnson
"""

# Extract PDF by Page
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
import io
import glob
import csv
import os.path

### User function extract_text_by_page(): extract text from a PDF file, page by page, via pdfminer

In [2]:
def extract_text_by_page(pdfFile):
    """Yield the text of each page of a PDF file, one string per page.

    Args:
        pdfFile: Path of the PDF file; it is opened in binary mode.

    Yields:
        str: The text that pdfminer's TextConverter produced for one page,
        in the order PDFPage.get_pages() returns the pages.
    """
    # The resource manager does not depend on the page, so build it once
    # instead of once per page.
    resource_manager = PDFResourceManager()
    with open(pdfFile, 'rb') as fh:
        for page in PDFPage.get_pages(fh, caching=True, check_extractable=True):
            # A fresh in-memory buffer per page keeps each yielded string
            # limited to that page's text.
            with io.StringIO() as fake_file_handle:
                # TextConverter is used here; HTMLConverter or XMLConverter
                # are the alternatives for other output formats.
                converter = TextConverter(resource_manager, fake_file_handle)
                try:
                    page_interpreter = PDFPageInterpreter(resource_manager, converter)
                    page_interpreter.process_page(page)
                    text = fake_file_handle.getvalue()
                finally:
                    # Release the converter before yielding so nothing stays
                    # open if the caller stops iterating early or a page fails.
                    converter.close()
            yield text

### User function export_as_csv():
###        1) receive the path of one PDF file
###        2) pass it to the user function extract_text_by_page()
###        3) write the extracted text as one row of the output CSV file

In [3]:
def export_as_csv(pdf_file, csv_path, isNew):
    """Write the full text of one PDF file as a single row of a CSV file.

    The row is [FileNo, FileName, Text]: FileNo is the module-level counter
    FSeqNo after it has been incremented, FileName is the base name of
    pdf_file, and Text is the text of all pages concatenated in page order.

    Args:
        pdf_file: Path of the PDF file to extract.
        csv_path: Path of the CSV file to write.
        isNew: If true, the CSV file is (re)created and the header row is
            written first; otherwise the row is appended to the existing file.

    Side effects:
        Prints the PDF path, increments the global FSeqNo, and writes to
        csv_path (UTF-8 with BOM).
    """
    global FSeqNo

    print("PDF File: %s" % pdf_file)

    # Extract before touching the CSV file or the counter, so a PDF that
    # fails to parse neither truncates the output nor consumes a FileNo.
    # join() avoids re-copying the accumulated text for every page.
    contain = "".join(extract_text_by_page(pdf_file))
    pdfName = os.path.basename(pdf_file)

    opMode = "w" if isNew else "a"
    with open(csv_path, opMode, newline='', encoding='utf-8-sig') as csv_file:
        writer = csv.writer(csv_file)
        if isNew:
            writer.writerow(['FileNo', 'FileName', 'Text'])   # write fields name into CSV file head

        FSeqNo += 1
        writer.writerow([FSeqNo, pdfName, contain])

### main 

In [4]:
# Folder holding the individual interpellation PDF files.
pdf_path = "KCC Data/CIS PDF Indiv 1-1 to 2-8/"
# Build the pattern with os.path.join so the trailing separator of pdf_path is
# not doubled, and sort the matches: glob returns them in arbitrary order, and
# the FileNo written to the CSV follows the processing order.
pdfs = sorted(glob.glob(os.path.join(pdf_path, "*.pdf")))
newFlag = True      # True only for the first file: create the CSV and write its header
FSeqNo = 0          # running file number, incremented by export_as_csv()
csv_path = "CisPdf2CsvbyIndiv.csv"
for pdf in pdfs:
    export_as_csv(pdf, csv_path, newFlag)
    newFlag = False  # every later file is appended to the same CSV

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000516-1-蘇炎城.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000516-2-張勝富.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000516-3-吳益政.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000516-4-鄭光峰.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000517-1-林國正.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000517-2-李雅靜.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000517-3-郭建盟.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000517-4-翁瑞珠.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000518-1-朱信強.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000518-2-李眉蓁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000518-3-康裕成.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000518-4-顏曉菁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000519-1-黃天煌.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000519-2-林富寶.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-1-1000519-3-張豐藤.pdf
PDF File: 

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010515-2-蔡昌達.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010515-3-康裕成.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010516-1-陳政聞.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010516-2-陳慧文.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010516-3-張文瑞.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010517-1-連立堅黃淑美.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010517-3-錢聖武.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010518-1-蘇琦莉.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010518-2-林武忠.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010518-3-吳利成.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010521-1-徐榮延.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010521-2-吳益政.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010521-3-蔡金晏.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010522-1-曾俊傑.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-3-1010522-2-陳玫娟.pdf
PDF Fil

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020521-1-陳麗珍.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020521-2-黃淑美.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020521-3-黃石龍.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020522-1-張豐藤.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020522-2-曾俊傑陳玫娟.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020523-1-洪平朗.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020523-2-陳明澤.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020523-3-唐惠美.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020524-1-陳粹鑾.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020524-2-曾麗燕.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020524-3-張文瑞.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020527-1-莊啟旺.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020527-2-連立堅.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020527-3-童燕珍.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-5-1020528-1-陳信瑜.pdf
PDF Fil

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030520-1-黃柏霖.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030520-2-李眉蓁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030520-3-林富寶.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030521-1-曾麗燕.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030521-2-吳益政.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030521-3-俄鄧殷艾.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030522-1-韓賜村.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030522-2-林義迪.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030523-1-連立堅.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030523-2-陳麗娜.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030523-3-張文瑞.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030526-1-黃天煌.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030526-2-吳利成.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030526-3-藍星木.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\1-7-1030527-1-林芳如.pdf
PDF File:

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040522-1-陳麗珍.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040522-2-陳粹鑾.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040522-3-鍾盛有.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040525-1-李雨庭.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040525-2-高閔琳.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040525-3-黃天煌.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040526-1-黃紹庭.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040526-2-許崑源.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040526-3-黃柏霖.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040527-1-陳明澤.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040527-2-張漢忠.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040527-3-方信淵.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040528-1-林武忠.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040528-2-王耀裕.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-1-1040528-3-李長生.pdf
PDF File: 

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050602-2-陳玫娟.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050602-3-曾俊傑.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050602-4-黃天煌.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050603-1-伊斯坦大貝雅夫正福.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050603-2-李雨庭.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050603-3-何權峰.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050603-4-高閔琳.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050604-1-張豐藤.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050604-2-方信淵.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050604-3-李眉蓁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050604-4-張勝富.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050606-1-林武忠.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050606-2-鄭新助.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050606-3-劉馨正.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-3-1050606-4-邱俊憲.pdf
PDF 

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060518-4-陳麗珍.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060519-1-邱俊憲.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060519-2-李喬如.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060519-3-高閔琳.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060522-1-陳慧文.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060522-2-簡煥宗.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060522-3-李眉蓁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060522-4-李長生.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060523-1-陳麗娜.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060523-2-伊斯坦大貝雅夫正福.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060523-3-黃紹庭.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060523-4-黃香菽.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060524-1-鄭新助.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060524-2-陳粹鑾.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-5-1060524-3-陸淑美.pdf
PDF 

PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070510-1-蔡昌達.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070510-2-羅鼎城.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070510-3-陳慧文.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070510-4-李眉蓁.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070511-1-簡煥宗.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070511-2-黃天煌.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070511-3-鄭新助.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070511-4-黃柏霖.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070514-1-陳麗娜.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070514-2-李順進.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070514-3-周鍾.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070514-4-黃紹庭.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070515-1-沈英章.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070515-2-許慧玉.pdf
PDF File: KCC Data/CIS PDF Indiv 1-1 to 2-8\2-7-1070515-3-曾俊傑.pdf
PDF File: K