## [Extract Text](https://towardsdatascience.com/extracting-text-from-pdf-files-with-python-a-comprehensive-guide-9fc4003d517)

## [Language Detect](https://github.com/zafercavdar/fasttext-langdetect) 

In [1]:
from io import StringIO

import os
import re
import PyPDF2
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer, LTChar, LTRect, LTFigure, LAParams
from ftlangdetect import detect
import pycld2 as cld2

In [14]:
def collect_clear_text(text_list):
    text_list = [re.sub(r'\s+', ' ', text_.strip()).strip() for text_ in text_list]
    text = ' '.join(text_list)
    return text.strip()

def contents_references_rmv(text_per_page: dict,pages = 30, char_num = 1500, years_num = 6):
    def less_char_drop(text,char_num):
        if len(text) < char_num: return True
    def dot_drop(text):
        if (len(text) > char_num) and ((text.count(".") > 40) or (text.count("…")> 10) or (text.count("_")> 10) or (text.count("-")>10)) : return True
    def referencesYear_drop(text):
        year_list = list(range(1900,2025))
        year_count = 0
        for year in year_list:
            if str(year) in text:
                year_count += 1
        if year_count > years_num: return True
    total_pages = len(text_per_page)
    obs_numbers = list(range(0,pages)) + list(range(total_pages-pages,total_pages))
    for page_num in obs_numbers:
        page_word = f'Page_{page_num}'
        if page_num < pages:
            try:
                if (less_char_drop(text_per_page[page_word],char_num)) or (dot_drop(text_per_page[page_word]))  : del text_per_page[page_word]   
            except KeyError:
                continue
        else:
            if (less_char_drop(text_per_page[page_word],char_num)) or (referencesYear_drop(text_per_page[page_word]))  : del text_per_page[page_word]
    return text_per_page
    
def remove_eng(text_per_page: dict):
    dict_keys = list(text_per_page.keys())
    for keys in dict_keys: 
        text = text_per_page[keys]
        try:
            isReliable, textBytesFound, details = cld2.detect(text)
        except Exception as e:
            del text_per_page[keys]
            continue    
        lang = details[0][1]
        score = details[0][2]
        #print(keys, lang)
        if (lang == 'tr') and (score < 0.91):
            del text_per_page[keys]
        elif lang != "tr":
            del text_per_page[keys]
    return text_per_page

In [None]:
pdf_listDir = os.listdir("YOK_download_pdf")
for pdf in pdf_listDir:
    text_per_page = {}
    pdf_path = f'YOK_download_pdf/{pdf}'
    try:
        for pagenum, page in enumerate(extract_pages(pdf_path, laparams=LAParams(line_margin=2))):
            page_text = []
            page_elements = [(element.y1, element) for element in page._objs]
            page_elements.sort(key=lambda a: a[0], reverse=True)
            for i, component in enumerate(page_elements):
                element = component[1]
                if isinstance(element, LTTextContainer):
                    line_text = element.get_text()
                    page_text.append(line_text)
            dctkey = 'Page_' + str(pagenum)
            text_per_page[dctkey] = collect_clear_text(page_text)
    except TypeError as e:
        print(f"Error processing {pdf}: {e}")
        os.remove(pdf_path)
        continue
    except ValueError as e:
        print(f"Error processing {pdf}: {e}")
        os.remove(pdf_path)
        continue
    rawTotal = len(text_per_page)
    text_per_page = contents_references_rmv(text_per_page=text_per_page)
    text_per_page = remove_eng(text_per_page=text_per_page)
    """
    try:
        text_per_page = remove_eng(text_per_page=text_per_page)
    except Exception as e:
        print(f"Error in remove_eng for {pdf}: {e}")
        os.remove(pdf_path)
        continue
    """
    clearedTotal = len(text_per_page)
    if clearedTotal < 10:
        print("Bozuk pdf:", pdf)
    print("PDF:", pdf, "Raw_Pages:", rawTotal, "Cleared_Total:", clearedTotal)
    with open(f"YOK_TEXT/{pdf.split('.')[0]}.txt" , 'w+', encoding="utf-8") as txt:
        for text in text_per_page.values():
            txt.write(text)
            txt.write('\n')
    os.rename( f"YOK_download_pdf/{pdf}", f"YOK_download_pdf_fin/finished_{pdf}")

Error processing 288172.pdf: bytes must be in range(0, 256)
PDF: 288182.pdf Raw_Pages: 154 Cleared_Total: 124
PDF: 288187.pdf Raw_Pages: 280 Cleared_Total: 201
PDF: 288190.pdf Raw_Pages: 246 Cleared_Total: 186
PDF: 288199.pdf Raw_Pages: 127 Cleared_Total: 83
PDF: 288203.pdf Raw_Pages: 169 Cleared_Total: 129
PDF: 288204.pdf Raw_Pages: 142 Cleared_Total: 108
PDF: 288205.pdf Raw_Pages: 151 Cleared_Total: 113
PDF: 288207.pdf Raw_Pages: 135 Cleared_Total: 97
PDF: 288228.pdf Raw_Pages: 159 Cleared_Total: 130
PDF: 288239.pdf Raw_Pages: 137 Cleared_Total: 102
PDF: 288246.pdf Raw_Pages: 97 Cleared_Total: 72
PDF: 288247.pdf Raw_Pages: 254 Cleared_Total: 181
PDF: 288252.pdf Raw_Pages: 118 Cleared_Total: 94
PDF: 288254.pdf Raw_Pages: 207 Cleared_Total: 160
PDF: 288285.pdf Raw_Pages: 181 Cleared_Total: 139
PDF: 288286.pdf Raw_Pages: 204 Cleared_Total: 162
PDF: 288287.pdf Raw_Pages: 202 Cleared_Total: 166
PDF: 288299.pdf Raw_Pages: 78 Cleared_Total: 48
PDF: 288302.pdf Raw_Pages: 166 Cleared_Total: 1

The PDF <_io.BufferedReader name='YOK_download_pdf/289394.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 289384.pdf Raw_Pages: 75 Cleared_Total: 51


The PDF <_io.BufferedReader name='YOK_download_pdf/289395.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 289394.pdf Raw_Pages: 337 Cleared_Total: 298
PDF: 289395.pdf Raw_Pages: 259 Cleared_Total: 224
PDF: 289396.pdf Raw_Pages: 149 Cleared_Total: 104
PDF: 289398.pdf Raw_Pages: 131 Cleared_Total: 98
PDF: 289400.pdf Raw_Pages: 199 Cleared_Total: 160
PDF: 289410.pdf Raw_Pages: 108 Cleared_Total: 69
PDF: 289416.pdf Raw_Pages: 150 Cleared_Total: 112
PDF: 289417.pdf Raw_Pages: 165 Cleared_Total: 131
PDF: 289427.pdf Raw_Pages: 156 Cleared_Total: 115
PDF: 289428.pdf Raw_Pages: 184 Cleared_Total: 135
PDF: 289434.pdf Raw_Pages: 87 Cleared_Total: 46
PDF: 289436.pdf Raw_Pages: 73 Cleared_Total: 54
PDF: 289442.pdf Raw_Pages: 110 Cleared_Total: 80
PDF: 289479.pdf Raw_Pages: 99 Cleared_Total: 71
PDF: 289484.pdf Raw_Pages: 114 Cleared_Total: 84
PDF: 289490.pdf Raw_Pages: 133 Cleared_Total: 98
PDF: 289495.pdf Raw_Pages: 118 Cleared_Total: 83
Bozuk pdf: 289496.pdf
PDF: 289496.pdf Raw_Pages: 119 Cleared_Total: 0


Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.


PDF: 289498.pdf Raw_Pages: 150 Cleared_Total: 114


Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.


PDF: 289500.pdf Raw_Pages: 111 Cleared_Total: 55
PDF: 289501.pdf Raw_Pages: 152 Cleared_Total: 109
PDF: 289503.pdf Raw_Pages: 113 Cleared_Total: 85
PDF: 289506.pdf Raw_Pages: 163 Cleared_Total: 137
PDF: 289512.pdf Raw_Pages: 135 Cleared_Total: 94
PDF: 289516.pdf Raw_Pages: 106 Cleared_Total: 75
PDF: 289518.pdf Raw_Pages: 185 Cleared_Total: 145
PDF: 289523.pdf Raw_Pages: 130 Cleared_Total: 96
PDF: 289526.pdf Raw_Pages: 128 Cleared_Total: 98
PDF: 289531.pdf Raw_Pages: 195 Cleared_Total: 160
Bozuk pdf: 289603.pdf
PDF: 289603.pdf Raw_Pages: 129 Cleared_Total: 0
PDF: 289604.pdf Raw_Pages: 103 Cleared_Total: 64
PDF: 289605.pdf Raw_Pages: 117 Cleared_Total: 69
PDF: 289607.pdf Raw_Pages: 128 Cleared_Total: 99
PDF: 289667.pdf Raw_Pages: 123 Cleared_Total: 103
PDF: 289717.pdf Raw_Pages: 329 Cleared_Total: 275
PDF: 289718.pdf Raw_Pages: 147 Cleared_Total: 108
Bozuk pdf: 289719.pdf
PDF: 289719.pdf Raw_Pages: 169 Cleared_Total: 0
PDF: 289729.pdf Raw_Pages: 108 Cleared_Total: 72
PDF: 289736.pdf Raw_

The PDF <_io.BufferedReader name='YOK_download_pdf/291569.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 291563.pdf Raw_Pages: 378 Cleared_Total: 326
PDF: 291569.pdf Raw_Pages: 139 Cleared_Total: 96
PDF: 291572.pdf Raw_Pages: 267 Cleared_Total: 228
PDF: 291573.pdf Raw_Pages: 182 Cleared_Total: 161
PDF: 291589.pdf Raw_Pages: 228 Cleared_Total: 205
PDF: 291590.pdf Raw_Pages: 158 Cleared_Total: 122
PDF: 291594.pdf Raw_Pages: 136 Cleared_Total: 99
Bozuk pdf: 291598.pdf
PDF: 291598.pdf Raw_Pages: 149 Cleared_Total: 0
PDF: 291599.pdf Raw_Pages: 168 Cleared_Total: 139
Bozuk pdf: 291601.pdf
PDF: 291601.pdf Raw_Pages: 272 Cleared_Total: 0
PDF: 291607.pdf Raw_Pages: 226 Cleared_Total: 183
PDF: 291609.pdf Raw_Pages: 93 Cleared_Total: 64
PDF: 291611.pdf Raw_Pages: 202 Cleared_Total: 147
PDF: 291615.pdf Raw_Pages: 227 Cleared_Total: 180
Bozuk pdf: 291620.pdf
PDF: 291620.pdf Raw_Pages: 293 Cleared_Total: 0
PDF: 291625.pdf Raw_Pages: 147 Cleared_Total: 120
PDF: 291626.pdf Raw_Pages: 354 Cleared_Total: 308
PDF: 291627.pdf Raw_Pages: 177 Cleared_Total: 146
PDF: 291652.pdf Raw_Pages: 137 Cleared_Total

Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.


PDF: 294108.pdf Raw_Pages: 201 Cleared_Total: 170
PDF: 294131.pdf Raw_Pages: 125 Cleared_Total: 107
PDF: 294141.pdf Raw_Pages: 174 Cleared_Total: 152
PDF: 294408.pdf Raw_Pages: 120 Cleared_Total: 85
PDF: 294425.pdf Raw_Pages: 133 Cleared_Total: 91
PDF: 294427.pdf Raw_Pages: 144 Cleared_Total: 108
PDF: 294430.pdf Raw_Pages: 159 Cleared_Total: 127
PDF: 294445.pdf Raw_Pages: 154 Cleared_Total: 114
PDF: 294447.pdf Raw_Pages: 135 Cleared_Total: 106
PDF: 294460.pdf Raw_Pages: 238 Cleared_Total: 202
PDF: 294481.pdf Raw_Pages: 93 Cleared_Total: 58
PDF: 294483.pdf Raw_Pages: 169 Cleared_Total: 148
PDF: 294551.pdf Raw_Pages: 127 Cleared_Total: 84
PDF: 294591.pdf Raw_Pages: 132 Cleared_Total: 109
PDF: 294636.pdf Raw_Pages: 220 Cleared_Total: 172
PDF: 294642.pdf Raw_Pages: 208 Cleared_Total: 181
PDF: 294647.pdf Raw_Pages: 116 Cleared_Total: 80
PDF: 294660.pdf Raw_Pages: 203 Cleared_Total: 160
PDF: 294661.pdf Raw_Pages: 131 Cleared_Total: 108
PDF: 294663.pdf Raw_Pages: 114 Cleared_Total: 84
PDF: 29

The PDF <_io.BufferedReader name='YOK_download_pdf/320308.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 320286.pdf Raw_Pages: 155 Cleared_Total: 126
PDF: 320308.pdf Raw_Pages: 76 Cleared_Total: 58
PDF: 320312.pdf Raw_Pages: 149 Cleared_Total: 112
PDF: 320327.pdf Raw_Pages: 118 Cleared_Total: 81
PDF: 320371.pdf Raw_Pages: 399 Cleared_Total: 353
PDF: 320373.pdf Raw_Pages: 361 Cleared_Total: 288
PDF: 320379.pdf Raw_Pages: 148 Cleared_Total: 102
PDF: 320462.pdf Raw_Pages: 128 Cleared_Total: 101
PDF: 320466.pdf Raw_Pages: 134 Cleared_Total: 109
PDF: 320467.pdf Raw_Pages: 211 Cleared_Total: 164
PDF: 320468.pdf Raw_Pages: 197 Cleared_Total: 156
PDF: 320469.pdf Raw_Pages: 262 Cleared_Total: 180
Bozuk pdf: 320486.pdf
PDF: 320486.pdf Raw_Pages: 160 Cleared_Total: 0
PDF: 320494.pdf Raw_Pages: 285 Cleared_Total: 231
Bozuk pdf: 320508.pdf
PDF: 320508.pdf Raw_Pages: 164 Cleared_Total: 0
PDF: 320518.pdf Raw_Pages: 262 Cleared_Total: 200
PDF: 320519.pdf Raw_Pages: 123 Cleared_Total: 84
PDF: 320521.pdf Raw_Pages: 205 Cleared_Total: 146
PDF: 320523.pdf Raw_Pages: 122 Cleared_Total: 82
PDF: 320883.pdf

The PDF <_io.BufferedReader name='YOK_download_pdf/326788.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 326785.pdf Raw_Pages: 142 Cleared_Total: 100
Bozuk pdf: 326787.pdf
PDF: 326787.pdf Raw_Pages: 91 Cleared_Total: 0
PDF: 326788.pdf Raw_Pages: 90 Cleared_Total: 66
PDF: 326805.pdf Raw_Pages: 138 Cleared_Total: 96
PDF: 326806.pdf Raw_Pages: 190 Cleared_Total: 167
PDF: 326808.pdf Raw_Pages: 201 Cleared_Total: 159
PDF: 326811.pdf Raw_Pages: 150 Cleared_Total: 113
PDF: 326813.pdf Raw_Pages: 133 Cleared_Total: 109
PDF: 326819.pdf Raw_Pages: 283 Cleared_Total: 244
PDF: 326821.pdf Raw_Pages: 287 Cleared_Total: 243
PDF: 326823.pdf Raw_Pages: 249 Cleared_Total: 191
PDF: 326878.pdf Raw_Pages: 156 Cleared_Total: 133
PDF: 326891.pdf Raw_Pages: 146 Cleared_Total: 107
PDF: 326897.pdf Raw_Pages: 193 Cleared_Total: 160
PDF: 326905.pdf Raw_Pages: 166 Cleared_Total: 133
PDF: 326908.pdf Raw_Pages: 111 Cleared_Total: 86
PDF: 326947.pdf Raw_Pages: 215 Cleared_Total: 182
PDF: 326950.pdf Raw_Pages: 158 Cleared_Total: 125
PDF: 326961.pdf Raw_Pages: 186 Cleared_Total: 151
PDF: 327177.pdf Raw_Pages: 69 Clear

The PDF <_io.BufferedReader name='YOK_download_pdf/328537.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 328532.pdf Raw_Pages: 159 Cleared_Total: 112
PDF: 328537.pdf Raw_Pages: 472 Cleared_Total: 368
Bozuk pdf: 328539.pdf
PDF: 328539.pdf Raw_Pages: 124 Cleared_Total: 0
Bozuk pdf: 328547.pdf
PDF: 328547.pdf Raw_Pages: 91 Cleared_Total: 0
PDF: 328548.pdf Raw_Pages: 229 Cleared_Total: 180
Bozuk pdf: 328562.pdf
PDF: 328562.pdf Raw_Pages: 106 Cleared_Total: 0
PDF: 328564.pdf Raw_Pages: 229 Cleared_Total: 179
PDF: 328575.pdf Raw_Pages: 377 Cleared_Total: 329
PDF: 328577.pdf Raw_Pages: 142 Cleared_Total: 119
PDF: 328590.pdf Raw_Pages: 146 Cleared_Total: 119
PDF: 328595.pdf Raw_Pages: 228 Cleared_Total: 185
PDF: 328599.pdf Raw_Pages: 165 Cleared_Total: 137
PDF: 328600.pdf Raw_Pages: 190 Cleared_Total: 150
PDF: 328606.pdf Raw_Pages: 126 Cleared_Total: 97
PDF: 328613.pdf Raw_Pages: 301 Cleared_Total: 252
PDF: 328618.pdf Raw_Pages: 213 Cleared_Total: 169
PDF: 328627.pdf Raw_Pages: 120 Cleared_Total: 80
PDF: 328631.pdf Raw_Pages: 230 Cleared_Total: 190
PDF: 328642.pdf Raw_Pages: 152 Cleared_Tota

The PDF <_io.BufferedReader name='YOK_download_pdf/339073.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 339057.pdf Raw_Pages: 189 Cleared_Total: 143
PDF: 339073.pdf Raw_Pages: 104 Cleared_Total: 69
PDF: 339089.pdf Raw_Pages: 87 Cleared_Total: 55
PDF: 339105.pdf Raw_Pages: 114 Cleared_Total: 84
PDF: 339106.pdf Raw_Pages: 156 Cleared_Total: 117
Bozuk pdf: 339109.pdf
PDF: 339109.pdf Raw_Pages: 191 Cleared_Total: 0
PDF: 339122.pdf Raw_Pages: 103 Cleared_Total: 64
PDF: 339137.pdf Raw_Pages: 288 Cleared_Total: 251
Bozuk pdf: 339140.pdf
PDF: 339140.pdf Raw_Pages: 313 Cleared_Total: 0
PDF: 339165.pdf Raw_Pages: 145 Cleared_Total: 95
PDF: 339168.pdf Raw_Pages: 210 Cleared_Total: 163
PDF: 339176.pdf Raw_Pages: 307 Cleared_Total: 277
PDF: 339192.pdf Raw_Pages: 154 Cleared_Total: 117
PDF: 339205.pdf Raw_Pages: 239 Cleared_Total: 123
PDF: 339213.pdf Raw_Pages: 258 Cleared_Total: 226
PDF: 339216.pdf Raw_Pages: 236 Cleared_Total: 196
PDF: 339222.pdf Raw_Pages: 105 Cleared_Total: 70
PDF: 339239.pdf Raw_Pages: 103 Cleared_Total: 48
PDF: 339247.pdf Raw_Pages: 131 Cleared_Total: 95
PDF: 339250.pdf Raw

Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.


PDF: 339286.pdf Raw_Pages: 102 Cleared_Total: 70


Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The difference between the start and end offsets does not match the code length.
Ignoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. The start and end byte have different lengths.
Ignoring (part of) ToU

PDF: 339289.pdf Raw_Pages: 511 Cleared_Total: 465
PDF: 339298.pdf Raw_Pages: 200 Cleared_Total: 163
PDF: 339299.pdf Raw_Pages: 145 Cleared_Total: 117
PDF: 339300.pdf Raw_Pages: 298 Cleared_Total: 228
PDF: 339303.pdf Raw_Pages: 124 Cleared_Total: 85
PDF: 339307.pdf Raw_Pages: 156 Cleared_Total: 133
PDF: 339313.pdf Raw_Pages: 158 Cleared_Total: 113
PDF: 339316.pdf Raw_Pages: 159 Cleared_Total: 126
PDF: 339320.pdf Raw_Pages: 159 Cleared_Total: 134
PDF: 339331.pdf Raw_Pages: 80 Cleared_Total: 59
PDF: 339332.pdf Raw_Pages: 178 Cleared_Total: 142
PDF: 339333.pdf Raw_Pages: 122 Cleared_Total: 88
PDF: 339338.pdf Raw_Pages: 195 Cleared_Total: 163
PDF: 339347.pdf Raw_Pages: 188 Cleared_Total: 154
PDF: 339768.pdf Raw_Pages: 113 Cleared_Total: 89
PDF: 339793.pdf Raw_Pages: 136 Cleared_Total: 98
PDF: 339835.pdf Raw_Pages: 121 Cleared_Total: 96
PDF: 339836.pdf Raw_Pages: 196 Cleared_Total: 156
PDF: 339844.pdf Raw_Pages: 227 Cleared_Total: 184
PDF: 339846.pdf Raw_Pages: 147 Cleared_Total: 115
PDF: 34

The PDF <_io.BufferedReader name='YOK_download_pdf/344781.pdf'> contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case


PDF: 344776.pdf Raw_Pages: 202 Cleared_Total: 108
PDF: 344781.pdf Raw_Pages: 255 Cleared_Total: 203
PDF: 344783.pdf Raw_Pages: 254 Cleared_Total: 221
PDF: 344787.pdf Raw_Pages: 103 Cleared_Total: 75
PDF: 344802.pdf Raw_Pages: 112 Cleared_Total: 86
PDF: 344810.pdf Raw_Pages: 164 Cleared_Total: 130
PDF: 344817.pdf Raw_Pages: 88 Cleared_Total: 52
PDF: 344826.pdf Raw_Pages: 350 Cleared_Total: 299
PDF: 344835.pdf Raw_Pages: 197 Cleared_Total: 169
PDF: 344901.pdf Raw_Pages: 329 Cleared_Total: 279
PDF: 344903.pdf Raw_Pages: 296 Cleared_Total: 252
PDF: 344904.pdf Raw_Pages: 232 Cleared_Total: 204
PDF: 344906.pdf Raw_Pages: 243 Cleared_Total: 213
PDF: 344909.pdf Raw_Pages: 252 Cleared_Total: 209
PDF: 344911.pdf Raw_Pages: 211 Cleared_Total: 172
PDF: 344917.pdf Raw_Pages: 189 Cleared_Total: 148
PDF: 344918.pdf Raw_Pages: 289 Cleared_Total: 232
PDF: 344939.pdf Raw_Pages: 126 Cleared_Total: 87
PDF: 344943.pdf Raw_Pages: 91 Cleared_Total: 60
PDF: 344949.pdf Raw_Pages: 97 Cleared_Total: 58
PDF: 3449