# ***Libraries & Tools***

In [1]:
import fitz  # PyMuPDF
import requests
import io
import re
import os
import math
from tqdm import tqdm 
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor

# ***PDF Extraction & Analysis***

## ***Helper Functions & General Variables***

In [5]:
def download_pdf(paper_id, timeout=30):
    """Download a paper's PDF from arXiv into memory.

    Args:
        paper_id: arXiv identifier that is appended to
            ``https://arxiv.org/pdf/`` (e.g. ``"0704.1274"``).
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` may block indefinitely on a stalled connection.

    Returns:
        An ``io.BytesIO`` wrapping the response body, or ``None`` when the
        request fails or the status code is not 200.
    """
    try:
        response = requests.get(f"https://arxiv.org/pdf/{paper_id}", timeout=timeout)
    except requests.RequestException:
        # Network-level failures are reported like HTTP errors: as None.
        return None

    if response.status_code != 200:
        return None
    return io.BytesIO(response.content)

def extract_text_from_pdf(pdf):
    """Return the plain text of a PDF, or ``None`` if nothing could be read.

    Args:
        pdf: Either a file path, or the PDF content held in memory
            (``bytes``/``bytearray`` or a file-like object such as the
            ``io.BytesIO`` returned by ``download_pdf``).

    Returns:
        The text of all pages, each followed by a newline, or ``None`` when
        the document cannot be opened/parsed or contains only whitespace.
    """
    try:
        if isinstance(pdf, (bytes, bytearray)):
            doc = fitz.open(stream=pdf, filetype="pdf")
        elif hasattr(pdf, "read"):
            # In-memory documents must be passed as a stream, not as a filename.
            data = pdf.getvalue() if hasattr(pdf, "getvalue") else pdf.read()
            doc = fitz.open(stream=data, filetype="pdf")
        else:
            doc = fitz.open(pdf)
    except Exception:
        # Best-effort: any failure to open the document counts as "no text".
        return None

    try:
        text = "".join(page.get_text("text") + "\n" for page in doc)
    except Exception:
        # Damaged pages can fail during extraction; treat like an unreadable file.
        return None
    finally:
        doc.close()  # release the underlying file handle / buffers

    return text if text.strip() else None  # Return None if empty

def extract_introduction(text, word_limit=500):
    """Return roughly the first *word_limit* words of the introduction section.

    The introduction starts on the line after a heading that consists only of
    the word "Introduction" (``INTRODUCTION``/``Introduction``/``introduction``),
    optionally preceded by an Arabic number, a Roman numeral or a single
    capital letter, each optionally followed by a dot.

    Args:
        text: Full document text with lines separated by ``"\\n"``.
        word_limit: Maximum number of words (``\\w+`` runs) to keep.

    Returns:
        The collected lines joined with spaces, with words that were
        hyphenated across a line break glued back together, or ``None`` when
        no heading is found or nothing follows it.
    """
    if not text:
        return None

    heading_pattern = re.compile(r'^\s*(?:\d+\.?|[IVXLCDM]+\.?|[A-Z]\.?)?\s*(INTRODUCTION|Introduction|introduction)\s*$')
    word_pattern = re.compile(r"\b\w+\b")

    kept_lines = []
    word_count = 0
    in_introduction = False

    for line in text.split("\n"):
        if not in_introduction:
            # The heading line itself is never part of the extracted text.
            in_introduction = heading_pattern.match(line) is not None
            continue

        matches = list(word_pattern.finditer(line))
        if word_count + len(matches) <= word_limit:
            kept_lines.append(line)
            word_count += len(matches)
            continue

        # This line crosses the limit: keep it only up to the end of the last
        # allowed word. The match offset is used (instead of searching for the
        # word's text) so repeated words cannot cut the line too early.
        remaining = word_limit - word_count
        if remaining > 0:
            kept_lines.append(line[:matches[remaining - 1].end()])
        break

    # Re-join words that were split by a hyphen at a line break ("exam- ple").
    cleaned_text = re.sub(r'(\w+)-\s+(\w+)', r'\1\2', " ".join(kept_lines)).strip()
    return cleaned_text if cleaned_text else None  # None if intro not found


def update_output_file(output_file, successful_retries):
    """Overwrite selected lines of *output_file* in place.

    Args:
        output_file: Path of a text file that is read and rewritten as UTF-8
            (the same encoding ``concatenate_files`` uses).
        successful_retries: Iterable of ``(index, new_text)`` pairs, where
            ``index`` is the zero-based line number to replace. Pairs whose
            index is outside the file are ignored.
    """
    with open(output_file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    for index, new_text in successful_retries:
        if 0 <= index < len(lines):  # Ensure index is valid
            # Keep each entry on a single line: an embedded line break would
            # shift every following line index.
            flattened = (
                new_text.strip()
                .replace("\r\n", " ")
                .replace("\r", " ")
                .replace("\n", " ")
            )
            lines[index] = flattened + "\n"

    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(lines)


def concatenate_files(file1, file2, output_file):
    """Merge two line-aligned UTF-8 text files into *output_file*.

    Lines are paired by position; pairing stops at the end of the shorter
    file. For each pair the output line is ``"<line1>. <line2>"``, unless the
    second line is exactly the ``FAILED`` marker, in which case only the first
    line is written.

    Args:
        file1: Path of the file providing the first part of every line.
        file2: Path of the file providing the second part, or ``FAILED``.
        output_file: Path of the file to (over)write.
    """
    with open(file1, 'r', encoding='utf-8') as f1, \
            open(file2, 'r', encoding='utf-8') as f2, \
            open(output_file, 'w', encoding='utf-8') as out:
        for line1, line2 in zip(f1, f2):
            first = line1.strip()
            second = line2.strip()
            # Compare against the whole line: a substring test would also drop
            # genuine text that merely contains the word "FAILED".
            if second == "FAILED":
                out.write(f"{first}\n")
            else:
                out.write(f"{first}. {second}\n")

In [4]:
# Input: list of node (paper) IDs; read further below to build `paper_ids`.
id_file = "graph-v2/Node_IDs.txt"  # File containing one ID per line

# Output path for the extracted text.
# NOTE(review): presumably one line per node ID — confirm against the code that writes it.
output_file = "graph-v2/ANC_500.txt"

## ***Serial***

In [6]:
# Function to process PDFs stored in directories
def process_pdfs(pdf_dirs, results):
    """Extract the introduction of every ``.pdf`` file in the given folders.

    A file name is turned into a paper ID by replacing ``_`` with ``/`` and
    removing ``.pdf``. Files with any other extension are skipped.

    Args:
        pdf_dirs: Iterable of directory paths to scan (non-recursively).
        results: Dict that is updated in place with ``{paper_id: intro_text}``
            for every successfully processed file.

    Returns:
        A tuple ``(results, successes, failures)``: the same dict that was
        passed in, the number of introductions extracted, and the list of
        paper IDs whose text or introduction could not be extracted.
    """
    extracted = 0
    failed_ids = []

    for folder in pdf_dirs:
        for entry in tqdm(os.listdir(folder), desc="Processing files", unit="file"):
            if not entry.endswith(".pdf"):
                continue

            paper_id = entry.replace("_", "/").replace(".pdf", "")

            raw_text = extract_text_from_pdf(os.path.join(folder, entry))
            # No text at all and no introduction heading are both failures.
            intro = extract_introduction(raw_text) if raw_text is not None else None
            if intro is None:
                failed_ids.append(paper_id)
                continue

            results[paper_id] = intro
            extracted += 1

    return results, extracted, failed_ids



# Function to process individual papers
def process_pdf(paper_id):
    """Download one paper and return the start of its introduction.

    Every failure is reported with ``print`` and turned into ``None`` so that
    a single bad paper never aborts a batch.

    Args:
        paper_id: Identifier passed to ``download_pdf``.

    Returns:
        The text produced by ``extract_introduction``, or ``None`` if the
        download, the text extraction or the introduction lookup failed.
    """
    try:
        pdf_stream = download_pdf(paper_id)
        if pdf_stream is None:
            print(f"{paper_id} failed to download")
            return None

        text = extract_text_from_pdf(pdf_stream)
        if text is None:
            print(f"Failed to extract text from {paper_id}")
            return None

        introduction_text = extract_introduction(text)
        if introduction_text is None:
            print(f"Failed to extract text from introduction section for {paper_id}")
            return None

        return introduction_text
    except Exception as e:
        # Deliberately broad (best-effort processing), but no longer silent.
        print(f"Unexpected error while processing {paper_id}: {e}")
        return None


In [None]:
# Hand-picked papers to run through the download-based pipeline.
paper_ids = ["0704.1274", "0704.1028", "0704.0954", "0704.1308"]
# Only filled when this loop is switched to retry "failed_papers" (see the commented lines).
successful_retries = []

for paper_id in paper_ids:  # for idx, paper_id in failed_papers[0]:
    intro_text = process_pdf(paper_id)
    if not intro_text:
        continue
    # A single call that prints the introduction framed by two separator lines.
    print(
        "\n==================================================",
        intro_text,
        "==================================================\n",
        sep="\n",
    )
    #successful_retries.append((idx, intro_text)) # "successful_retries" will be used with "failed_papers"

In [7]:
# Folders that hold the locally stored PDFs.
pdf_dirs = ['Papers 1', 'Papers 2', 'Papers 3', 'Papers 4', 'Papers 5', 'Papers 6']

results = {}

# One ID per line; a set removes duplicates and gives fast membership tests.
with open(id_file, "r") as f:
    paper_ids = set(map(str.strip, f))

# Every paper starts out as "FAILED" until an introduction is extracted for it.
results.update(dict.fromkeys(paper_ids, "FAILED"))

In [8]:
# Notebook echo: display the set of IDs loaded from `id_file`.
paper_ids

{'1809.01604',
 'cmp-lg/9406023',
 '1307.1630',
 'cs/0511028',
 '1507.05122',
 '1709.01305',
 '1811.01721',
 '1307.6458',
 '1807.07247',
 '1812.10924',
 '1502.06895',
 '1605.02401',
 '1206.5253',
 '1711.04731',
 '1811.08764',
 '1703.08580',
 '1806.03332',
 '1808.07733',
 '1605.06492',
 '1901.03775',
 '1812.02207',
 '1803.00384',
 '1405.5864',
 '1806.06237',
 '1812.07858',
 '1504.02141',
 '1603.05359',
 '1803.06077',
 '1803.08554',
 '1604.04879',
 '0711.1056',
 '1804.02508',
 '1802.05581',
 '1808.06645',
 '1402.2224',
 '1509.01951',
 '1510.08583',
 '1805.01220',
 '1501.06206',
 '1809.06065',
 '0911.4530',
 '1704.01523',
 '1809.08343',
 '1811.09678',
 '1002.2780',
 '1204.4249',
 '1610.00843',
 '1511.09180',
 '1808.04816',
 '1710.08167',
 '1511.06566',
 '1809.00970',
 '1808.07456',
 '1901.06086',
 '1309.5843',
 '1705.09407',
 '0801.3272',
 '1605.09519',
 '1303.4085',
 '1809.01733',
 '1809.10934',
 '1707.01825',
 '1603.02366',
 '1806.08698',
 '1504.00353',
 '1801.01627',
 '1804.07942',
 '1

In [9]:
# Notebook echo: display the results dict (every ID is still "FAILED" at this point).
results

{'1809.01604': 'FAILED',
 'cmp-lg/9406023': 'FAILED',
 '1307.1630': 'FAILED',
 'cs/0511028': 'FAILED',
 '1507.05122': 'FAILED',
 '1709.01305': 'FAILED',
 '1811.01721': 'FAILED',
 '1307.6458': 'FAILED',
 '1807.07247': 'FAILED',
 '1812.10924': 'FAILED',
 '1502.06895': 'FAILED',
 '1605.02401': 'FAILED',
 '1206.5253': 'FAILED',
 '1711.04731': 'FAILED',
 '1811.08764': 'FAILED',
 '1703.08580': 'FAILED',
 '1806.03332': 'FAILED',
 '1808.07733': 'FAILED',
 '1605.06492': 'FAILED',
 '1901.03775': 'FAILED',
 '1812.02207': 'FAILED',
 '1803.00384': 'FAILED',
 '1405.5864': 'FAILED',
 '1806.06237': 'FAILED',
 '1812.07858': 'FAILED',
 '1504.02141': 'FAILED',
 '1603.05359': 'FAILED',
 '1803.06077': 'FAILED',
 '1803.08554': 'FAILED',
 '1604.04879': 'FAILED',
 '0711.1056': 'FAILED',
 '1804.02508': 'FAILED',
 '1802.05581': 'FAILED',
 '1808.06645': 'FAILED',
 '1402.2224': 'FAILED',
 '1509.01951': 'FAILED',
 '1510.08583': 'FAILED',
 '1805.01220': 'FAILED',
 '1501.06206': 'FAILED',
 '1809.06065': 'FAILED',
 '

In [10]:
# Sanity check: number of IDs, then number of result entries, one per line.
print(len(paper_ids), len(results), sep="\n")

52596
52596


In [11]:
# Run the extraction over all local PDF folders; `results` is updated in place
# and returned together with the success count and the list of failed IDs.
results, successes, failures = process_pdfs(pdf_dirs, results)

# Summary of the run next to the total number of node IDs.
print(f'Successes: {successes}. Failures: {len(failures)}. Node IDs: {len(paper_ids)}.')

Processing files:   2%|▏         | 215/9557 [00:19<19:42,  7.90file/s]

MuPDF error: syntax error: could not parse color space (275 0 R)

MuPDF error: syntax error: could not parse color space (392 0 R)

MuPDF error: syntax error: could not parse color space (694 0 R)

MuPDF error: syntax error: could not parse color space (1005 0 R)

MuPDF error: syntax error: could not parse color space (1440 0 R)



Processing files:   6%|▌         | 587/9557 [00:49<08:54, 16.79file/s]

MuPDF error: syntax error: could not parse color space (427 0 R)

MuPDF error: syntax error: could not parse color space (660 0 R)

MuPDF error: syntax error: could not parse color space (834 0 R)

MuPDF error: syntax error: could not parse color space (855 0 R)

MuPDF error: syntax error: could not parse color space (875 0 R)



Processing files: 100%|██████████| 9557/9557 [12:27<00:00, 12.79file/s]  
Processing files:   5%|▌         | 455/8952 [00:31<09:23, 15.08file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04684pt'



Processing files:   9%|▊         | 778/8952 [00:52<08:10, 16.66file/s]

MuPDF error: syntax error: could not parse color space (382 0 R)



Processing files:  10%|█         | 918/8952 [01:07<14:14,  9.40file/s]  

MuPDF error: syntax error: could not parse color space (134 0 R)



Processing files:  16%|█▌        | 1398/8952 [01:41<09:56, 12.67file/s]

MuPDF error: syntax error: could not parse color space (101 0 R)

MuPDF error: syntax error: could not parse color space (260 0 R)

MuPDF error: syntax error: could not parse color space (605 0 R)



Processing files:  16%|█▋        | 1470/8952 [01:46<08:50, 14.10file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'a0'



Processing files:  21%|██▏       | 1919/8952 [02:20<07:15, 16.16file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  22%|██▏       | 1938/8952 [02:21<07:18, 16.00file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04675pt'



Processing files:  28%|██▊       | 2546/8952 [03:35<10:01, 10.65file/s]  

MuPDF error: syntax error: could not parse color space (334 0 R)

MuPDF error: syntax error: could not parse color space (430 0 R)

MuPDF error: syntax error: could not parse color space (685 0 R)



Processing files:  34%|███▎      | 3017/8952 [04:10<05:31, 17.89file/s]

MuPDF error: syntax error: could not parse color space (490 0 R)



Processing files:  41%|████      | 3690/8952 [04:59<04:02, 21.68file/s]  

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04684pt'



Processing files:  47%|████▋     | 4170/8952 [05:32<04:40, 17.02file/s]

MuPDF error: syntax error: could not parse color space (499 0 R)

MuPDF error: syntax error: could not parse color space (555 0 R)



Processing files:  48%|████▊     | 4265/8952 [05:39<05:00, 15.58file/s]

MuPDF error: syntax error: could not parse color space (604 0 R)



Processing files:  48%|████▊     | 4308/8952 [05:42<06:44, 11.48file/s]

MuPDF error: syntax error: could not parse color space (335 0 R)



Processing files:  48%|████▊     | 4310/8952 [05:43<07:09, 10.80file/s]

MuPDF error: syntax error: could not parse color space (176 0 R)



Processing files:  51%|█████     | 4561/8952 [06:00<04:29, 16.27file/s]

MuPDF error: syntax error: could not parse color space (270 0 R)

MuPDF error: syntax error: could not parse color space (556 0 R)



Processing files:  51%|█████▏    | 4599/8952 [06:04<08:40,  8.36file/s]

MuPDF error: syntax error: could not parse color space (64 0 R)



Processing files:  53%|█████▎    | 4711/8952 [06:14<06:15, 11.30file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  54%|█████▍    | 4877/8952 [06:26<04:16, 15.86file/s]

MuPDF error: syntax error: could not parse color space (634 0 R)



Processing files:  56%|█████▌    | 5026/8952 [06:37<04:08, 15.83file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  57%|█████▋    | 5111/8952 [06:43<03:30, 18.22file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  58%|█████▊    | 5182/8952 [06:47<03:02, 20.68file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '415.41023pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '617.42479pt'



Processing files:  60%|█████▉    | 5348/8952 [07:02<03:08, 19.14file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  61%|██████    | 5419/8952 [07:07<03:23, 17.37file/s]

MuPDF error: library error: FT_New_Memory_Face(Times): unknown file format



Processing files:  61%|██████▏   | 5493/8952 [07:12<04:14, 13.58file/s]

MuPDF error: syntax error: could not parse color space (302 0 R)

MuPDF error: syntax error: could not parse color space (437 0 R)

MuPDF error: syntax error: could not parse color space (529 0 R)

MuPDF error: syntax error: could not parse color space (667 0 R)



Processing files:  68%|██████▊   | 6059/8952 [07:51<02:44, 17.59file/s]

MuPDF error: syntax error: could not parse color space (555 0 R)



Processing files:  68%|██████▊   | 6118/8952 [07:55<02:44, 17.18file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.75'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.75'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.75'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.75'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'



Processing files:  72%|███████▏  | 6422/8952 [08:19<02:10, 19.44file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  81%|████████  | 7247/8952 [09:24<01:23, 20.54file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  82%|████████▏ | 7319/8952 [09:29<02:05, 12.98file/s]

MuPDF error: syntax error: could not parse color space (1083 0 R)



Processing files:  87%|████████▋ | 7760/8952 [10:03<01:20, 14.74file/s]

MuPDF error: syntax error: could not parse color space (807 0 R)



Processing files:  91%|█████████▏| 8182/8952 [10:38<00:35, 21.58file/s]

MuPDF error: syntax error: could not parse color space (252 0 R)



Processing files:  92%|█████████▏| 8259/8952 [10:46<00:56, 12.31file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  95%|█████████▌| 8509/8952 [11:03<00:28, 15.42file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  96%|█████████▌| 8603/8952 [11:12<00:22, 15.53file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'

MuPDF error: syntax error: unknown keyword: 'epdf'

MuPDF error: syntax error: unknown keyword: 'bbox'

MuPDF error: syntax error: unknown keyword: 'clip'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '433.62pt'

MuPDF error: syntax error: unknown keyword: 'image'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '296.30743pt'

MuPDF error: syntax error: unknown keyword: 'epdf'

MuPDF error: syntax error: unknown keyword: 'bbox'

MuPDF error: syntax error: unknown keyword: 'clip'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '325.215pt'

MuPDF error: syntax error

Processing files: 100%|██████████| 8952/8952 [11:37<00:00, 12.84file/s]
Processing files:   5%|▍         | 402/8904 [00:31<06:34, 21.56file/s]

MuPDF error: format error: cmsOpenProfileFromMem failed

MuPDF error: format error: cmsOpenProfileFromMem failed



Processing files:   5%|▍         | 439/8904 [00:33<09:25, 14.97file/s]

MuPDF error: syntax error: could not parse color space (81 0 R)

MuPDF error: syntax error: could not parse color space (368 0 R)



Processing files:   6%|▋         | 560/8904 [00:41<08:37, 16.13file/s]

MuPDF error: syntax error: could not parse color space (1078 0 R)



Processing files:   8%|▊         | 681/8904 [00:50<10:16, 13.33file/s]

MuPDF error: syntax error: could not parse color space (235 0 R)



Processing files:   8%|▊         | 686/8904 [00:50<08:59, 15.24file/s]

MuPDF error: syntax error: could not parse color space (159 0 R)



Processing files:  10%|█         | 929/8904 [01:07<11:21, 11.70file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  15%|█▌        | 1379/8904 [01:39<10:02, 12.49file/s]

MuPDF error: syntax error: could not parse color space (245 0 R)



Processing files:  18%|█▊        | 1566/8904 [01:51<05:51, 20.89file/s]

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error:

Processing files:  20%|█▉        | 1777/8904 [02:06<06:10, 19.26file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  20%|██        | 1806/8904 [02:08<06:58, 16.95file/s]

MuPDF error: syntax error: could not parse color space (1000 0 R)



Processing files:  22%|██▏       | 1991/8904 [02:24<06:03, 19.04file/s]

MuPDF error: syntax error: could not parse color space (272 0 R)



Processing files:  27%|██▋       | 2372/8904 [02:50<05:19, 20.41file/s]

MuPDF error: syntax error: could not parse color space (135 0 R)



Processing files:  30%|██▉       | 2628/8904 [03:10<06:21, 16.47file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  32%|███▏      | 2879/8904 [03:33<07:39, 13.13file/s]  

MuPDF error: syntax error: could not parse color space (245 0 R)

MuPDF error: syntax error: could not parse color space (341 0 R)

MuPDF error: syntax error: could not parse color space (567 0 R)

MuPDF error: syntax error: could not parse color space (2851 0 R)



Processing files:  36%|███▋      | 3249/8904 [03:58<05:26, 17.33file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  38%|███▊      | 3397/8904 [04:08<09:06, 10.07file/s]

MuPDF error: syntax error: could not parse color space (191 0 R)



Processing files:  39%|███▊      | 3431/8904 [04:10<04:12, 21.65file/s]

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'B

Processing files:  40%|████      | 3599/8904 [04:24<04:56, 17.90file/s]

MuPDF error: syntax error: could not parse color space (243 0 R)



Processing files:  43%|████▎     | 3813/8904 [04:42<06:00, 14.11file/s]  

MuPDF error: syntax error: could not parse color space (317 0 R)



Processing files:  48%|████▊     | 4280/8904 [05:23<05:35, 13.80file/s]  

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files:  51%|█████     | 4528/8904 [05:44<03:52, 18.82file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  54%|█████▎    | 4779/8904 [06:01<04:00, 17.15file/s]

MuPDF error: syntax error: could not parse color space (509 0 R)



Processing files:  54%|█████▍    | 4819/8904 [06:08<07:14,  9.41file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '415.41023pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '617.42479pt'



Processing files:  55%|█████▍    | 4860/8904 [06:10<04:43, 14.25file/s]

MuPDF error: syntax error: could not parse color space (360 0 R)

MuPDF error: syntax error: could not parse color space (483 0 R)



Processing files:  58%|█████▊    | 5161/8904 [07:03<03:47, 16.48file/s]  

MuPDF error: syntax error: could not parse color space (173 0 R)

MuPDF error: syntax error: could not parse color space (296 0 R)



Processing files:  62%|██████▏   | 5492/8904 [07:26<03:40, 15.46file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'



Processing files:  66%|██████▌   | 5879/8904 [08:00<03:23, 14.87file/s]  

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  67%|██████▋   | 5988/8904 [08:08<03:00, 16.19file/s]

MuPDF error: syntax error: could not parse color space (199 0 R)

MuPDF error: syntax error: could not parse color space (427 0 R)



Processing files:  78%|███████▊  | 6950/8904 [09:18<03:23,  9.59file/s]

MuPDF error: syntax error: could not parse color space (408 0 R)



Processing files:  78%|███████▊  | 6976/8904 [09:20<02:20, 13.75file/s]

MuPDF error: syntax error: could not parse color space (618 0 R)

MuPDF error: syntax error: could not parse color space (670 0 R)



Processing files:  80%|███████▉  | 7112/8904 [09:33<01:39, 17.99file/s]

MuPDF error: syntax error: could not parse color space (409 0 R)



Processing files:  83%|████████▎ | 7418/8904 [09:53<01:16, 19.45file/s]

MuPDF error: syntax error: could not parse color space (454 0 R)

MuPDF error: syntax error: could not parse color space (3740 0 R)

MuPDF error: syntax error: could not parse color space (3740 0 R)

MuPDF error: syntax error: could not parse color space (4165 0 R)

MuPDF error: syntax error: could not parse color space (4265 0 R)

MuPDF error: syntax error: could not parse color space (4265 0 R)

MuPDF error: syntax error: could not parse color space (4558 0 R)

MuPDF error: syntax error: could not parse color space (4650 0 R)

MuPDF error: syntax error: could not parse color space (4650 0 R)

MuPDF error: syntax error: could not parse color space (5058 0 R)

MuPDF error: syntax error: could not parse color space (5058 0 R)

MuPDF error: syntax error: could not parse color space (5293 0 R)

MuPDF error: syntax error: could not parse color space (5293 0 R)



Processing files:  84%|████████▍ | 7519/8904 [10:00<01:14, 18.63file/s]

MuPDF error: syntax error: could not parse color space (597 0 R)

MuPDF error: syntax error: could not parse color space (662 0 R)



Processing files:  86%|████████▋ | 7698/8904 [10:11<01:34, 12.80file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  92%|█████████▏| 8204/8904 [11:04<00:30, 22.93file/s]

MuPDF error: format error: cmsOpenProfileFromMem failed



Processing files: 100%|██████████| 8904/8904 [11:52<00:00, 12.50file/s]
Processing files:  13%|█▎        | 1185/8892 [01:36<22:03,  5.82file/s] 

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'



Processing files:  16%|█▌        | 1400/8892 [01:53<08:29, 14.70file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04675pt'



Processing files:  16%|█▋        | 1462/8892 [01:58<07:47, 15.89file/s]

MuPDF error: syntax error: could not parse color space (576 0 R)



Processing files:  17%|█▋        | 1492/8892 [02:00<07:31, 16.37file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  25%|██▌       | 2252/8892 [02:59<07:51, 14.08file/s]  

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find E

Processing files:  28%|██▊       | 2482/8892 [03:23<08:03, 13.27file/s]

MuPDF error: syntax error: could not parse color space (365 0 R)

MuPDF error: syntax error: could not parse color space (540 0 R)



Processing files:  31%|███       | 2733/8892 [03:41<05:48, 17.68file/s]

MuPDF error: syntax error: could not parse color space (363 0 R)

MuPDF error: syntax error: could not parse color space (466 0 R)

MuPDF error: syntax error: could not parse color space (876 0 R)



Processing files:  32%|███▏      | 2803/8892 [03:45<06:15, 16.22file/s]

MuPDF error: syntax error: could not parse color space (133 0 R)



Processing files:  32%|███▏      | 2830/8892 [03:46<07:11, 14.05file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  32%|███▏      | 2833/8892 [03:47<06:20, 15.92file/s]

MuPDF error: syntax error: could not parse color space (493 0 R)

MuPDF error: syntax error: could not parse color space (657 0 R)

MuPDF error: syntax error: could not parse color space (703 0 R)

MuPDF error: syntax error: could not parse color space (799 0 R)

MuPDF error: syntax error: could not parse color space (908 0 R)



Processing files:  37%|███▋      | 3307/8892 [04:26<11:11,  8.32file/s]

MuPDF error: syntax error: could not parse color space (345 0 R)

MuPDF error: syntax error: could not parse color space (734 0 R)

MuPDF error: syntax error: could not parse color space (792 0 R)

MuPDF error: syntax error: could not parse color space (828 0 R)

MuPDF error: syntax error: could not parse color space (924 0 R)

MuPDF error: syntax error: could not parse color space (995 0 R)

MuPDF error: syntax error: could not parse color space (1125 0 R)

MuPDF error: syntax error: could not parse color space (1332 0 R)

MuPDF error: syntax error: could not parse color space (1392 0 R)

MuPDF error: syntax error: could not parse color space (1524 0 R)



Processing files:  43%|████▎     | 3781/8892 [04:58<05:46, 14.76file/s]

MuPDF error: syntax error: could not parse color space (607 0 R)



Processing files:  45%|████▌     | 4023/8892 [05:17<04:44, 17.13file/s]

MuPDF error: syntax error: could not parse color space (75 0 R)

MuPDF error: syntax error: could not parse color space (138 0 R)



Processing files:  49%|████▉     | 4365/8892 [05:41<05:33, 13.57file/s]

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files:  50%|█████     | 4449/8892 [05:47<05:10, 14.29file/s]

MuPDF error: syntax error: could not parse color space (427 0 R)

MuPDF error: syntax error: could not parse color space (674 0 R)

MuPDF error: syntax error: could not parse color space (427 0 R)

MuPDF error: syntax error: could not parse color space (1036 0 R)



Processing files:  51%|█████     | 4498/8892 [05:50<05:01, 14.59file/s]

MuPDF error: syntax error: could not parse color space (533 0 R)

MuPDF error: syntax error: could not parse color space (803 0 R)

MuPDF error: syntax error: could not parse color space (890 0 R)

MuPDF error: syntax error: could not parse color space (979 0 R)

MuPDF error: syntax error: could not parse color space (1064 0 R)



Processing files:  54%|█████▎    | 4764/8892 [06:39<03:29, 19.70file/s]  

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  56%|█████▌    | 5001/8892 [06:53<03:51, 16.80file/s]

MuPDF error: syntax error: could not parse color space (86 0 R)

MuPDF error: syntax error: could not parse color space (208 0 R)



Processing files:  57%|█████▋    | 5065/8892 [06:57<03:14, 19.65file/s]

MuPDF error: syntax error: could not parse color space (329 0 R)

MuPDF error: syntax error: could not parse color space (356 0 R)

MuPDF error: syntax error: could not parse color space (481 0 R)

MuPDF error: syntax error: could not parse color space (577 0 R)



Processing files:  59%|█████▉    | 5285/8892 [07:12<03:37, 16.58file/s]

MuPDF error: syntax error: could not parse color space (319 0 R)



Processing files:  60%|█████▉    | 5323/8892 [07:15<03:55, 15.17file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04675pt'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: syntax error: unknown keyword: 'ecolor'

MuPDF error: syntax error: unknown keyword: 'bcolor'

MuPDF error: synt

Processing files:  67%|██████▋   | 5937/8892 [07:59<02:51, 17.22file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca1.0'

MuPDF 

Processing files:  76%|███████▌  | 6756/8892 [09:07<03:49,  9.29file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.4'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.4'



Processing files:  80%|███████▉  | 7111/8892 [09:54<01:52, 15.90file/s]  

MuPDF error: syntax error: could not parse color space (466 0 R)

MuPDF error: syntax error: could not parse color space (905 0 R)



Processing files:  81%|████████  | 7162/8892 [09:59<02:35, 11.13file/s]

MuPDF error: format error: invalid code in 2d faxd

MuPDF error: syntax error: unknown keyword: '9411.5827.93'

MuPDF error: syntax error: unknown keyword: 'h7.457'

MuPDF error: syntax error: unknown keyword: '28.297252.677'

MuPDF error: syntax error: unknown keyword: '785.797100.96'

MuPDF error: syntax error: unknown keyword: '214..59'

MuPDF error: syntax error: unknown keyword: '18.001785.076'

MuPDF error: syntax error: unknown keyword: 'h74.859'

MuPDF error: syntax error: unknown keyword: '2.67.507'

MuPDF error: syntax error: unknown keyword: '6252.94163110.73'

MuPDF error: syntax error: unknown keyword: '2.67.50741631.43044.304'

MuPDF error: syntax error: unknown keyword: '693.5430443.46'

MuPDF error: syntax error: unknown keyword: '4124.348.539'

MuPDF error: syntax error: unknown keyword: '4l'

MuPDF error: syntax error: unknown keyword: '44.24149.687.73'

MuPDF error: syntax error: unknown keyword: '2.674149.687.73'

MuPDF error: syntax error: unknown keyword: '47.4149

Processing files:  82%|████████▏ | 7267/8892 [10:07<02:06, 12.86file/s]

MuPDF error: syntax error: could not parse color space (203 0 R)



Processing files:  87%|████████▋ | 7778/8892 [10:46<06:42,  2.77file/s]

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files:  88%|████████▊ | 7856/8892 [10:53<01:59,  8.66file/s]

MuPDF error: syntax error: could not parse color space (205 0 R)



Processing files:  90%|█████████ | 8034/8892 [11:10<01:27,  9.86file/s]

MuPDF error: syntax error: could not parse color space (842 0 R)



Processing files:  92%|█████████▏| 8160/8892 [11:20<00:47, 15.36file/s]

MuPDF error: syntax error: could not parse color space (1845 0 R)



Processing files:  92%|█████████▏| 8172/8892 [11:21<00:42, 16.91file/s]

MuPDF error: syntax error: syntax error in content stream

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'



Processing files:  96%|█████████▌| 8523/8892 [11:49<00:27, 13.48file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files: 100%|██████████| 8892/8892 [12:14<00:00, 12.11file/s]
Processing files:  11%|█         | 945/8824 [01:18<12:46, 10.28file/s]  

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  13%|█▎        | 1119/8824 [01:31<09:56, 12.91file/s]

MuPDF error: syntax error: could not parse color space (2923 0 R)

MuPDF error: syntax error: could not parse color space (6643 0 R)

MuPDF error: syntax error: could not parse color space (9671 0 R)



Processing files:  13%|█▎        | 1139/8824 [01:33<08:54, 14.37file/s]

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files:  17%|█▋        | 1542/8824 [02:02<07:30, 16.18file/s]

MuPDF error: syntax error: syntax error in content stream

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

Processing files:  18%|█▊        | 1610/8824 [02:08<13:53,  8.65file/s]

MuPDF error: library error: FT_New_Memory_Face(Times): unknown file format

MuPDF error: library error: FT_New_Memory_Face(Times,Bold): unknown file format

MuPDF error: library error: FT_New_Memory_Face(Times,Italic): unknown file format



Processing files:  26%|██▌       | 2259/8824 [02:58<07:09, 15.29file/s]

MuPDF error: syntax error: could not parse color space (425 0 R)



Processing files:  29%|██▉       | 2572/8824 [03:22<06:09, 16.92file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04675pt'



Processing files:  30%|██▉       | 2606/8824 [03:25<09:07, 11.35file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  30%|███       | 2677/8824 [03:30<09:22, 10.93file/s]

MuPDF error: syntax error: could not parse color space (203 0 R)



Processing files:  34%|███▍      | 3040/8824 [03:54<05:30, 17.51file/s]

MuPDF error: syntax error: could not parse color space (418 0 R)

MuPDF error: syntax error: could not parse color space (716 0 R)

MuPDF error: syntax error: could not parse color space (1036 0 R)

MuPDF error: syntax error: could not parse color space (1107 0 R)

MuPDF error: syntax error: could not parse color space (1185 0 R)



Processing files:  43%|████▎     | 3836/8824 [05:03<04:22, 18.98file/s]  

MuPDF error: syntax error: could not parse color space (402 0 R)



Processing files:  46%|████▌     | 4027/8824 [05:16<06:41, 11.94file/s]

MuPDF error: syntax error: could not parse color space (717 0 R)



Processing files:  47%|████▋     | 4120/8824 [05:21<06:06, 12.82file/s]

MuPDF error: syntax error: could not parse color space (635 0 R)



Processing files:  48%|████▊     | 4217/8824 [05:27<04:41, 16.38file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF 

Processing files:  50%|████▉     | 4409/8824 [05:41<04:52, 15.09file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04684pt'



Processing files:  51%|█████     | 4473/8824 [05:46<04:31, 16.01file/s]

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'



Processing files:  51%|█████     | 4498/8824 [05:49<17:30,  4.12file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  51%|█████▏    | 4525/8824 [05:51<05:44, 12.46file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  53%|█████▎    | 4712/8824 [06:08<03:20, 20.49file/s]

MuPDF error: syntax error: could not parse color space (1035 0 R)

MuPDF error: syntax error: could not parse color space (1099 0 R)

MuPDF error: syntax error: could not parse color space (1193 0 R)

MuPDF error: syntax error: could not parse color space (5794 0 R)



Processing files:  54%|█████▍    | 4761/8824 [06:12<03:00, 22.45file/s]

MuPDF error: syntax error: could not parse color space (821 0 R)

MuPDF error: syntax error: could not parse color space (1539 0 R)

MuPDF error: syntax error: could not parse color space (1603 0 R)

MuPDF error: syntax error: could not parse color space (1666 0 R)

MuPDF error: syntax error: could not parse color space (1937 0 R)

MuPDF error: syntax error: could not parse color space (2037 0 R)

MuPDF error: syntax error: could not parse color space (2147 0 R)



Processing files:  55%|█████▍    | 4813/8824 [06:16<03:41, 18.12file/s]

MuPDF error: syntax error: could not parse color space (285 0 R)



Processing files:  55%|█████▌    | 4872/8824 [06:20<04:21, 15.09file/s]

MuPDF error: syntax error: could not parse color space (453 0 R)

MuPDF error: syntax error: could not parse color space (503 0 R)

MuPDF error: syntax error: could not parse color space (713 0 R)

MuPDF error: syntax error: could not parse color space (734 0 R)

MuPDF error: syntax error: could not parse color space (754 0 R)



Processing files:  56%|█████▌    | 4938/8824 [06:24<02:30, 25.88file/s]

MuPDF error: syntax error: could not parse color space (379 0 R)

MuPDF error: syntax error: could not parse color space (4509 0 R)



Processing files:  58%|█████▊    | 5120/8824 [06:39<03:36, 17.11file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.0'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.0'

MuPDF 

Processing files:  61%|██████    | 5387/8824 [07:03<04:36, 12.44file/s]

MuPDF error: syntax error: could not parse color space (56 0 R)

MuPDF error: syntax error: could not parse color space (207 0 R)

MuPDF error: syntax error: could not parse color space (342 0 R)

MuPDF error: syntax error: could not parse color space (406 0 R)

MuPDF error: syntax error: could not parse color space (483 0 R)

MuPDF error: syntax error: could not parse color space (513 0 R)

MuPDF error: syntax error: could not parse color space (596 0 R)

MuPDF error: syntax error: could not parse color space (723 0 R)

MuPDF error: syntax error: could not parse color space (872 0 R)



Processing files:  64%|██████▍   | 5642/8824 [07:22<04:39, 11.37file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  71%|███████   | 6264/8824 [08:10<03:21, 12.70file/s]

MuPDF error: format error: No default Layer config



Processing files:  72%|███████▏  | 6336/8824 [08:16<03:05, 13.43file/s]

MuPDF error: syntax error: could not parse color space (682 0 R)



Processing files:  76%|███████▌  | 6668/8824 [08:37<02:04, 17.34file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  76%|███████▋  | 6734/8824 [08:42<02:00, 17.39file/s]

MuPDF error: syntax error: could not parse color space (292 0 R)



Processing files:  78%|███████▊  | 6908/8824 [08:55<01:47, 17.84file/s]

MuPDF error: format error: No default Layer config



Processing files:  83%|████████▎ | 7282/8824 [09:23<01:12, 21.25file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  84%|████████▍ | 7434/8824 [09:33<02:37,  8.85file/s]

MuPDF error: syntax error: could not parse color space (1676 0 R)

MuPDF error: syntax error: could not parse color space (1750 0 R)

MuPDF error: syntax error: could not parse color space (2484 0 R)

MuPDF error: syntax error: could not parse color space (4625 0 R)

MuPDF error: syntax error: could not parse color space (6933 0 R)

MuPDF error: syntax error: could not parse color space (9636 0 R)

MuPDF error: syntax error: could not parse color space (14592 0 R)



Processing files:  86%|████████▌ | 7607/8824 [09:45<00:59, 20.51file/s]

MuPDF error: syntax error: could not parse color space (418 0 R)



Processing files:  87%|████████▋ | 7705/8824 [09:52<01:11, 15.65file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  88%|████████▊ | 7772/8824 [09:56<00:49, 21.11file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  89%|████████▉ | 7847/8824 [10:01<01:19, 12.30file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04684pt'



Processing files:  90%|█████████ | 7956/8824 [10:08<00:42, 20.56file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@CA0.5'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.5'



Processing files:  90%|█████████ | 7965/8824 [10:09<01:04, 13.25file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  98%|█████████▊| 8634/8824 [10:59<00:14, 13.37file/s]

MuPDF error: syntax error: could not parse color space (307 0 R)



Processing files:  99%|█████████▉| 8732/8824 [11:05<00:05, 17.67file/s]

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource 'R13'

MuPDF error: syntax error: cannot find XObject resource

Processing files: 100%|██████████| 8824/8824 [11:11<00:00, 13.15file/s]
Processing files:   9%|▉         | 629/7143 [00:48<06:03, 17.92file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'

MuPDF error: syntax error: cannot find ExtGState resource 'pgf@ca0.8'



Processing files:  16%|█▋        | 1167/7143 [01:28<09:58,  9.99file/s]

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A1'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find ExtGState resource 'A2'

MuPDF error: syntax error: cannot find E

Processing files:  17%|█▋        | 1225/7143 [01:32<07:23, 13.34file/s]

MuPDF error: syntax error: could not parse color space (192 0 R)



Processing files:  22%|██▏       | 1575/7143 [02:09<06:30, 14.26file/s]  

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  23%|██▎       | 1651/7143 [02:15<05:39, 16.19file/s]

MuPDF error: syntax error: could not parse color space (93 0 R)

MuPDF error: syntax error: could not parse color space (325 0 R)



Processing files:  23%|██▎       | 1663/7143 [02:16<07:36, 12.02file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  26%|██▌       | 1845/7143 [02:31<04:15, 20.77file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  28%|██▊       | 2019/7143 [02:43<07:56, 10.74file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: 

Processing files:  33%|███▎      | 2364/7143 [03:11<06:58, 11.42file/s]

MuPDF error: syntax error: syntax error in content stream

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@pgfcolorspaces'

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

MuPDF error: syntax error: unknown keyword: '@resources'

MuPDF error: syntax error: unknown keyword: 'put'

Processing files:  35%|███▌      | 2504/7143 [03:22<13:23,  5.78file/s]

MuPDF error: syntax error: could not parse color space (456 0 R)



Processing files:  44%|████▍     | 3138/7143 [04:10<03:01, 22.09file/s]

MuPDF error: syntax error: could not parse color space (1015 0 R)

MuPDF error: syntax error: could not parse color space (1159 0 R)



Processing files:  45%|████▌     | 3221/7143 [04:16<04:44, 13.81file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '614.295pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '794.96999pt'



Processing files:  50%|████▉     | 3556/7143 [04:44<03:13, 18.51file/s]

MuPDF error: syntax error: could not parse color space (264 0 R)

MuPDF error: syntax error: could not parse color space (634 0 R)

MuPDF error: syntax error: could not parse color space (726 0 R)



Processing files:  50%|████▉     | 3564/7143 [04:44<04:45, 12.52file/s]

MuPDF error: syntax error: could not parse color space (175 0 R)



Processing files:  54%|█████▎    | 3829/7143 [05:04<06:14,  8.84file/s]

MuPDF error: syntax error: could not parse color space (65 0 R)



Processing files:  56%|█████▌    | 3993/7143 [05:18<04:39, 11.26file/s]

MuPDF error: syntax error: could not parse color space (3210 0 R)



Processing files:  67%|██████▋   | 4807/7143 [06:17<02:36, 14.91file/s]

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files:  69%|██████▉   | 4947/7143 [06:28<01:49, 20.11file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  71%|███████▏  | 5096/7143 [06:38<01:43, 19.83file/s]

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'Black'

MuPDF error: syntax error: unknown keyword: 'B

Processing files:  73%|███████▎  | 5181/7143 [06:44<02:51, 11.45file/s]

MuPDF error: syntax error: could not parse color space (161 0 R)



Processing files:  74%|███████▍  | 5292/7143 [06:54<01:35, 19.41file/s]

MuPDF error: syntax error: could not parse color space (437 0 R)



Processing files:  79%|███████▉  | 5674/7143 [07:19<01:13, 19.96file/s]

MuPDF error: syntax error: unknown keyword: 'pagesize'

MuPDF error: syntax error: unknown keyword: 'width'

MuPDF error: syntax error: unknown keyword: '597.50787pt'

MuPDF error: syntax error: unknown keyword: 'height'

MuPDF error: syntax error: unknown keyword: '845.04684pt'



Processing files:  86%|████████▋ | 6167/7143 [07:55<00:59, 16.44file/s]

MuPDF error: syntax error: could not parse color space (65 0 R)

MuPDF error: syntax error: could not parse color space (194 0 R)



Processing files:  88%|████████▊ | 6270/7143 [08:02<01:03, 13.75file/s]

MuPDF error: syntax error: could not parse color space (328 0 R)

MuPDF error: syntax error: could not parse color space (372 0 R)

MuPDF error: syntax error: could not parse color space (575 0 R)

MuPDF error: syntax error: could not parse color space (596 0 R)



Processing files:  91%|█████████ | 6470/7143 [08:15<00:49, 13.49file/s]

MuPDF error: syntax error: could not parse color space (334 0 R)

MuPDF error: syntax error: could not parse color space (712 0 R)

MuPDF error: syntax error: could not parse color space (744 0 R)



Processing files:  92%|█████████▏| 6538/7143 [08:20<00:53, 11.28file/s]

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'

MuPDF error: syntax error: unknown keyword: 'cmyk'



Processing files:  96%|█████████▌| 6829/7143 [08:40<00:21, 14.50file/s]

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations

MuPDF error: unsupported error: cannot create appearance stream for Screen annotations



Processing files:  97%|█████████▋| 6917/7143 [08:46<00:12, 17.92file/s]

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'

MuPDF error: syntax error: cannot find XObject resource 'arial-minus'



Processing files: 100%|██████████| 7143/7143 [09:02<00:00, 13.18file/s]

Successes: 50004. Failures: 2268. Node IDs: 52596.





Successes: 50004. Failures: 2268. Node IDs: 52596.


In [12]:
# Display the `results` mapping (paper ID -> extracted text) as the cell output.
results

{'1809.01604': 'Merging datasets is a key operation for data analytics. A frequent requirement for merging is joining across columns that have different surface forms for the same entity. For instance, the name of a person might be represented as Douglas Adams, Douglas Noel Adams, D. Adams or Adams, Douglas. Similarly, ontology alignment can require recognizing distinct surface forms of the same entity, especially when ontologies are independently developed. This problem occurs for many entity types such as people’s names, company names, addresses, product descriptions, conference venues, or even people’s faces. Data management systems have however, largely focussed solely on equi-joins, where string or numeric equality determines which rows should be joined, because such joins are efﬁcient. We propose a different approach to joining different surface representations of the same entity, inspired by recent advances in deep learning. Our approach depends on (a) mapping surface forms into

In [13]:
# Persist every extracted text to `output_file`, one entry per line.
# Only the values of `results` are needed; the keys are not written.
with open(output_file, "w", encoding='utf-8') as out_handle:
    out_handle.writelines(text + "\n" for text in results.values())

In [None]:
# Inspect the `failures` value produced by an earlier cell
# (its exact shape is not visible from this cell).
print(failures)

In [14]:
# NOTE(review): `concatenate_files` is defined in an earlier cell; presumably it
# joins the first two files into the third path — confirm against its definition.
concatenate_files('graph-v2/data-v2.txt', 'graph-v2/ANC_500.txt', 'graph-v2/data-v2-500.txt')

In [15]:
# Report the mean number of whitespace-separated words per line for each
# listed text file under graph-v2/ (rounded up for display).
for txtf in ['data-v2-500.txt']:
    with open(f'graph-v2/{txtf}', 'r', encoding='utf-8') as file:
        words_per_line = [len(line.split()) for line in file]

    total_lines = len(words_per_line)
    total_word_count = sum(words_per_line)

    # An empty file has no lines; report 0 instead of dividing by zero.
    if total_lines > 0:
        mean_word_count = total_word_count / total_lines
    else:
        mean_word_count = 0

    print(f'=== {txtf} ===')
    print("Mean word count:", math.ceil(mean_word_count))
    print()

=== data-v2-500.txt ===
Mean word count: 606



## ***Parallel*** (Only works for downloading and processing papers)

In [None]:
def process_range(paper_ids, start, end):
    """Download and extract the introduction for one contiguous slice of papers.

    Meant to be executed in a worker process by ``process_papers_parallel``.

    Args:
        paper_ids: The arXiv IDs of this slice, i.e. the global ID list
            restricted to ``[start, end)``.
        start: Global index of the first ID in ``paper_ids``.
        end: Global index one past the last ID in ``paper_ids``.

    Returns:
        A ``(results, failed_papers)`` tuple. ``results`` is a list of
        ``(global_index, text)`` pairs, one per index in ``[start, end)``;
        ``text`` is the extracted introduction, or the literal ``"FAILED"``
        when any stage failed. ``failed_papers`` holds three lists of
        ``(global_index, paper_id)`` pairs: download failures, text-extraction
        failures and introduction-detection failures, in that order.
    """
    # Every slot starts as "FAILED" so that skipped papers keep a placeholder.
    results = [(i, "FAILED") for i in range(start, end)]
    failed_papers = [[] for _ in range(3)]  # Track failures: [download, extraction, introduction]

    progress = tqdm(paper_ids, total=len(paper_ids), desc="Processing Papers")
    for i, paper_id in enumerate(progress, start=start):
        try:
            pdf_stream = download_pdf(paper_id)
        except requests.RequestException:
            # A network error (timeout, dropped connection, ...) on a single
            # paper must not abort the whole slice: the exception would
            # propagate through future.result() in the parent and discard
            # every result gathered so far. Treat it as a failed download.
            pdf_stream = None
        if not pdf_stream:
            failed_papers[0].append((i, paper_id))
            continue

        # NOTE(review): the PDF is passed as an in-memory stream here, so
        # extract_text_from_pdf presumably has to use its stream-opening
        # variant (fitz.open("pdf", pdf)) — confirm before running.
        text = extract_text_from_pdf(pdf_stream)
        if text is None:
            failed_papers[1].append((i, paper_id))
            continue

        introduction_text = extract_introduction(text)
        if introduction_text is None:
            failed_papers[2].append((i, paper_id))
            continue

        # Convert the global index back to a position inside this slice.
        results[i - start] = (i, introduction_text)

    return results, failed_papers

def process_papers_parallel(id_file, ranges, output_file, max_workers=5):
    """Fan the paper list out over worker processes and merge their output.

    Args:
        id_file: Path to a text file with one paper ID per line.
        ranges: Iterable of ``[start, end)`` index pairs; each pair is handed
            to ``process_range`` as one task.
        output_file: Accepted for interface compatibility; not used here.
        max_workers: Size of the process pool.

    Returns:
        ``(failed_papers, results)``. ``failed_papers`` is a list of three
        lists (download, extraction, introduction failures) aggregated over
        all tasks. ``results`` has one entry per line of ``id_file``: the text
        reported by the worker, or ``"FAILED"`` for indices no task filled in.
    """
    with open(id_file, "r") as handle:
        paper_ids = [raw_line.strip() for raw_line in handle]

    # One placeholder per paper; overwritten as workers report back.
    results = ["FAILED" for _ in paper_ids]
    # Failure buckets: [download, extraction, introduction]
    failed_papers = [[], [], []]

    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        pending = [
            pool.submit(process_range, paper_ids[span[0]:span[1]], span[0], span[1])
            for span in ranges
        ]

        # Collect in submission order; result() blocks until the task is done.
        for job in pending:
            chunk_results, chunk_failures = job.result()
            for position, intro in chunk_results:
                results[position] = intro
            for stage, bucket in enumerate(failed_papers):
                bucket.extend(chunk_failures[stage])

    return failed_papers, results

In [None]:
# Create ranges
# Split the paper indices [0, num_ids) into contiguous [start, end) batches,
# one batch per worker process.
num_ids = 52596
num_processes = 6  # 6 is the number of processes
# Clamp to at least 1: a batch size of 0 would keep the loop below from advancing.
batch_size = max(1, num_ids // num_processes)

ranges = []

start = 0
while start < num_ids:
    end = min(start + batch_size, num_ids)
    ranges.append([start, end])
    start = end

# Integer division can leave a short remainder batch at the end; fold it into
# the previous batch. Skip this when the split is exact (or there is only one
# batch), otherwise two full-size batches would be merged into one.
if len(ranges) > 1 and ranges[-1][1] - ranges[-1][0] < batch_size:
    ranges[-2][1] = ranges[-1][1]
    del ranges[-1]
print(ranges)

[[0, 5259], [5259, 10518], [10518, 15777], [15777, 21036], [21036, 26295], [26295, 31554], [31554, 36813], [36813, 42072], [42072, 47331], [47331, 52596]]


In [None]:
# Run the parallel pipeline over the precomputed index ranges. Returns the three
# failure lists and the per-paper text list (entries stay "FAILED" when a paper
# could not be processed).
failed_papers, results = process_papers_parallel(id_file, ranges, output_file)

In [None]:
# Write the per-paper texts, one per line, in paper-index order.
with open(output_file, "w", encoding='utf-8') as out_handle:
    out_handle.writelines(text + "\n" for text in results)

# Write a report with one section per failure stage; each entry is
# "<index>: <paper id>". Headers are in the same order as `failed_papers`.
section_headers = (
    "Failed Downloads:\n",
    "\nFailed Extraction:\n",
    "\nFailed Introduction Detection:\n",
)
with open("graph-v2/Failed_papers.txt", "w") as report:
    for stage, header in enumerate(section_headers):
        report.write(header)
        for idx, pid in failed_papers[stage]:
            report.write(f"{idx}: {pid}\n")