<a href="https://colab.research.google.com/github/TamannaAhmad/research-paper-analyser/blob/main/image_caption_extraction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pymupdf



In [None]:
import fitz  # PyMuPDF
import os
import re
from tqdm import tqdm
import numpy as np
from PIL import Image
import io
from google.colab import drive
import shutil
import zipfile
from google.colab import files

In [None]:
# Mount Google Drive at /content/drive; the final cell of this notebook copies
# the extracted figures and captions into MyDrive under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def extract_caption(page, image_rect):
    """Find the caption text that belongs to an image on a PDF page.

    A text block is a candidate when it mentions a figure label ("Fig 3",
    "Figure 3", ...) AND is either located just below the image or overlaps it
    horizontally. Candidates in which a line *starts* with the label are
    preferred over blocks that merely reference a figure in running text
    (e.g. "... as shown in Figure 13 ..."); within each tier the block whose
    centre is nearest to the image centre wins. If no block qualifies, the
    region directly below the image is scanned for a "Figure N ..." pattern.

    Args:
        page: page object exposing ``rect.width``, ``rect.height`` and
            ``get_text(...)`` as used below (a PyMuPDF page in this notebook).
        image_rect: iterable of four numbers ``(x0, y0, x1, y1)`` giving the
            image bounding box in page coordinates.

    Returns:
        The stripped caption text, or ``None`` when nothing caption-like is found.
    """
    x0, y0, x1, y1 = image_rect
    page_width = page.rect.width
    page_height = page.rect.height

    # A figure label anywhere in the block vs. a label at the start of a line.
    mention_pattern = re.compile(r'\b(fig|figure)\.?\s*\d+', re.IGNORECASE)
    label_pattern = re.compile(r'^\s*(fig|figure)\.?\s*\d+', re.IGNORECASE | re.MULTILINE)

    image_center = ((x0 + x1) / 2, (y0 + y1) / 2)
    potential_captions = []  # (rank, distance, text); lower rank is better

    for block in page.get_text("blocks"):
        bx0, by0, bx1, by1 = block[:4]
        block_text = block[4]

        # Skip blocks with very little text
        if len(block_text.strip()) < 5:
            continue

        # A caption candidate must mention a figure label at all
        if mention_pattern.search(block_text) is None:
            continue

        # Block starts within 5 units above / 100 units below the image's
        # bottom edge and is at most 50 units to the side of the image
        is_below = (y1 - 5 <= by0 <= y1 + 100 and
                    max(0, bx0 - x1) < 50 and
                    max(0, x0 - bx1) < 50)

        # Block shares some horizontal extent with the image
        has_x_overlap = min(bx1, x1) - max(bx0, x0) > 0

        # Must be positioned plausibly AND contain a figure label
        if not (is_below or has_x_overlap):
            continue

        block_center = ((bx0 + bx1) / 2, (by0 + by1) / 2)
        distance = ((image_center[0] - block_center[0])**2 +
                    (image_center[1] - block_center[1])**2)**0.5

        # Rank 0: a line begins with the label (a real caption);
        # rank 1: the label only appears inside running text.
        rank = 0 if label_pattern.search(block_text) else 1
        potential_captions.append((rank, distance, block_text))

    if potential_captions:
        # Best tier first, then nearest to the image (first one wins on ties)
        best = min(potential_captions, key=lambda candidate: (candidate[0], candidate[1]))
        return best[2].strip()

    # Fallback: use broader search with large area below image
    caption_area = fitz.Rect(max(0, x0 - 50), y1, min(page_width, x1 + 50), min(page_height, y1 + 150))
    caption_text = page.get_text("text", clip=caption_area)

    if caption_text:
        # Capture from the figure label up to the first blank line or end of text
        match = re.search(r'(\b(?:Figure|Fig\.?)\s*\d+.*?)(?=\n\n|\Z)', caption_text, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()

    return None

In [None]:
def should_merge_images(rect1, rect2, proximity_threshold=50):
    """Decide whether two image rectangles look like parts of one figure.

    The rectangles are merged when they share an edge position on at least one
    axis (top/bottom edges or left/right edges closer than
    ``proximity_threshold``) and, in addition, either their centres are closer
    than three times the threshold or the rectangles overlap.

    Args:
        rect1, rect2: objects exposing ``x0``, ``y0``, ``x1``, ``y1``.
        proximity_threshold: alignment tolerance in page units.

    Returns:
        bool: True if the two rectangles should be grouped together.
    """
    def _near(a, b):
        return abs(a - b) < proximity_threshold

    # Same row (matching top or bottom edge) or same column (matching left or right edge)
    shares_row = _near(rect1.y0, rect2.y0) or _near(rect1.y1, rect2.y1)
    shares_column = _near(rect1.x0, rect2.x0) or _near(rect1.x1, rect2.x1)
    if not (shares_row or shares_column):
        return False

    # Euclidean distance between the two rectangle centres
    dx = (rect1.x0 + rect1.x1)/2 - (rect2.x0 + rect2.x1)/2
    dy = (rect1.y0 + rect1.y1)/2 - (rect2.y0 + rect2.y1)/2
    if (dx**2 + dy**2)**0.5 < proximity_threshold * 3:
        return True

    # Otherwise the rectangles must intersect (touching edges count)
    return (rect1.x1 >= rect2.x0 and rect2.x1 >= rect1.x0 and
            rect1.y1 >= rect2.y0 and rect2.y1 >= rect1.y0)

In [None]:
def get_figure_number(caption_text):
    """Extract the figure number from caption text.

    Looks for the first "Figure N" / "Fig N" / "Fig. N" label (case-insensitive).
    The label must start at a word boundary so that words merely ending in
    "fig" (e.g. "config 3") are not mistaken for a figure label.

    Args:
        caption_text: caption string, or ``None`` / empty string.

    Returns:
        The figure number as an ``int``, or ``None`` if no label is present.
    """
    if not caption_text:
        return None

    match = re.search(r'\b(?:Figure|Fig\.?)\s*(\d+)', caption_text, re.IGNORECASE)
    return int(match.group(1)) if match else None

In [None]:
def _group_images_into_figures(image_data):
    """Partition one page's image records into figure groups (lists of indices).

    Images whose captions carry the same figure number are grouped first; the
    remaining (unnumbered) images are then grouped by spatial proximity.
    Groups are returned ordered by the index of their first image.
    """
    figure_groups = []

    # Pass 1: group by figure number. Images without a number are left for
    # pass 2 (marking them here would make the proximity pass unreachable).
    for idx, img_data in enumerate(image_data):
        if img_data['grouped'] or img_data['fig_number'] is None:
            continue

        group = [idx]
        img_data['grouped'] = True
        for j, other_img in enumerate(image_data):
            if j != idx and not other_img['grouped'] and other_img['fig_number'] == img_data['fig_number']:
                group.append(j)
                other_img['grouped'] = True

        figure_groups.append(group)

    # Pass 2: group the remaining images by proximity to the seed image
    for idx, img_data in enumerate(image_data):
        if img_data['grouped']:
            continue

        group = [idx]
        img_data['grouped'] = True
        for j, other_img in enumerate(image_data):
            if j != idx and not other_img['grouped'] and should_merge_images(img_data['rect'], other_img['rect']):
                group.append(j)
                other_img['grouped'] = True

        figure_groups.append(group)

    # Keep output numbering in page-listing order regardless of which pass
    # produced the group (each group's first entry is its smallest index).
    figure_groups.sort(key=lambda members: members[0])
    return figure_groups


def _combine_figure_parts(pil_images):
    """Paste the given PIL images onto one white RGB canvas and return it.

    Up to two images are laid out side by side; three or more are placed on a
    grid of at most three columns whose cells are sized to the largest part.
    """
    if len(pil_images) <= 2:
        total_width = sum(img.width for img in pil_images)
        max_height = max(img.height for img in pil_images)

        combined = Image.new('RGB', (total_width, max_height), (255, 255, 255))

        x_offset = 0
        for img in pil_images:
            combined.paste(img, (x_offset, 0))
            x_offset += img.width
        return combined

    cols = min(len(pil_images), 3)  # Max 3 columns
    rows = (len(pil_images) + cols - 1) // cols  # ceiling division

    max_width = max(img.width for img in pil_images)
    max_height = max(img.height for img in pil_images)

    combined = Image.new('RGB', (max_width * cols, max_height * rows), (255, 255, 255))
    for i, img in enumerate(pil_images):
        combined.paste(img, ((i % cols) * max_width, (i // cols) * max_height))
    return combined


def extract_images(file_path, output_base_dir):
    """Extract the figures (and their captions) of one PDF into a folder.

    For every page, embedded images are located, matched with a caption via
    ``extract_caption`` and grouped into figures. Multi-part figures are
    combined into a single PNG; single images are written in their native
    format. Captions are written next to the figure as ``*_caption.txt``.
    Output goes to ``<output_base_dir>/<pdf name without extension>/`` and
    files are named ``figure<page>_<group>``. Progress and a final summary are
    printed.

    Args:
        file_path: path of the PDF file to process.
        output_base_dir: directory under which the per-PDF folder is created.

    Returns:
        None.
    """
    total_figures = 0  # count total number of figures in PDF
    total_captions = 0  # count total number of captions found

    # splitext removes the extension whatever its length
    folder_name = os.path.splitext(os.path.basename(file_path))[0]
    folder_path = os.path.join(output_base_dir, folder_name)

    # The context manager closes the document even if processing fails
    with fitz.open(file_path) as pdf_file:
        os.makedirs(folder_path, exist_ok=True)

        # Process each page
        for page_index in range(len(pdf_file)):
            page = pdf_file.load_page(page_index)
            image_list = page.get_images(full=True)

            if not image_list:
                continue

            # Collect placement, caption and raw data for every image on the page
            image_data = []
            for img in image_list:
                xref = img[0]
                try:
                    base_image = pdf_file.extract_image(xref)
                    rects = page.get_image_rects(xref)

                    if rects:
                        # Only the first placement of the image is considered
                        caption_text = extract_caption(page, rects[0])
                        fig_number = get_figure_number(caption_text)

                        image_data.append({
                            'xref': xref,
                            'rect': rects[0],
                            'caption': caption_text,
                            'fig_number': fig_number,
                            'base_image': base_image,
                            'grouped': False
                        })
                except Exception as e:
                    print(f"[!] Error analyzing image on page {page_index+1}: {e}")

            figure_groups = _group_images_into_figures(image_data)

            # Process each figure group
            for group_idx, group in enumerate(figure_groups):
                total_figures += 1

                # Use the caption from the first image in the group
                caption_text = image_data[group[0]]['caption']

                if len(group) > 1:
                    # Multi-part figure: combine the parts, do not save them individually.
                    # Decoding happens inside the try so one undecodable part
                    # does not abort the rest of the PDF.
                    try:
                        pil_images = [
                            Image.open(io.BytesIO(image_data[img_idx]['base_image']["image"]))
                            for img_idx in group
                        ]
                        combined = _combine_figure_parts(pil_images)

                        combined_path = f"{folder_path}/figure{page_index+1}_{group_idx+1}.png"
                        combined.save(combined_path)
                        print(f"[+] Combined figure saved as {combined_path}")

                    except Exception as e:
                        print(f"[!] Error combining figure parts: {e}")
                else:
                    # Just a single image: write the original bytes unchanged
                    base_image = image_data[group[0]]['base_image']
                    image_bytes = base_image["image"]
                    image_ext = base_image["ext"]

                    image_name = f"{folder_path}/figure{page_index+1}_{group_idx+1}.{image_ext}"
                    with open(image_name, "wb") as image_file:
                        image_file.write(image_bytes)
                    print(f"[+] Figure saved as {image_name}")

                # Save caption if available
                if caption_text:
                    caption_file = f"{folder_path}/figure{page_index+1}_{group_idx+1}_caption.txt"
                    with open(caption_file, "w", encoding="utf-8") as f:
                        f.write(caption_text)
                    caption_preview = caption_text[:50] + "..." if len(caption_text) > 50 else caption_text
                    print(f"[+] Caption: {caption_preview}")
                    total_captions += 1
                else:
                    print(f"[!] No caption found for figure {page_index+1}_{group_idx+1}")

    print(f"Processed {file_path}: {total_figures} figures, {total_captions} captions found")

In [None]:
def find_pdf_files(directory):
    """Recursively collect the paths of all PDF files under ``directory``.

    The extension check is case-insensitive (``.pdf``, ``.PDF``, ...).

    Args:
        directory: root directory to walk.

    Returns:
        list[str]: paths (root joined with file name) in ``os.walk`` order.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(directory)
        for entry in entries
        if entry.lower().endswith('.pdf')
    ]

In [None]:
# Mount Google Drive before touching any path below the mount point; creating
# the output folder first would make a plain local directory under
# /content/drive when Drive is not mounted yet.
drive.mount('/content/drive')

# Local working directory for extracted figures and captions
output_base_dir = '/content/training_data_images'
os.makedirs(output_base_dir, exist_ok=True)

# Where the final results are saved in Google Drive
drive_output_path = '/content/drive/MyDrive/training_data_images'
os.makedirs(drive_output_path, exist_ok=True)

# Directory the uploaded PDFs are extracted into
sample_dir = '/content/sample_pdfs/sample'
os.makedirs(sample_dir, exist_ok=True)

# Upload a folder of PDFs using a zip file
print("Please upload a zip file containing your PDF files...")
uploaded = files.upload()  # Upload a zip file

# Extract every uploaded zip archive (extension check is case-insensitive)
for filename in uploaded.keys():
    if filename.lower().endswith('.zip'):
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(sample_dir)
        print(f"Extracted {filename} to {sample_dir}")
    else:
        print(f"Skipping {filename}: not a zip file")

# Process all PDFs found (recursively) in the sample directory
pdf_files = find_pdf_files(sample_dir)
print(f"Found {len(pdf_files)} PDF files to process")

for filepath in tqdm(pdf_files):
    try:
        extract_images(filepath, output_base_dir)
    except Exception as e:
        # Best effort: report the failure and continue with the next PDF
        print(f"An unexpected error occurred while processing {os.path.basename(filepath)}: {e}")

# Copy results back to Google Drive
shutil.copytree(output_base_dir, drive_output_path, dirs_exist_ok=True)
print(f"All extracted images and captions have been saved to Google Drive at: {drive_output_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Please upload a zip file containing your PDF files...


Saving sample.zip to sample.zip
Extracted sample.zip to /content/sample_pdfs/sample
Found 100 PDF files to process


  0%|          | 0/100 [00:00<?, ?it/s]

[+] Combined figure saved as /content/training_data_images/1806.05575/figure6_1.png
[+] Caption: Figure 3. CIFAR-10: Real example images (left), sa...
[+] Figure saved as /content/training_data_images/1806.05575/figure7_1.png
[+] Caption: Figure 6. Small ImageNet inpainting examples. Left...
[+] Combined figure saved as /content/training_data_images/1806.05575/figure7_2.png
[+] Caption: tially outperforms the baseline in terms of ﬁnal p...
[+] Combined figure saved as /content/training_data_images/1806.05575/figure8_1.png
[+] Caption: Figure 7. Class-conditional samples from PixelIQN....
[+] Combined figure saved as /content/training_data_images/1806.05575/figure13_1.png
[+] Caption: Figure 9. CelebA 64x64: Real example images (left)...
[+] Figure saved as /content/training_data_images/1806.05575/figure14_1.png
[+] Caption: Figure 10. Samples from PixelIQN trained on small ...
[+] Combined figure saved as /content/training_data_images/1806.05575/figure15_1.png
[+] Caption: Figure 11. C

  1%|          | 1/100 [00:02<04:06,  2.49s/it]

[+] Figure saved as /content/training_data_images/1806.05575/figure16_1.png
[+] Caption: Figure 12. Inpainting. Left column: Masked image g...
Processed /content/sample_pdfs/sample/sample/1806.05575.pdf: 8 figures, 8 captions found
[+] Combined figure saved as /content/training_data_images/1806.01946/figure1_1.png
[+] Caption: Figure 1: Different valid goal states for
the inst...
[+] Combined figure saved as /content/training_data_images/1806.01946/figure5_1.png
[+] Caption: Figure 3: Initial state and goal state for GridLU-...


  2%|▏         | 2/100 [00:03<02:38,  1.61s/it]

[+] Combined figure saved as /content/training_data_images/1806.01946/figure15_1.png
[+] Caption: Figure 6: The dynamics of the GridLU world illustr...
Processed /content/sample_pdfs/sample/sample/1806.01946.pdf: 3 figures, 3 captions found
[+] Figure saved as /content/training_data_images/1807.01613v1/figure4_1.jpeg
[+] Caption: Figure 2. 1-D Regression.
Regression results on a ...
[+] Figure saved as /content/training_data_images/1807.01613v1/figure5_1.png
[+] Caption: Figure 3. Pixel-wise image regression on MNIST. Le...
[+] Figure saved as /content/training_data_images/1807.01613v1/figure6_1.png
[+] Caption: Figure 4. Pixel-wise image completion on CelebA. T...


  3%|▎         | 3/100 [00:03<01:39,  1.02s/it]

[+] Figure saved as /content/training_data_images/1807.01613v1/figure7_1.png
[+] Caption: Figure 5. Flexible image completion. In contrast t...
[+] Combined figure saved as /content/training_data_images/1807.01613v1/figure7_2.png
[+] Caption: Figure 6. Image completion with a latent variable ...
[+] Combined figure saved as /content/training_data_images/1807.01613v1/figure8_1.png
[+] Caption: Figure 7. One-shot Omniglot classiﬁcation.
At test...
Processed /content/sample_pdfs/sample/sample/1807.01613v1.pdf: 6 figures, 6 captions found
[+] Figure saved as /content/training_data_images/1804.02476/figure4_1.png
[+] Caption: Figure 1. Flow diagram for daydream sampling.
[+] Figure saved as /content/training_data_images/1804.02476/figure6_1.png
[+] Caption: Figure 2. MNIST compression costs. Unordered compr...
[+] Combined figure saved as /content/training_data_images/1804.02476/figure6_2.png
[+] Caption: Figure 3. Visualisation of the ﬁrst two principal ...
[+] Figure saved as /content/tra

  4%|▍         | 4/100 [00:10<05:00,  3.13s/it]

[+] Figure saved as /content/training_data_images/1804.02476/figure13_1.png
[+] Caption: Figure 16. ImageNet daydream samples.
Processed /content/sample_pdfs/sample/sample/1804.02476.pdf: 16 figures, 16 captions found
Processed /content/sample_pdfs/sample/sample/1811.10475v1.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1812.01608/figure2_1.png
[+] Caption: Figure 1: A representation of Multidimensional Ups...
[+] Combined figure saved as /content/training_data_images/1812.01608/figure3_1.png
[+] Caption: Figure 2: The receptive ﬁeld in a Subscale Pixel N...
[+] Figure saved as /content/training_data_images/1812.01608/figure6_1.jpeg
[+] Caption: Figure 5: Left: 8-bit 128x128 RGB ImageNet samples...
[+] Figure saved as /content/training_data_images/1812.01608/figure10_1.jpeg
[+] Caption: Figure 6: 8-bit 256x256 RGB CelebA-HQ samples from...
[+] Figure saved as /content/training_data_images/1812.01608/figure11_1.jpeg
[+] Caption: Figure 7: 25

  6%|▌         | 6/100 [00:12<03:25,  2.19s/it]

[+] Figure saved as /content/training_data_images/1812.01608/figure14_1.png
[+] Caption: Figure 10: 128x128 ImageNet 3bit; upscaled 32x32 s...
[+] Figure saved as /content/training_data_images/1812.01608/figure14_2.png
[+] Caption: Figure 11: 128x128 ImageNet 3bit samples from mode...
[+] Figure saved as /content/training_data_images/1812.01608/figure14_3.png
[+] Caption: Figure 12: 128x128 ImageNet 3bit samples from SPN
Processed /content/sample_pdfs/sample/sample/1812.01608.pdf: 10 figures, 10 captions found
[+] Figure saved as /content/training_data_images/1811.09300v1/figure8_1.png
[+] Caption: Figure 2: Evaluation Curves
Processed /content/sample_pdfs/sample/sample/1811.09300v1.pdf: 1 figures, 1 captions found
Processed /content/sample_pdfs/sample/sample/1811.02172v1.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1807.04587/figure3_1.png
[+] Caption: Figure 1: In BP and DTP, the ﬁnal layer target is ...


  9%|▉         | 9/100 [00:13<01:37,  1.07s/it]

[+] Combined figure saved as /content/training_data_images/1807.04587/figure17_1.png
[+] Caption: Figure 7: MNIST reconstructions obtained by differ...
[+] Combined figure saved as /content/training_data_images/1807.04587/figure17_2.png
[+] Caption: Figure 6: Train (solid) and test (dashed) reconstr...
Processed /content/sample_pdfs/sample/sample/1807.04587.pdf: 3 figures, 3 captions found
[+] Figure saved as /content/training_data_images/1806.09055/figure3_1.png
[+] Caption: Figure 1: An overview of DARTS: (a) Operations on ...
Processed /content/sample_pdfs/sample/sample/1806.09055.pdf: 1 figures, 1 captions found
[+] Figure saved as /content/training_data_images/1806.01186/figure1_1.png
[!] No caption found for figure 1_1
[+] Combined figure saved as /content/training_data_images/1806.01186/figure4_1.png
[+] Caption: Figure 2 | Sushi environment.
[+] Combined figure saved as /content/training_data_images/1806.01186/figure4_2.png
[+] Caption: Figure 3 | Vase environment.


 11%|█         | 11/100 [00:13<01:10,  1.26it/s]

[+] Combined figure saved as /content/training_data_images/1806.01186/figure5_1.png
[+] Caption: Figure 4 | Offsetting behavior in the Vase environ...
[+] Combined figure saved as /content/training_data_images/1806.01186/figure5_2.png
[+] Caption: However, there is a problem with directly comparin...
[+] Combined figure saved as /content/training_data_images/1806.01186/figure6_1.png
[+] Caption: Figure 6 | Box environment.
[+] Combined figure saved as /content/training_data_images/1806.01186/figure10_1.png
[+] Caption: Figure 7 | Scaled performance results for differen...
Processed /content/sample_pdfs/sample/sample/1806.01186.pdf: 7 figures, 6 captions found
Processed /content/sample_pdfs/sample/sample/1805.11593.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1803.06376/figure6_1.png
[+] Caption: Figure 1: Directional field plot for the 2-face
co...
[+] Figure saved as /content/training_data_images/1803.06376/figure6_2.png
[+] Caption: Figu

 13%|█▎        | 13/100 [00:14<00:57,  1.50it/s]

[+] Combined figure saved as /content/training_data_images/1803.06376/figure8_1.png
[+] Caption: players play strategy A, which absorbs the entire ...
[+] Combined figure saved as /content/training_data_images/1803.06376/figure8_2.png
[+] Caption: (a)
(b)
(c)
Figure 7: Dynamics of 3 2-faces of the...
[+] Figure saved as /content/training_data_images/1803.06376/figure8_3.png
[+] Caption: Figure 8: Trajectory plot of the first CP game.
[+] Combined figure saved as /content/training_data_images/1803.06376/figure8_4.png
[+] Caption: (a)
(b)
Figure 6: (a) dynamics of [36, 35, 24, 3, ...
Processed /content/sample_pdfs/sample/sample/1803.06376.pdf: 9 figures, 9 captions found
[+] Combined figure saved as /content/training_data_images/1804.04438/figure3_1.png
[+] Caption: Figure 1: (a) Generating deformed images: To rando...
[+] Figure saved as /content/training_data_images/1804.04438/figure5_1.png
[+] Caption: Figure 2: Pooling confers stability to deformation...
[+] Combined figure saved as 

 14%|█▍        | 14/100 [00:16<01:13,  1.17it/s]

[+] Figure saved as /content/training_data_images/1804.04438/figure12_1.png
[!] No caption found for figure 12_1
[+] Figure saved as /content/training_data_images/1804.04438/figure12_2.png
[!] No caption found for figure 12_2
[+] Figure saved as /content/training_data_images/1804.04438/figure12_3.png
[!] No caption found for figure 12_3
Processed /content/sample_pdfs/sample/sample/1804.04438.pdf: 8 figures, 5 captions found
[+] Figure saved as /content/training_data_images/1811.05931v1/figure2_1.png
[+] Caption: Figure 1: Screenshots from (a) the Cleanup game, (...
[+] Figure saved as /content/training_data_images/1811.05931v1/figure4_1.png
[+] Caption: Figure 2: (a) Agent Aj adjusts policy πj(s,a|ϕ) us...
[+] Figure saved as /content/training_data_images/1811.05931v1/figure5_1.png
[+] Caption: Figure 3: (a) Agents assigned and evolved with ind...
[+] Figure saved as /content/training_data_images/1811.05931v1/figure6_1.png
[+] Caption: Figure 4: Total episode rewards, aggregated over p

 15%|█▌        | 15/100 [00:18<01:32,  1.08s/it]

[+] Figure saved as /content/training_data_images/1811.05931v1/figure7_1.png
[+] Caption: Figure 5: Social outcome metrics for (a) Cleanup a...
[+] Figure saved as /content/training_data_images/1811.05931v1/figure7_2.png
[+] Caption: Figure 6: Distribution of layer 2 weights and bias...
Processed /content/sample_pdfs/sample/sample/1811.05931v1.pdf: 6 figures, 6 captions found
[+] Combined figure saved as /content/training_data_images/1804.00168/figure2_1.png
[+] Caption: Figure 1: (a) Our environment is built of real-wor...
[+] Combined figure saved as /content/training_data_images/1804.00168/figure4_1.png
[+] Caption: Figure 2: (a) In the illustration of the goal desc...
[+] Combined figure saved as /content/training_data_images/1804.00168/figure6_1.png
[+] Caption: Figure 3: Average per-episode rewards (y axis) are...
[+] Combined figure saved as /content/training_data_images/1804.00168/figure6_2.png
[+] Caption: We visualise trajectories from the trained agent o...


 16%|█▌        | 16/100 [00:24<03:08,  2.24s/it]

[+] Figure saved as /content/training_data_images/1804.00168/figure7_1.png
[+] Caption: Figure 5: Illustration of medium-sized held-out
gr...
[+] Figure saved as /content/training_data_images/1804.00168/figure9_1.png
[+] Caption: Figure 6: Left: Illustration of training regimes: ...
[+] Figure saved as /content/training_data_images/1804.00168/figure9_2.jpeg
[!] No caption found for figure 9_2
[+] Figure saved as /content/training_data_images/1804.00168/figure13_1.png
[+] Caption: Figure 7: Learning curves of the CityNav agent (2L...
[+] Figure saved as /content/training_data_images/1804.00168/figure14_1.png
[+] Caption: Figure 8: Learning curves for CityNav agents with ...
[+] Figure saved as /content/training_data_images/1804.00168/figure15_1.png
[+] Caption: Figure 9: Decoding of the agent position (blue dot...
Processed /content/sample_pdfs/sample/sample/1804.00168.pdf: 10 figures, 9 captions found
[+] Combined figure saved as /content/training_data_images/1812.01461v1/figure1_1.png

 17%|█▋        | 17/100 [00:30<04:30,  3.25s/it]

[+] Combined figure saved as /content/training_data_images/1812.01461v1/figure8_2.png
[+] Caption: Figure 7: Results of our model applied on real-wor...
[+] Figure saved as /content/training_data_images/1812.01461v1/figure11_1.png
[+] Caption: U-Net 
I3D encoder - Simple decoder
(see Fig. 2)
[+] Figure saved as /content/training_data_images/1812.01461v1/figure12_1.png
[+] Caption: Figure 10: Details of our U-Net encoder-decoder ar...
Processed /content/sample_pdfs/sample/sample/1812.01461v1.pdf: 9 figures, 9 captions found
[+] Figure saved as /content/training_data_images/1808.00508v1/figure2_1.png
[+] Caption: Figure 1: MLPs learn the identity function only fo...
[+] Combined figure saved as /content/training_data_images/1808.00508v1/figure3_1.png
[+] Caption: Figure 2: The Neural Accumulator (NAC) is a linear...
[+] Combined figure saved as /content/training_data_images/1808.00508v1/figure7_1.png
[+] Caption: We compare to three popular RNNs (UGRNN, LSTM and ...
[+] Figure saved as /

 19%|█▉        | 19/100 [00:31<02:31,  1.87s/it]

[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_1.png
[!] No caption found for figure 11_1
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_2.png
[!] No caption found for figure 11_2
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_3.png
[!] No caption found for figure 11_3
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_4.png
[!] No caption found for figure 11_4
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_5.png
[!] No caption found for figure 11_5
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_6.png
[!] No caption found for figure 11_6
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_7.png
[!] No caption found for figure 11_7
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_8.png
[!] No caption found for figure 11_8
[+] Figure saved as /content/training_data_images/1808.00508v1/figure11_9.png
[!

 20%|██        | 20/100 [00:35<03:38,  2.73s/it]

[+] Figure saved as /content/training_data_images/1806.05034/figure27_1.jpeg
[+] Caption: Figure 18: Qualitative examples from the Probabili...
Processed /content/sample_pdfs/sample/sample/1806.05034.pdf: 15 figures, 15 captions found
[+] Figure saved as /content/training_data_images/1805.11592/figure2_1.png
[+] Caption: Figure 1: Illustration of the domain gap that exis...
[+] Figure saved as /content/training_data_images/1805.11592/figure3_1.png
[+] Caption: Figure 2: For the path shown in (a), t-SNE project...
[+] Figure saved as /content/training_data_images/1805.11592/figure4_1.png
[+] Caption: Figure 3: Illustration of the network architecture...
[+] Figure saved as /content/training_data_images/1805.11592/figure5_1.png
[+] Caption: Figure 4: (a) Visualization of two embedding space...
[+] Figure saved as /content/training_data_images/1805.11592/figure6_1.png
[+] Caption: Figure 5: Cycle-consistency evaluation considering...
[+] Figure saved as /content/training_data_images/1805.

 21%|██        | 21/100 [00:37<03:02,  2.31s/it]

[+] Figure saved as /content/training_data_images/1805.11592/figure15_1.png
[+] Caption: Figure 11: Visualization of the t-SNE projection o...
Processed /content/sample_pdfs/sample/sample/1805.11592.pdf: 11 figures, 11 captions found


 22%|██▏       | 22/100 [00:37<02:15,  1.73s/it]

[+] Combined figure saved as /content/training_data_images/1811.09353v1/figure2_1.png
[+] Caption: Figure 1: Fragment of the segmental neural languag...
Processed /content/sample_pdfs/sample/sample/1811.09353v1.pdf: 1 figures, 1 captions found
[+] Combined figure saved as /content/training_data_images/1811.01483v1/figure3_1.png
[+] Caption: Figure 1: Left: Contingent region in FREEWAY; an o...
[+] Figure saved as /content/training_data_images/1811.01483v1/figure5_1.png
[+] Caption: Figure 2: Learning curves on several Atari games: ...
[+] Figure saved as /content/training_data_images/1811.01483v1/figure8_1.png
[+] Caption: Figure 3: Performance plot of ADM trained online u...
[+] Figure saved as /content/training_data_images/1811.01483v1/figure8_2.png
[+] Caption: Figure 4: Curves of ARI score during training, ave...
[+] Figure saved as /content/training_data_images/1811.01483v1/figure13_1.png
[+] Caption: Figure 5: Learning curves on several Atari games: ...


 23%|██▎       | 23/100 [00:39<02:07,  1.66s/it]

[+] Combined figure saved as /content/training_data_images/1811.01483v1/figure14_1.png
[+] Caption: Figure 6: Sample of clustering result for VENTURE,...
Processed /content/sample_pdfs/sample/sample/1811.01483v1.pdf: 6 figures, 6 captions found
[+] Figure saved as /content/training_data_images/1810.08647v1/figure3_1.png
[+] Caption: Figure 1:
Causal
diagram
of
agent
A’s effect on B’...
[+] Figure saved as /content/training_data_images/1810.08647v1/figure4_1.png
[+] Caption: Figure 2: The communication model has two
A2C head...
[+] Figure saved as /content/training_data_images/1810.08647v1/figure4_2.png
[+] Caption: Figure 3: The Model of Other Agents (MOA) archi-
t...
[+] Figure saved as /content/training_data_images/1810.08647v1/figure5_1.png
[+] Caption: Figure 4: Causal diagram in the MOA
case. Shaded n...
[+] Combined figure saved as /content/training_data_images/1810.08647v1/figure5_2.png
[+] Caption: Figure 5: The two SSD environments, Cleanup (left)...
[+] Combined figure saved 

 24%|██▍       | 24/100 [00:40<02:05,  1.65s/it]

[+] Combined figure saved as /content/training_data_images/1810.08647v1/figure17_1.png
[+] Caption: experiment, and plot the results in Figure 13. Onc...
[+] Combined figure saved as /content/training_data_images/1810.08647v1/figure18_1.png
[+] Caption: Figure 14: Total collective reward over the top 5 ...
Processed /content/sample_pdfs/sample/sample/1810.08647v1.pdf: 14 figures, 14 captions found
[+] Combined figure saved as /content/training_data_images/1807.05960/figure3_1.png
[+] Caption: Figure 1: High-level intuition for LEO. While
MAML...
[+] Combined figure saved as /content/training_data_images/1807.05960/figure3_2.png
[+] Caption: Figure 2: Overview of the architecture of LEO.
[+] Combined figure saved as /content/training_data_images/1807.05960/figure7_1.png
[+] Caption: Figure 3: Meta-learning with LEO of a multimodal t...


 25%|██▌       | 25/100 [00:43<02:22,  1.91s/it]

[+] Combined figure saved as /content/training_data_images/1807.05960/figure9_1.png
[+] Caption: Figure 4: t-SNE plot of latent space codes before ...
[+] Combined figure saved as /content/training_data_images/1807.05960/figure10_1.png
[+] Caption: Figure 5: Curvature and coverage metrics for a num...
Processed /content/sample_pdfs/sample/sample/1807.05960.pdf: 5 figures, 5 captions found
[+] Combined figure saved as /content/training_data_images/1804.01756/figure6_1.png
[+] Caption: Figure 2: The negative variational lower bound (le...
[+] Combined figure saved as /content/training_data_images/1804.01756/figure6_2.png
[+] Caption: At the end of training, our VAE reached a negative...
[+] Combined figure saved as /content/training_data_images/1804.01756/figure7_1.png
[+] Caption: batch of images with many classes and samples. To ...
[+] Combined figure saved as /content/training_data_images/1804.01756/figure7_2.png
[+] Caption: Figure 5: Comparison of samples from CIFAR. The 24...
[+] 

 26%|██▌       | 26/100 [00:44<02:15,  1.83s/it]

[+] Combined figure saved as /content/training_data_images/1804.01756/figure14_1.png
[+] Caption: Figure 11: The KL-divergence between yt (left) and...
Processed /content/sample_pdfs/sample/sample/1804.01756.pdf: 9 figures, 9 captions found
[+] Figure saved as /content/training_data_images/1809.07435/figure4_1.png
[+] Caption: Figure 1: The checkered grid world environment. Tr...


 27%|██▋       | 27/100 [00:45<01:39,  1.37s/it]

[+] Figure saved as /content/training_data_images/1809.07435/figure5_1.png
[+] Caption: Figure 2: Checkered grid world policy evaluation r...
[+] Figure saved as /content/training_data_images/1809.07435/figure5_2.png
[+] Caption: Figure 3: Checkered grid world reward sequence rec...
[+] Figure saved as /content/training_data_images/1809.07435/figure5_3.png
[+] Caption: Figure 4: The wavy ring world environment. Each st...
[+] Figure saved as /content/training_data_images/1809.07435/figure6_1.png
[+] Caption: Figure 5: Wavy ring world results. Despite the use...
Processed /content/sample_pdfs/sample/sample/1809.07435.pdf: 5 figures, 5 captions found
[+] Figure saved as /content/training_data_images/1810.05017v1/figure2_1.png
[+] Caption: Figure 1: Starting from a dataset of demonstration...
[+] Figure saved as /content/training_data_images/1810.05017v1/figure3_1.png
[+] Caption: Figure 2: Imitation actor algorithm (left) and ill...
[+] Figure saved as /content/training_data_images/1810.

 28%|██▊       | 28/100 [00:52<03:48,  3.17s/it]

[+] Combined figure saved as /content/training_data_images/1810.05017v1/figure8_1.png
[+] Caption: Figure 7: Efﬁcient task policy. The task policy (w...
Processed /content/sample_pdfs/sample/sample/1810.05017v1.pdf: 6 figures, 6 captions found
[+] Combined figure saved as /content/training_data_images/1811.06407v1/figure4_1.png
[+] Caption: We now describe the architectures we use in our ex...
[+] Combined figure saved as /content/training_data_images/1811.06407v1/figure6_1.png
[+] Caption: Figure 2:
Frames from the agent moving at random i...
[+] Combined figure saved as /content/training_data_images/1811.06407v1/figure6_2.png
[+] Caption: Figure 3: Examples of agent observations for diffe...
[+] Combined figure saved as /content/training_data_images/1811.06407v1/figure8_1.png
[+] Caption: Figure 4: Example predictions for terrain. In each...
[+] Figure saved as /content/training_data_images/1811.06407v1/figure8_2.png
[+] Caption: Figure 5: Symmetry of the
(x, y, θ) prediction wit...


 30%|███       | 30/100 [00:55<02:32,  2.18s/it]

[+] Figure saved as /content/training_data_images/1806.07917/figure2_1.png
[+] Caption: Figure 1: Baldwinian evolution (left) versus Lamar...
[+] Combined figure saved as /content/training_data_images/1806.07917/figure8_1.png
[+] Caption: Figure 3: Comparison of the speed of ﬁtting of the...
[+] Figure saved as /content/training_data_images/1806.07917/figure10_1.png
[+] Caption: Figure 7: Hyperparameter evolution shown at 25 eve...
Processed /content/sample_pdfs/sample/sample/1806.07917.pdf: 3 figures, 3 captions found


 31%|███       | 31/100 [00:55<01:49,  1.58s/it]

[+] Combined figure saved as /content/training_data_images/1810.08163v1/figure3_1.png
[+] Caption: Figure 1: Left: Trajectory-centric planning over m...
[+] Figure saved as /content/training_data_images/1810.08163v1/figure13_1.png
[+] Caption: Figure 5: Simple maze example, with two coins.
Processed /content/sample_pdfs/sample/sample/1810.08163v1.pdf: 2 figures, 2 captions found
Processed /content/sample_pdfs/sample/sample/1808.01340.pdf: 0 figures, 0 captions found
Processed /content/sample_pdfs/sample/sample/1805.08913.pdf: 0 figures, 0 captions found
[+] Figure saved as /content/training_data_images/1806.10474/figure4_1.jpeg
[+] Caption: Figure 1: Schematic overview of an autoregressive ...
[+] Combined figure saved as /content/training_data_images/1806.10474/figure7_1.png
[+] Caption: Figure 2: Predictability proﬁles:
NLLs obtained by...
Processed /content/sample_pdfs/sample/sample/1806.10474.pdf: 2 figures, 2 captions found
[+] Combined figure saved as /content/training_data_image

 35%|███▌      | 35/100 [01:03<01:59,  1.83s/it]

[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_1.png
[!] No caption found for figure 13_1
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_2.png
[!] No caption found for figure 13_2
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_3.png
[!] No caption found for figure 13_3
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_4.png
[!] No caption found for figure 13_4
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_5.png
[!] No caption found for figure 13_5
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_6.png
[!] No caption found for figure 13_6
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_7.png
[!] No caption found for figure 13_7
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_8.png
[!] No caption found for figure 13_8
[+] Figure saved as /content/training_data_images/1807.04225v1/figure13_9.png
[!

 37%|███▋      | 37/100 [01:03<01:24,  1.34s/it]

[+] Combined figure saved as /content/training_data_images/1811.05154v1/figure9_1.png
[+] Caption: Figure 1: The expected n-round regret of UCB1, TS,...
Processed /content/sample_pdfs/sample/sample/1811.05154v1.pdf: 1 figures, 1 captions found
[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure5_1.png
[+] Caption: Figure 1: Learning Dynamics in Matching Pennies: (...
[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure20_1.png
[+] Caption: Figure 3: Matching Pennies
[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure20_2.png
[+] Caption: Figure 4: Rock-Paper-Scissors


 38%|███▊      | 38/100 [01:05<01:20,  1.30s/it]

[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure20_3.png
[+] Caption: Figure 6: Replicator Dynamics in
the Shapley Game
[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure20_4.png
[+] Caption: Figure 5: Bias Rock-Paper-Scissors
[+] Combined figure saved as /content/training_data_images/1810.09026v1/figure20_5.png
[+] Caption: Figure 7: RPG dynamics in the
Shapley Game
[+] Figure saved as /content/training_data_images/1810.09026v1/figure27_1.png
[+] Caption: Figure 10: Regression CFR convergence results; x-a...
Processed /content/sample_pdfs/sample/sample/1810.09026v1.pdf: 7 figures, 7 captions found


 39%|███▉      | 39/100 [01:05<01:05,  1.08s/it]

[+] Figure saved as /content/training_data_images/1805.12387/figure5_1.png
[+] Caption: Figure 1: The system is running in circles for 25 ...
[+] Figure saved as /content/training_data_images/1805.12387/figure5_2.png
[+] Caption: Figure 2: The system goes straight to the magenta ...
[+] Figure saved as /content/training_data_images/1805.12387/figure5_3.png
[+] Caption: Figure 3: The system is going toward the blue ball...
[+] Figure saved as /content/training_data_images/1805.12387/figure5_4.png
[+] Caption: Figure 4: The system turns when facing a wall.
[+] Figure saved as /content/training_data_images/1805.12387/figure6_1.png
[+] Caption: Figure 5: Switching goals using a the switching ag...
[+] Figure saved as /content/training_data_images/1805.12387/figure6_2.png
[+] Caption: Figure 6: Sequence of the posteriors of the differ...
[+] Figure saved as /content/training_data_images/1805.12387/figure6_3.png
[+] Caption: Figure 7: The system is choosing its action unifor...
Processed /co

 40%|████      | 40/100 [01:05<00:55,  1.09it/s]

[+] Combined figure saved as /content/training_data_images/1804.03980/figure14_1.png
[+] Caption: Figure 5: a) Unigram statistics of symbol usage br...
[+] Figure saved as /content/training_data_images/1804.03980/figure14_2.png
[+] Caption: Figure 6: Bigram usage in all interaction pairs be...
Processed /content/sample_pdfs/sample/sample/1804.03980.pdf: 6 figures, 6 captions found
Processed /content/sample_pdfs/sample/sample/1811.07871v1.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1804.03984/figure2_1.png
[+] Caption: Figure 1: High-level overview of the referential g...
[+] Figure saved as /content/training_data_images/1804.03984/figure5_1.png
[+] Caption: Figure 2: Training curves of different experimenta...
[+] Combined figure saved as /content/training_data_images/1804.03984/figure7_1.png
[+] Caption: Figure 3: left: Three languages with different pro...


 42%|████▏     | 42/100 [01:10<01:30,  1.56s/it]

[+] Combined figure saved as /content/training_data_images/1804.03984/figure9_1.png
[+] Caption: Figure 4: Target images and their associated messa...
Processed /content/sample_pdfs/sample/sample/1804.03984.pdf: 4 figures, 4 captions found
[+] Combined figure saved as /content/training_data_images/1812.01483v1/figure2_1.png
[+] Caption: Figure 1: Joint unsupervised learning of task segm...
[+] Figure saved as /content/training_data_images/1812.01483v1/figure6_1.jpeg
[+] Caption: Figure 3: Example of multi-task
environment (2D gr...
[+] Combined figure saved as /content/training_data_images/1812.01483v1/figure8_1.png
[+] Caption: the correct order, without mistakes (i.e. without ...
[+] Combined figure saved as /content/training_data_images/1812.01483v1/figure15_1.png
[+] Caption: In Figure 10, we investigate termination locations...


 43%|████▎     | 43/100 [01:12<01:25,  1.50s/it]

[+] Figure saved as /content/training_data_images/1812.01483v1/figure16_1.png
[+] Caption: Figure 10: Heatmap of termination locations for ea...
[+] Combined figure saved as /content/training_data_images/1812.01483v1/figure16_2.png
[+] Caption: Figure 8: Example of sub-task policies discovered ...
Processed /content/sample_pdfs/sample/sample/1812.01483v1.pdf: 6 figures, 6 captions found


 44%|████▍     | 44/100 [01:13<01:18,  1.40s/it]

[+] Combined figure saved as /content/training_data_images/1811.01458v1/figure2_1.png
[+] Caption: Figure 1: a) In an MDP the action u is sampled fro...
[+] Figure saved as /content/training_data_images/1811.01458v1/figure5_1.png
[+] Caption: Figure 3: BAD, both with and without counterfactua...
[+] Combined figure saved as /content/training_data_images/1811.01458v1/figure7_1.png
[+] Caption: Figure 4: a) Hanabi training curves for BAD and th...
Processed /content/sample_pdfs/sample/sample/1811.01458v1.pdf: 3 figures, 3 captions found
Processed /content/sample_pdfs/sample/sample/1805.09208.pdf: 0 figures, 0 captions found
[+] Figure saved as /content/training_data_images/1806.11006/figure3_1.png
[+] Caption: Figure 1. Illustration of method of learned moment...
[+] Combined figure saved as /content/training_data_images/1806.11006/figure7_1.png
[+] Caption: To answer the ﬁrst question – does learning moment...
[+] Combined figure saved as /content/training_data_images/1806.11006/figure8

 46%|████▌     | 46/100 [01:14<00:54,  1.01s/it]

[+] Combined figure saved as /content/training_data_images/1806.11006/figure13_1.png
[+] Caption: Figure 5. Samples for only activation features, gr...
[+] Combined figure saved as /content/training_data_images/1806.11006/figure13_2.png
[+] Caption: Figure 6. CelebA samples for large generator train...
Processed /content/sample_pdfs/sample/sample/1806.11006.pdf: 6 figures, 6 captions found


 47%|████▋     | 47/100 [01:14<00:43,  1.22it/s]

[+] Combined figure saved as /content/training_data_images/1807.00196/figure6_1.png
[+] Caption: Figure 3: Learning dynamics for computing equilibr...
Processed /content/sample_pdfs/sample/sample/1807.00196.pdf: 1 figures, 1 captions found
[+] Figure saved as /content/training_data_images/1807.01670/figure2_1.png
[+] Caption: Figure 1: Example descriptions with corresponding ...
[+] Figure saved as /content/training_data_images/1807.01670/figure3_1.png
[+] Caption: Figure 2: Diagram of our model. A representation n...
[+] Figure saved as /content/training_data_images/1807.01670/figure4_1.png
[+] Caption: Figure 3: Samples generated from the synthetic (to...
[+] Figure saved as /content/training_data_images/1807.01670/figure5_1.png
[+] Caption: Figure 4: ELBO numbers for the model variants unde...
[+] Figure saved as /content/training_data_images/1807.01670/figure6_1.png
[+] Caption: Figure 5: Top, visual diagram of scene transformat...
[+] Figure saved as /content/training_data_images/

 48%|████▊     | 48/100 [01:18<01:26,  1.67s/it]

[+] Figure saved as /content/training_data_images/1807.01670/figure16_1.png
[!] No caption found for figure 16_1
[+] Figure saved as /content/training_data_images/1807.01670/figure16_2.png
[!] No caption found for figure 16_2
[+] Figure saved as /content/training_data_images/1807.01670/figure16_3.png
[!] No caption found for figure 16_3
[+] Figure saved as /content/training_data_images/1807.01670/figure16_4.png
[!] No caption found for figure 16_4
[+] Figure saved as /content/training_data_images/1807.01670/figure16_5.png
[!] No caption found for figure 16_5
[+] Figure saved as /content/training_data_images/1807.01670/figure16_6.png
[!] No caption found for figure 16_6
[+] Figure saved as /content/training_data_images/1807.01670/figure16_7.png
[!] No caption found for figure 16_7
[+] Figure saved as /content/training_data_images/1807.01670/figure16_8.png
[!] No caption found for figure 16_8
[+] Figure saved as /content/training_data_images/1807.01670/figure16_9.png
[!] No caption found

 49%|████▉     | 49/100 [01:19<01:22,  1.62s/it]

[+] Combined figure saved as /content/training_data_images/1812.02216v1/figure21_1.png
[+] Caption: Figure 6: Additional results for ﬁgure 2 (point ma...
[+] Figure saved as /content/training_data_images/1812.02216v1/figure22_1.jpeg
[+] Caption: Figure 8: Additional results for ﬁgure 4 (mobile b...
Processed /content/sample_pdfs/sample/sample/1812.02216v1.pdf: 7 figures, 7 captions found
[+] Combined figure saved as /content/training_data_images/1810.09951v1/figure4_1.png
[+] Caption: Fig. 1: Network architecture. Input images in each...


 50%|█████     | 50/100 [01:20<01:05,  1.31s/it]

[+] Combined figure saved as /content/training_data_images/1810.09951v1/figure15_1.png
[+] Caption: Fig. 4: Eﬀect of ghost clusters. Each row shows sh...
Processed /content/sample_pdfs/sample/sample/1810.09951v1.pdf: 2 figures, 2 captions found
[+] Figure saved as /content/training_data_images/1810.10802/figure1_1.png
[!] No caption found for figure 1_1
Processed /content/sample_pdfs/sample/sample/1810.10802.pdf: 1 figures, 0 captions found
Processed /content/sample_pdfs/sample/sample/1805.06370.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1806.02215/figure7_1.png
[+] Caption: Figure 1: Results of SpIN for solving two-dimensio...
[+] Combined figure saved as /content/training_data_images/1806.02215/figure8_1.png
[+] Caption: (b) Frames which most (top) and least (bottom) act...
[+] Figure saved as /content/training_data_images/1806.02215/figure18_1.png
[+] Caption: Figure 3: Training curves on bouncing ball videos
[+] Figure saved as /cont

 53%|█████▎    | 53/100 [01:22<00:47,  1.02s/it]

[+] Combined figure saved as /content/training_data_images/1806.02215/figure25_1.png
[+] Caption: Figure 10: Seaquest
[+] Combined figure saved as /content/training_data_images/1806.02215/figure26_1.png
[+] Caption: Figure 11: Space Invaders
Processed /content/sample_pdfs/sample/sample/1806.02215.pdf: 11 figures, 11 captions found
[+] Figure saved as /content/training_data_images/1807.03819/figure6_1.png
[+] Caption: Figure 3: Ponder time of UT with dynamic halting f...
[+] Figure saved as /content/training_data_images/1807.03819/figure13_1.png
[+] Caption: Figure 4: The Universal Transformer with position ...
[+] Combined figure saved as /content/training_data_images/1807.03819/figure18_1.png
[+] Caption: Figure 5: Visualization of the attention distribut...
[+] Combined figure saved as /content/training_data_images/1807.03819/figure19_1.png
[+] Caption: Figure 6: Visualization of the attention distribut...
[+] Combined figure saved as /content/training_data_images/1807.03819/figure20

 54%|█████▍    | 54/100 [01:24<00:54,  1.19s/it]

[+] Combined figure saved as /content/training_data_images/1807.03819/figure23_1.png
[+] Caption: Figure 7: Visualization of the attention distribut...
Processed /content/sample_pdfs/sample/sample/1807.03819.pdf: 8 figures, 6 captions found


 55%|█████▌    | 55/100 [01:25<00:45,  1.02s/it]

[+] Figure saved as /content/training_data_images/1806.04624v1/figure6_1.png
[+] Caption: Figure 2: (a) compares variants of ER and REM-Dyna...
Processed /content/sample_pdfs/sample/sample/1806.04624v1.pdf: 1 figures, 1 captions found


 56%|█████▌    | 56/100 [01:25<00:37,  1.16it/s]

[+] Combined figure saved as /content/training_data_images/1806.06923/figure4_1.png
[+] Caption: Figure 1. Network architectures for DQN and recent...
[+] Combined figure saved as /content/training_data_images/1806.06923/figure6_1.png
[+] Caption: Figure 2. Effect of varying N and N ′, the number ...
Processed /content/sample_pdfs/sample/sample/1806.06923.pdf: 2 figures, 2 captions found


 57%|█████▋    | 57/100 [01:25<00:29,  1.46it/s]

[+] Combined figure saved as /content/training_data_images/1807.09387v1/figure7_1.png
[+] Caption: Figure 4: Two different item distribution schedule...
Processed /content/sample_pdfs/sample/sample/1807.09387v1.pdf: 1 figures, 1 captions found
[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure2_1.png
[+] Caption: Figure 1: The Minecraft random walk dataset for lo...
[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure5_1.png
[+] Caption: Figure 4: The loss and predictive MSE for both the...
[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure6_1.png
[+] Caption: Figure 5: Generated samples from the generative mo...
[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure7_1.png
[+] Caption: Figure 6: Attention over the context images in the...
[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure8_1.png
[+] Caption: Figure 7: Localization with the discriminativ

 58%|█████▊    | 58/100 [01:27<00:48,  1.15s/it]

[+] Combined figure saved as /content/training_data_images/1807.03149v1/figure13_1.png
[+] Caption: Figure 9: Localization with the discriminative and...
Processed /content/sample_pdfs/sample/sample/1807.03149v1.pdf: 6 figures, 6 captions found
[+] Combined figure saved as /content/training_data_images/1811.11359/figure8_1.png
[+] Caption: Figure 1: a) Percentage of goals successfully achi...
[+] Combined figure saved as /content/training_data_images/1811.11359/figure8_2.png
[+] Caption: Figure 2: Average achieved frames for point mass (...


 59%|█████▉    | 59/100 [01:30<00:57,  1.41s/it]

[+] Combined figure saved as /content/training_data_images/1811.11359/figure9_1.png
[+] Caption: Figure 3: Quantitative evaluation of goal achievem...
[+] Combined figure saved as /content/training_data_images/1811.11359/figure9_2.png
[+] Caption: Figure 4: Per-dimension quantitative evaluation of...
[+] Figure saved as /content/training_data_images/1811.11359/figure10_1.png
[+] Caption: Figure 5:
Average achieved frames over 30 trials f...
[+] Figure saved as /content/training_data_images/1811.11359/figure16_1.png
[+] Caption: Figure 6: Results for Control Suite tasks using th...
[+] Figure saved as /content/training_data_images/1811.11359/figure17_1.jpeg
[+] Caption: Figure 8: Per-dimension quantitative evaluation on...
Processed /content/sample_pdfs/sample/sample/1811.11359.pdf: 7 figures, 7 captions found
[+] Combined figure saved as /content/training_data_images/1804.02341/figure2_1.png
[+] Caption: Figure 1: The two-person image description game. S...
[+] Combined figure saved as

 60%|██████    | 60/100 [01:31<00:55,  1.38s/it]

[+] Combined figure saved as /content/training_data_images/1804.02341/figure16_1.png
[+] Caption: Figure 5: Ten communication examples when the spea...
[+] Combined figure saved as /content/training_data_images/1804.02341/figure17_1.png
[+] Caption: Figure 6: Scatter plots of the image embedding fro...
Processed /content/sample_pdfs/sample/sample/1804.02341.pdf: 6 figures, 6 captions found
[+] Combined figure saved as /content/training_data_images/1804.01118/figure1_1.png
[+] Caption: Figure 1. SPIRAL takes as input either random nois...
[+] Combined figure saved as /content/training_data_images/1804.01118/figure2_1.png
[+] Caption: Figure 2. The SPIRAL architecture. (a) An executio...
[+] Figure saved as /content/training_data_images/1804.01118/figure5_1.png
[+] Caption: Figure 3. Illustration
of
the
agent’s
action
space...
[+] Combined figure saved as /content/training_data_images/1804.01118/figure6_1.png
[+] Caption: Figure 5. Omniglot.
(a) A SPIRAL agent is trained ...
[+] Combined

 61%|██████    | 61/100 [01:38<02:00,  3.09s/it]

[+] Combined figure saved as /content/training_data_images/1804.01118/figure8_1.png
[+] Caption: Figure 10. 3D scene reconstructions.
The SPIRAL ag...
[+] Combined figure saved as /content/training_data_images/1804.01118/figure8_2.png
[+] Caption: Figure 8. ℓ2-distance between reconstructions and ...
Processed /content/sample_pdfs/sample/sample/1804.01118.pdf: 9 figures, 9 captions found
[+] Combined figure saved as /content/training_data_images/1811.04551v3/figure2_1.png
[+] Caption: Figure 1: Image-based control domains used in our ...
[+] Combined figure saved as /content/training_data_images/1811.04551v3/figure16_1.png
[+] Caption: Figure 8: Open-loop video predictions for test epi...
[+] Figure saved as /content/training_data_images/1811.04551v3/figure17_1.png
[+] Caption: Figure 9: Open-loop state diagnostics. We freeze t...


 62%|██████▏   | 62/100 [01:50<03:37,  5.72s/it]

[+] Combined figure saved as /content/training_data_images/1811.04551v3/figure18_1.png
[+] Caption: Figure 10: Planning performance on the cheetah run...
Processed /content/sample_pdfs/sample/sample/1811.04551v3.pdf: 4 figures, 4 captions found
[+] Figure saved as /content/training_data_images/1807.02089/figure8_1.png
[+] Caption: Figure 2. Empirical distribution of the log10 dela...
Processed /content/sample_pdfs/sample/sample/1807.02089.pdf: 1 figures, 1 captions found
[+] Combined figure saved as /content/training_data_images/1807.09647/figure7_1.png
[+] Caption: Figure 1: The DeepSea MDP


 64%|██████▍   | 64/100 [01:52<02:10,  3.62s/it]

[+] Combined figure saved as /content/training_data_images/1807.09647/figure9_1.png
[+] Caption: Figure 3: Value (as deﬁned in (14)) and log visita...
[+] Combined figure saved as /content/training_data_images/1807.09647/figure9_2.png
[+] Caption: Figure 4: Value (as deﬁned in (14)) and log visita...
Processed /content/sample_pdfs/sample/sample/1807.09647.pdf: 3 figures, 3 captions found
[+] Combined figure saved as /content/training_data_images/1808.00300/figure2_1.png
[+] Caption: Fig. 1: Given a natural image and a textual questi...
[+] Figure saved as /content/training_data_images/1808.00300/figure5_1.jpeg
[+] Caption: Fig. 2: Our hard attention replaces commonly used ...
[+] Combined figure saved as /content/training_data_images/1808.00300/figure14_1.png
[+] Caption: Fig. 3: Qualitative comparison between our variant...


 65%|██████▌   | 65/100 [01:55<01:57,  3.36s/it]

[+] Combined figure saved as /content/training_data_images/1808.00300/figure15_1.png
[+] Caption: Fig. 4: We show additional results with our AdaHAN...
[+] Combined figure saved as /content/training_data_images/1808.00300/figure16_1.png
[+] Caption: Fig. 5: Validation accuracy plots on CLEVR of the ...
Processed /content/sample_pdfs/sample/sample/1808.00300.pdf: 5 figures, 5 captions found
[+] Combined figure saved as /content/training_data_images/1811.08469v1/figure4_1.png
[+] Caption: Figure 1: Illustration
of the tandem game.
Processed /content/sample_pdfs/sample/sample/1811.08469v1.pdf: 1 figures, 1 captions found
[+] Figure saved as /content/training_data_images/1811.09656v1/figure3_1.png
[+] Caption: episodes are initialized along the motion capture ...
[+] Figure saved as /content/training_data_images/1811.09656v1/figure5_1.png
[+] Caption: Figure 2: Training settings for explicit training ...
[+] Figure saved as /content/training_data_images/1811.09656v1/figure5_2.png
[+] Capti

 67%|██████▋   | 67/100 [02:02<01:54,  3.47s/it]

[+] Figure saved as /content/training_data_images/1811.09656v1/figure18_1.png
[!] No caption found for figure 18_1
Processed /content/sample_pdfs/sample/sample/1811.09656v1.pdf: 10 figures, 5 captions found
[+] Combined figure saved as /content/training_data_images/1807.10066v1/figure2_1.png
[+] Caption: Figure 1: Network architecture. We build upon I3D ...
[+] Figure saved as /content/training_data_images/1807.10066v1/figure3_1.png
[+] Caption: Figure 2: Per-class performance. Performance of ou...


 68%|██████▊   | 68/100 [02:04<01:40,  3.13s/it]

[+] Combined figure saved as /content/training_data_images/1807.10066v1/figure4_1.png
[+] Caption: Figure 3: Ablations. Validation performance curves...
Processed /content/sample_pdfs/sample/sample/1807.10066v1.pdf: 3 figures, 3 captions found
[+] Figure saved as /content/training_data_images/1808.09352/figure2_1.png
[+] Caption: Figure 1: The Sally-Anne experiment setup from Bar...


 69%|██████▉   | 69/100 [02:05<01:18,  2.54s/it]

[+] Combined figure saved as /content/training_data_images/1808.09352/figure6_1.png
[+] Caption: Figure 3: Memory Network and Multiple Observer Mod...
Processed /content/sample_pdfs/sample/sample/1808.09352.pdf: 2 figures, 2 captions found
Processed /content/sample_pdfs/sample/sample/1812.01647v1.pdf: 0 figures, 0 captions found


 71%|███████   | 71/100 [02:05<00:45,  1.56s/it]

[+] Figure saved as /content/training_data_images/1805.11199/figure9_1.png
[+] Caption: Figure 4: StarCraft navigation results. Figure 4a ...
Processed /content/sample_pdfs/sample/sample/1805.11199.pdf: 1 figures, 1 captions found
Processed /content/sample_pdfs/sample/sample/1811.11214.pdf: 0 figures, 0 captions found
[+] Figure saved as /content/training_data_images/1807.03748/figure2_1.png
[+] Caption: Figure 1: Overview of Contrastive Predictive Codin...
[+] Figure saved as /content/training_data_images/1807.03748/figure5_1.png
[+] Caption: Figure 2: t-SNE visualization of audio (speech)
re...
[+] Figure saved as /content/training_data_images/1807.03748/figure5_2.png
[+] Caption: Figure 3: Average accuracy of predicting the
posit...


 73%|███████▎  | 73/100 [02:06<00:31,  1.17s/it]

[+] Combined figure saved as /content/training_data_images/1807.03748/figure7_1.png
[+] Caption: Figure 5: Every row shows image patches that activ...
[+] Figure saved as /content/training_data_images/1807.03748/figure9_1.png
[+] Caption: Figure 6: Reinforcement Learning results for 5 Dee...
Processed /content/sample_pdfs/sample/sample/1807.03748.pdf: 5 figures, 5 captions found


 74%|███████▍  | 74/100 [02:07<00:26,  1.00s/it]

[+] Figure saved as /content/training_data_images/1811.06272v1/figure7_1.png
[+] Caption: Figure 2: Experimental results on PO-SOKOBAN envir...
[+] Figure saved as /content/training_data_images/1811.06272v1/figure14_1.png
[+] Caption: Figure 3: Top: PO-SOKOBAN. Shown on the left is a ...
[+] Combined figure saved as /content/training_data_images/1811.06272v1/figure15_1.png
[+] Caption: Figure 4: Analysis of the model mismatch of the le...
Processed /content/sample_pdfs/sample/sample/1811.06272v1.pdf: 3 figures, 3 captions found
[+] Combined figure saved as /content/training_data_images/1808.10485v1/figure2_1.png
[+] Caption: Figure 1: An example sentence with syntactic, Prop...
Processed /content/sample_pdfs/sample/sample/1808.10485v1.pdf: 1 figures, 1 captions found
[+] Combined figure saved as /content/training_data_images/1806.03863/figure11_1.png
[+] Caption: All the results above were obtained when training ...


 76%|███████▌  | 76/100 [02:09<00:26,  1.11s/it]

[+] Combined figure saved as /content/training_data_images/1806.03863/figure11_2.png
[+] Caption: Fig. 5. Example outputs on a subset of frames one ...
[+] Combined figure saved as /content/training_data_images/1806.03863/figure15_1.png
[+] Caption: Fig. 9. Timeline for GPU usage for a sequential Pa...
[+] Figure saved as /content/training_data_images/1806.03863/figure17_1.png
[+] Caption: Fig. 10. Performance/eﬃciency trade-oﬀintroduced b...
Processed /content/sample_pdfs/sample/sample/1806.03863.pdf: 4 figures, 4 captions found


 77%|███████▋  | 77/100 [02:10<00:25,  1.11s/it]

[+] Combined figure saved as /content/training_data_images/1812.00898v1/figure2_1.png
[+] Caption: Figure 1: We build agents that can generate progra...
[+] Combined figure saved as /content/training_data_images/1812.00898v1/figure4_1.png
[+] Caption: Figure 3: (a) Discriminator’s architecture. It tak...
Processed /content/sample_pdfs/sample/sample/1812.00898v1.pdf: 2 figures, 2 captions found
[+] Figure saved as /content/training_data_images/1803.08884/figure3_1.png
[+] Caption: Figure 1: Screenshots from (A) the Cleanup game, (...
[+] Figure saved as /content/training_data_images/1803.08884/figure4_1.png
[+] Caption: Figure 2: The public goods game (Cleanup) and the ...
[+] Figure saved as /content/training_data_images/1803.08884/figure5_1.png
[+] Caption: Figure 3: Advantageous inequity aversion facilitat...
[+] Figure saved as /content/training_data_images/1803.08884/figure6_1.png
[+] Caption: In Harvest, we enforce cooperation by modifying th...
[+] Figure saved as /content/traini

 78%|███████▊  | 78/100 [02:11<00:23,  1.08s/it]

[+] Figure saved as /content/training_data_images/1803.08884/figure12_1.png
[+] Caption: Figure 8: Inequity aversion alters the effective p...
Processed /content/sample_pdfs/sample/sample/1803.08884.pdf: 8 figures, 8 captions found
[+] Combined figure saved as /content/training_data_images/1807.03064/figure3_1.png
[+] Caption: Figure 1: Random policy trajectories in a S-shaped...
[+] Combined figure saved as /content/training_data_images/1807.03064/figure3_2.png
[+] Caption: Figure 2: Random policy trajectories in a layout w...
[+] Figure saved as /content/training_data_images/1807.03064/figure3_3.png
[+] Caption: Figure 3: Left: Random policy trajectories in U-sh...
[+] Combined figure saved as /content/training_data_images/1807.03064/figure4_1.png
[+] Caption: Figure 4: Predictions of MC (left) and TD (right) ...
[+] Combined figure saved as /content/training_data_images/1807.03064/figure4_2.png
[+] Caption: Figure 5: Predictions of MC (left) and TD (right) ...
[+] Figure saved as /c

 79%|███████▉  | 79/100 [02:14<00:30,  1.47s/it]

[+] Combined figure saved as /content/training_data_images/1807.03064/figure10_1.png
[+] Caption: Figure 10: Warping of the original 2-dimensional s...
[+] Combined figure saved as /content/training_data_images/1807.03064/figure10_2.png
[+] Caption: Figure 11: MSVE of value function estimates learnt...
[+] Combined figure saved as /content/training_data_images/1807.03064/figure11_1.png
[+] Caption: Figure 12: Successor Features targets with γ = 0.9...
Processed /content/sample_pdfs/sample/sample/1807.03064.pdf: 11 figures, 11 captions found
Processed /content/sample_pdfs/sample/sample/1811.07004v1.pdf: 0 figures, 0 captions found
Processed /content/sample_pdfs/sample/sample/1805.10265.pdf: 0 figures, 0 captions found


 82%|████████▏ | 82/100 [02:14<00:13,  1.35it/s]

[+] Combined figure saved as /content/training_data_images/1811.04017v2/figure2_1.png
[+] Caption: Figure 1: General struc-
ture of a tensor chain
Fi...
[+] Figure saved as /content/training_data_images/1811.04017v2/figure2_2.png
[+] Caption: Figure 3: Chain structure of a
SPDZ tensor
Processed /content/sample_pdfs/sample/sample/1811.04017v2.pdf: 2 figures, 2 captions found
[+] Figure saved as /content/training_data_images/1807.05211v1/figure2_1.jpeg
[+] Caption: Figure 1: Graph of the environment built from a si...
[+] Combined figure saved as /content/training_data_images/1807.05211v1/figure3_1.png
[+] Caption: Figure 2: Example imagery from the omnidirectional...
[+] Combined figure saved as /content/training_data_images/1807.05211v1/figure4_1.png
[+] Caption: Figure 3: Trajectories from a variety of initial l...
[+] Figure saved as /content/training_data_images/1807.05211v1/figure11_1.jpeg
[!] No caption found for figure 11_1
[+] Figure saved as /content/training_data_images/1807.0

 83%|████████▎ | 83/100 [02:15<00:12,  1.41it/s]

[+] Combined figure saved as /content/training_data_images/1807.05211v1/figure13_1.png
[+] Caption: Each deployment trajectory is presented individual...
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_1.jpeg
[!] No caption found for figure 14_1
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_2.jpeg
[!] No caption found for figure 14_2
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_3.jpeg
[!] No caption found for figure 14_3
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_4.jpeg
[!] No caption found for figure 14_4
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_5.jpeg
[!] No caption found for figure 14_5
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_6.jpeg
[!] No caption found for figure 14_6
[+] Figure saved as /content/training_data_images/1807.05211v1/figure14_7.jpeg
[!] No caption found for figure 14_7
[+] Figure saved as /content/train

 84%|████████▍ | 84/100 [02:20<00:27,  1.70s/it]

[+] Combined figure saved as /content/training_data_images/1806.01780/figure9_1.png
[+] Caption: Figure 10. Exemplary tasks of interest from DM Lab...
Processed /content/sample_pdfs/sample/sample/1806.01780.pdf: 10 figures, 10 captions found
[+] Combined figure saved as /content/training_data_images/1806.03107/figure7_1.png
[+] Caption: Figure 2: MiniPacman. Left: A full frame from the ...
[+] Figure saved as /content/training_data_images/1806.03107/figure7_2.png
[+] Caption: Figure 3: Moving MNIST. Left: Rows are example inp...
[+] Combined figure saved as /content/training_data_images/1806.03107/figure8_1.png
[+] Caption: Figure 4: Skip-state prediction for 1D signal. The...
[+] Figure saved as /content/training_data_images/1806.03107/figure9_1.png
[+] Caption: Figure 6: Rollout from the model. The model was tr...
[+] Combined figure saved as /content/training_data_images/1806.03107/figure9_2.png
[+] Caption: Figure 5: Beliefs of the model. Left: Independent ...
[+] Figure saved as /

 85%|████████▌ | 85/100 [02:21<00:23,  1.59s/it]

[+] Figure saved as /content/training_data_images/1806.03107/figure15_1.png
[+] Caption: Figure 8: Deep version of the model from Figure 1....
Processed /content/sample_pdfs/sample/sample/1806.03107.pdf: 7 figures, 7 captions found
[+] Figure saved as /content/training_data_images/1811.10928/figure8_1.png
[+] Caption: Figure 3: Node expansions for Sokoban on log-scale...
[+] Figure saved as /content/training_data_images/1811.10928/figure12_1.png
[+] Caption: Figure 4: Learning curves of A3C for the 4 chosen ...
Processed /content/sample_pdfs/sample/sample/1811.10928.pdf: 2 figures, 2 captions found
[+] Figure saved as /content/training_data_images/1807.05162v1/figure2_1.png
[+] Caption: Figure 1: The full visual speech recognition syste...
[+] Combined figure saved as /content/training_data_images/1807.05162v1/figure9_1.png
[+] Caption: By predicting phonemes directly, we also side-step...
[+] Combined figure saved as /content/training_data_images/1807.05162v1/figure9_2.png
[+] Caption

 87%|████████▋ | 87/100 [02:23<00:17,  1.33s/it]

[+] Figure saved as /content/training_data_images/1807.05162v1/figure19_1.jpeg
[+] Caption: Figure 9: Random sample of test-set lip images fro...
Processed /content/sample_pdfs/sample/sample/1807.05162v1.pdf: 6 figures, 6 captions found
[+] Combined figure saved as /content/training_data_images/1811.09716v1/figure4_1.png
[+] Caption: Figure 3: Illustration of the negative of the loss...
[+] Combined figure saved as /content/training_data_images/1811.09716v1/figure7_1.png
[+] Caption: Figure 8: Similar plot to Fig. 3, but where the lo...
[+] Combined figure saved as /content/training_data_images/1811.09716v1/figure8_1.png
[+] Caption: Figure 11: Visualizations of perturbed images and ...
[+] Combined figure saved as /content/training_data_images/1811.09716v1/figure8_2.png
[+] Caption: Figure 10: Evolution throughout the course of CURE...


 88%|████████▊ | 88/100 [02:26<00:21,  1.75s/it]

[+] Combined figure saved as /content/training_data_images/1811.09716v1/figure11_1.png
[+] Caption: Figure 13: Illustration of the negative of the los...
Processed /content/sample_pdfs/sample/sample/1811.09716v1.pdf: 5 figures, 5 captions found
[+] Figure saved as /content/training_data_images/1808.07992v1/figure4_1.png
[+] Caption: Fig. 1: Gestational Age (GA) vs Birth Weight (BW) ...
[+] Figure saved as /content/training_data_images/1808.07992v1/figure5_1.png
[+] Caption: Fig. 2: ROC curve for standard random forest (RF),...
Processed /content/sample_pdfs/sample/sample/1808.07992v1.pdf: 2 figures, 2 captions found
Processed /content/sample_pdfs/sample/sample/1809.02292v2.pdf: 0 figures, 0 captions found
[+] Combined figure saved as /content/training_data_images/1806.06920/figure7_1.png
[+] Caption: For our experiments we evaluate our MPO algorithm ...


 91%|█████████ | 91/100 [02:29<00:12,  1.36s/it]

[+] Figure saved as /content/training_data_images/1806.06920/figure11_1.jpeg
[+] Caption: Figure 4: Complete comparison of results for the c...
Processed /content/sample_pdfs/sample/sample/1806.06920.pdf: 2 figures, 2 captions found
Processed /content/sample_pdfs/sample/sample/1805.09247.pdf: 0 figures, 0 captions found
[+] Figure saved as /content/training_data_images/1803.06959/figure3_1.png
[+] Caption: Figure 1: Memorizing networks are more sensitive t...
[+] Combined figure saved as /content/training_data_images/1803.06959/figure4_1.png
[+] Caption: Figure 2: Memorizing networks are more sensitive t...
[+] Combined figure saved as /content/training_data_images/1803.06959/figure6_1.png
[+] Caption: Figure 4: Single direction reliance as a signal fo...
[+] Combined figure saved as /content/training_data_images/1803.06959/figure7_1.png
[+] Caption: Figure 6: Batch normalization decreases class sele...
[+] Combined figure saved as /content/training_data_images/1803.06959/figure8_1.png

 93%|█████████▎| 93/100 [02:34<00:11,  1.70s/it]

[+] Figure saved as /content/training_data_images/1803.06959/figure15_1.png
[!] No caption found for figure 15_1
[+] Figure saved as /content/training_data_images/1803.06959/figure15_2.png
[!] No caption found for figure 15_2
[+] Figure saved as /content/training_data_images/1803.06959/figure15_3.png
[!] No caption found for figure 15_3
[+] Figure saved as /content/training_data_images/1803.06959/figure15_4.png
[!] No caption found for figure 15_4
[+] Figure saved as /content/training_data_images/1803.06959/figure15_5.png
[!] No caption found for figure 15_5
Processed /content/sample_pdfs/sample/sample/1803.06959.pdf: 15 figures, 5 captions found
[+] Figure saved as /content/training_data_images/1802.10567/figure6_1.jpeg
[+] Caption: Figure 2. Learning times for a subset of the 13 au...
[+] Combined figure saved as /content/training_data_images/1802.10567/figure7_1.png
[+] Caption: Figure 4. Depiction of the agent stacking two bloc...
[+] Combined figure saved as /content/training_data

 94%|█████████▍| 94/100 [02:38<00:12,  2.15s/it]

[+] Combined figure saved as /content/training_data_images/1802.10567/figure8_1.png
[+] Caption: Figure 9. Learning statistics for a real robot exp...
[+] Figure saved as /content/training_data_images/1802.10567/figure8_2.png
[+] Caption: Figure 10. Image sequence depicting a trained SAC-...
[+] Figure saved as /content/training_data_images/1802.10567/figure8_3.jpeg
[+] Caption: Figure 8. Expected reward in the ’clean-up’ experi...
[+] Combined figure saved as /content/training_data_images/1802.10567/figure16_1.png
[+] Caption: Figure 12. Comparison of full auxiliary and extrin...
[+] Figure saved as /content/training_data_images/1802.10567/figure16_2.jpeg
[+] Caption: The learned distribution of Q values at the end of...
Processed /content/sample_pdfs/sample/sample/1802.10567.pdf: 8 figures, 8 captions found


 95%|█████████▌| 95/100 [02:39<00:08,  1.79s/it]

[+] Combined figure saved as /content/training_data_images/1803.09001/figure4_1.png
[+] Caption: Fig. 1: a) Dayan’s grid world. Arrows indicate the...
[+] Figure saved as /content/training_data_images/1803.09001/figure4_2.png
[+] Caption: Fig. 2: All predictors learn from scratch with new...
[+] Figure saved as /content/training_data_images/1803.09001/figure5_1.jpeg
[+] Caption: Fig. 3: The user controls the robot arm using a jo...
[+] Figure saved as /content/training_data_images/1803.09001/figure5_2.png
[+] Caption: Fig. 4: A 12 minute run tracing the maze circuit. ...
[+] Figure saved as /content/training_data_images/1803.09001/figure6_1.png
[+] Caption: Fig. 5: The same results as Figure 4, but with the...
Processed /content/sample_pdfs/sample/sample/1803.09001.pdf: 5 figures, 5 captions found
[+] Combined figure saved as /content/training_data_images/1806.01261/figure8_1.png
[+] Caption: Figure 2: Diﬀerent graph representations. (a) A mo...
[+] Figure saved as /content/training_da

 96%|█████████▌| 96/100 [02:40<00:06,  1.64s/it]

[+] Figure saved as /content/training_data_images/1806.01261/figure21_1.png
[!] No caption found for figure 21_1
[+] Figure saved as /content/training_data_images/1806.01261/figure21_2.png
[!] No caption found for figure 21_2
[+] Figure saved as /content/training_data_images/1806.01261/figure21_3.png
[!] No caption found for figure 21_3
Processed /content/sample_pdfs/sample/sample/1806.01261.pdf: 5 figures, 2 captions found
[+] Figure saved as /content/training_data_images/1807.02033/figure5_1.jpeg
[+] Caption: Figure 2: JUMP is trained using an approximate pos...
[+] Combined figure saved as /content/training_data_images/1807.02033/figure6_1.png
[+] Caption: Figure 3: In the “Color Reaction” narrative, shape...


 97%|█████████▋| 97/100 [02:40<00:04,  1.38s/it]

[+] Combined figure saved as /content/training_data_images/1807.02033/figure7_1.png
[+] Caption: Figure 4: In the Traveling Salesman narrative, one...
[+] Combined figure saved as /content/training_data_images/1807.02033/figure7_2.png
[+] Caption: Figure 5: Quantitative comparisons between JUMP an...
[+] Figure saved as /content/training_data_images/1807.02033/figure8_1.png
[+] Caption: Figure 6: A cube in a room, with MNIST digits engr...
Processed /content/sample_pdfs/sample/sample/1807.02033.pdf: 5 figures, 5 captions found
[+] Combined figure saved as /content/training_data_images/1810.10510v1/figure9_1.png
[+] Caption: Figure 3: Semantic keypoint transfer. The
annotate...
[+] Combined figure saved as /content/training_data_images/1810.10510v1/figure9_2.png
[+] Caption: Figure 4: Instance-level matching. Top row: inlier...
[+] Combined figure saved as /content/training_data_images/1810.10510v1/figure14_1.png
[+] Caption: Figure 5: Additional examples of semantic keypoint...
[+] Com

 98%|█████████▊| 98/100 [02:50<00:06,  3.44s/it]

[+] Combined figure saved as /content/training_data_images/1810.10510v1/figure19_2.png
[+] Caption: Figure 11: Indoor Venues Dataset (IVD). We show so...
Processed /content/sample_pdfs/sample/sample/1810.10510v1.pdf: 9 figures, 9 captions found
[+] Combined figure saved as /content/training_data_images/1805.09786/figure2_1.png
[+] Caption: Figure 1: An intuitive depiction of how images mig...
[+] Combined figure saved as /content/training_data_images/1805.09786/figure13_1.png
[+] Caption: Figure 7: An illustration of how trees can be repr...
[+] Combined figure saved as /content/training_data_images/1805.09786/figure13_2.png
[+] Caption: Figure 6: We show an example of a curriculum on th...


 99%|█████████▉| 99/100 [03:23<00:11, 11.52s/it]

[+] Combined figure saved as /content/training_data_images/1805.09786/figure15_1.png
[+] Caption: Figure 9: A depiction of a hyperbolic recursive tr...
Processed /content/sample_pdfs/sample/sample/1805.09786.pdf: 4 figures, 4 captions found
[+] Combined figure saved as /content/training_data_images/1806.01822/figure5_1.png
[+] Caption: Figure 2: Tasks. We tested the RMC on a suite of s...
[+] Figure saved as /content/training_data_images/1806.01822/figure7_1.png
[+] Caption: Figure 3: Model analysis. Each row depicts the att...
[+] Figure saved as /content/training_data_images/1806.01822/figure15_1.png
[+] Caption: Figure 6: Samples of programmatic tasks. Note that...
[+] Figure saved as /content/training_data_images/1806.01822/figure15_2.png
[+] Caption: Figure 7: Memorization tasks. Each sub-task takes ...


100%|██████████| 100/100 [03:23<00:00,  2.04s/it]

[+] Figure saved as /content/training_data_images/1806.01822/figure16_1.png
[+] Caption: Figure 8: Programmatic results. From left to right...
[+] Figure saved as /content/training_data_images/1806.01822/figure16_2.png
[+] Caption: Figure 9: Example BoxWorld level. The left panel s...
[+] Combined figure saved as /content/training_data_images/1806.01822/figure17_1.png
[+] Caption: Figure 10: Mini Pacman Results.
Processed /content/sample_pdfs/sample/sample/1806.01822.pdf: 7 figures, 7 captions found





All extracted images and captions have been saved to Google Drive at: /content/drive/MyDrive/training_data_images
