In [1]:
import datetime
from dotenv import load_dotenv
load_dotenv(override=True)

from ai_ta_backend.utils.types import DocumentMetadata


In [2]:
# Random test paper copy/pasted from arXiv: https://arxiv.org/html/2404.09995v1
# Only the start of the page is included (header lines through the first Introduction paragraph),
# with paste artifacts left in: a license banner, a LaTeX package warning, and digit runs after author names.
raw_text = """License: CC BY-NC-ND 4.0
arXiv:2404.09995v1 [cs.CV] 15 Apr 2024
(eccv) Package eccv Warning: Package 'hyperref' is loaded with option 'pagebackref', which is *not* recommended for camera-ready version

1
Taming Latent Diffusion Model for Neural Radiance Field Inpainting
Chieh Hubert Lin
1122
Changil Kim
11
Jia-Bin Huang
1133
Qinbo Li
11
Chih Yao Ma
11
Johannes Kopf
11
Ming-Hsuan Yang
22
Hung-Yu Tseng
11
Abstract
Neural Radiance Field (NeRF) is a representation for 3D reconstruction from multi-view images. Despite some recent work showing preliminary success in editing a reconstructed NeRF with diffusion prior, they remain struggling to synthesize reasonable geometry in completely uncovered regions. One major reason is the high diversity of synthetic contents from the diffusion model, which hinders the radiance field from converging to a crisp and deterministic geometry. Moreover, applying latent diffusion models on real data often yields a textural shift incoherent to the image condition due to auto-encoding errors. These two problems are further reinforced with the use of pixel-distance losses. To address these issues, we propose tempering the diffusion model's stochasticity with per-scene customization and mitigating the textural shift with masked adversarial training. During the analyses, we also found the commonly used pixel and perceptual losses are harmful in the NeRF inpainting task. Through rigorous experiments, our framework yields state-of-the-art NeRF inpainting results on various real-world scenes.

Refer to caption
Figure 1:NeRF inpainting. Given a set of posed images associated with inpainting masks, the proposed framework estimates a NeRF that renders high-quality novel views, where the inpainting region is realistic and contains high-frequency details.
1Introduction
The recent advancements in neural radiance fields (NeRF) [24, 27, 3] have achieved high-quality 3D reconstruction and novel-view synthesis of scenes captured with a collection of images. The success intrigues an increasing attention on manipulating NeRFs such as 3D scene stylization [38, 8] and NeRF editing [13]. In this work, we focus on the NeRF inpainting problem. As shown in Figure 1, given a set of images of a scene with the inpainting masks, our goal is to estimate a completed NeRF that renders high-quality images at novel viewpoints. The NeRF inpainting task enables a variety of 3D content creation applications such as removing objects from a scene [26, 39], completing non-observed part of the scene, and hallucinating contents in the designated regions.
"""

In [7]:
# Extract DocumentMetadata-shaped records from the pasted paper text using marvin.
# The call is the cell's last expression, so its return value is displayed rather than stored;
# the recorded output below is a list holding one DocumentMetadata instance.
import marvin
marvin.extract(raw_text, target=DocumentMetadata)

[DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], journal_name='arXiv', publication_date=datetime.date(2024, 4, 15), keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], doi='2404.09995v1', title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', subtitle=None, visible_urls=[], field_of_science='Computer Vision', concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies 

In [3]:
# Result
# Hand-written DocumentMetadata for the test paper; the values appear to mirror the
# marvin.extract output displayed in the cell above.
res = DocumentMetadata(
    authors=[
        'Chieh Hubert Lin',
        'Changil Kim',
        'Jia-Bin Huang',
        'Qinbo Li',
        'Chih Yao Ma',
        'Johannes Kopf',
        'Ming-Hsuan Yang',
        'Hung-Yu Tseng',
    ],
    journal_name='arXiv',
    publication_date=datetime.date(2024, 4, 15),
    keywords=[
        'Neural Radiance Field',
        'NeRF',
        'inpainting',
        'latent diffusion model',
        '3D reconstruction',
        'multi-view images',
        'synthetic contents',
        'geometry',
        'stochasticity',
        'textural shift',
        'auto-encoding errors',
        'pixel-distance losses',
        'per-scene customization',
        'masked adversarial training',
        'pixel and perceptual losses',
        'real-world scenes',
    ],
    doi='2404.09995v1',
    title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
    subtitle=None,
    visible_urls=[],
    field_of_science='Computer Vision',
    concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies commonly used pixel and perceptual losses as detrimental in NeRF inpainting tasks and demonstrates state-of-the-art results through rigorous experiments.',
    specific_questions_document_can_answer=[
        'What are the challenges in Neural Radiance Field inpainting?',
        'How does the proposed framework improve NeRF inpainting?',
        'Why are pixel and perceptual losses harmful in NeRF inpainting tasks?',
        'What methods are used to mitigate textural shifts in NeRF inpainting?',
    ],
)
# Bare name as the last expression so the notebook displays the instance's repr.
res

DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], journal_name='arXiv', publication_date=datetime.date(2024, 4, 15), keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], doi='2404.09995v1', title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', subtitle=None, visible_urls=[], field_of_science='Computer Vision', concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies c

In [7]:
### LLM PARSING
# Ask an OpenAI-compatible endpoint for a reply constrained to the DocumentMetadata JSON schema,
# echoing the streamed text as it arrives and collecting it in `final` for the next cell to parse.
from openai import OpenAI # pip install "openai>=1.0"  (quote it: an unquoted >= is a shell redirect)

# Reset BEFORE the request: if create() raises (e.g. a 503), `final` must not keep the text of an
# earlier successful run, otherwise the parsing cell below would silently parse stale output.
final = ""

client = OpenAI(
    api_key="irrelevant", # any non-empty string
    base_url = "https://api.ncsa.ai/llm/v1" ## 👈 ONLY CODE CHANGE ##
)

completion = client.chat.completions.create(
    model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", # way better than mistral instruct v0.2. Works great! As good as GPT-4 so far.
    messages=[
        # {"role": "system", "content": "You are an expert at categorizing scientific papers. Please categorize the following paper."},
        {"role": "user", "content": "You are an expert at categorizing scientific papers. Please categorize the following paper.\n" + raw_text},
    ],
    # Server-side extension passed through untouched by the client; the JSON schema of
    # DocumentMetadata is sent so the reply can be parsed back into that type.
    extra_body={"guided_json": DocumentMetadata.schema()},
    temperature=0.2,
    stream=True,
)

# ⚡️⚡️ streaming
for chunk in completion:
    # Some streamed chunks carry no choices at all (e.g. a trailing usage-only chunk);
    # indexing [0] on those would raise IndexError and abort the stream.
    if not chunk.choices:
        continue
    # delta.content is None on chunks that carry no text (e.g. role-only chunks).
    piece = chunk.choices[0].delta.content or ""
    print(piece, end="")
    # Appended incrementally so text received so far is kept if the stream fails midway.
    final += piece

# With stream=False the whole reply would instead be at completion.choices[0].message.content

InternalServerError: Error code: 503 - Service Temporarily Unavailable

In [69]:
# Parse the JSON text accumulated in `final` by the streaming cell into a DocumentMetadata
# instance, then display it as a plain dict.
# NOTE(review): this depends on `final` from the cell above; if that cell failed, an older value
# of `final` may be what gets parsed here — re-run the streaming cell first.
# NOTE(review): parse_raw()/dict() look like the pydantic v1-style API; if the project is on
# pydantic v2 they are deprecated in favour of model_validate_json()/model_dump() — confirm.
doc = DocumentMetadata.parse_raw(final)
doc.dict()

{'authors': ['Chieh Hubert Lin',
  'Changil Kim',
  'Jia-Bin Huang',
  'Qinbo Li',
  'Chih Yao Ma',
  'Johannes Kopf',
  'Ming-Hsuan Yang',
  'Hung-Yu Tseng'],
 'journal_name': 'eccv',
 'publication_date': '15 Apr 2024',
 'keywords': ['Neural Radiance Field',
  'NeRF inpainting',
  'latent diffusion model',
  'stochasticity',
  'textural shift',
  'per-scene customization',
  'masked adversarial training',
  'pixel and perceptual losses'],
 'doi': '2404.09995v1',
 'title': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'subtitle': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'visible_urls': ['https://arxiv.org/abs/2404.09995v1'],
 'field_of_science': 'Computer Science',
 'concise_summary': 'This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of 

In [None]:
# Hand the parsed `doc` to the project's insert_doc helper (imported from the local SQLite module).
# NOTE(review): commit_on_change=True presumably makes the helper commit the write immediately —
# confirm against insert_doc's definition, which is not visible in this notebook.
from SQLite import insert_doc
insert_doc(doc, commit_on_change=True)