In [46]:
import datetime
from typing import Optional
from dotenv import load_dotenv
load_dotenv(override=True)

import pydantic

class DocumentMetadata(pydantic.BaseModel):
    authors: list[str]
    journal_name: str
    publication_date: str  # Changed from datetime.date to str
    keywords: list[str]
    doi: str
    title: str
    subtitle: Optional[str]
    visible_urls: list[str]
    field_of_science: str
    concise_summary: str
    questions_document_can_answer: list[str]


In [47]:
# Random test paper copy/pasted from from ArXiv: https://arxiv.org/html/2404.09995v1
raw_text = """License: CC BY-NC-ND 4.0
arXiv:2404.09995v1 [cs.CV] 15 Apr 2024
(eccv) Package eccv Warning: Package 'hyperref' is loaded with option 'pagebackref', which is *not* recommended for camera-ready version

1
Taming Latent Diffusion Model for Neural Radiance Field Inpainting
Chieh Hubert Lin
1122
Changil Kim
11
Jia-Bin Huang
1133
Qinbo Li
11
Chih Yao Ma
11
Johannes Kopf
11
Ming-Hsuan Yang
22
Hung-Yu Tseng
11
Abstract
Neural Radiance Field (NeRF) is a representation for 3D reconstruction from multi-view images. Despite some recent work showing preliminary success in editing a reconstructed NeRF with diffusion prior, they remain struggling to synthesize reasonable geometry in completely uncovered regions. One major reason is the high diversity of synthetic contents from the diffusion model, which hinders the radiance field from converging to a crisp and deterministic geometry. Moreover, applying latent diffusion models on real data often yields a textural shift incoherent to the image condition due to auto-encoding errors. These two problems are further reinforced with the use of pixel-distance losses. To address these issues, we propose tempering the diffusion model's stochasticity with per-scene customization and mitigating the textural shift with masked adversarial training. During the analyses, we also found the commonly used pixel and perceptual losses are harmful in the NeRF inpainting task. Through rigorous experiments, our framework yields state-of-the-art NeRF inpainting results on various real-world scenes.

Refer to caption
Figure 1:NeRF inpainting. Given a set of posed images associated with inpainting masks, the proposed framework estimates a NeRF that renders high-quality novel views, where the inpainting region is realistic and contains high-frequency details.
1Introduction
The recent advancements in neural radiance fields (NeRF) [24, 27, 3] have achieved high-quality 3D reconstruction and novel-view synthesis of scenes captured with a collection of images. The success intrigues an increasing attention on manipulating NeRFs such as 3D scene stylization [38, 8] and NeRF editing [13]. In this work, we focus on the NeRF inpainting problem. As shown in Figure 1, given a set of images of a scene with the inpainting masks, our goal is to estimate a completed NeRF that renders high-quality images at novel viewpoints. The NeRF inpainting task enables a variety of 3D content creation applications such as removing objects from a scene [26, 39], completing non-observed part of the scene, and hallucinating contents in the designated regions.
"""

In [7]:
import marvin
marvin.extract(raw_text, target=DocumentMetadata)

[DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], journal_name='arXiv', publication_date=datetime.date(2024, 4, 15), keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], doi='2404.09995v1', title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', subtitle=None, visible_urls=[], field_of_science='Computer Vision', concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies 

In [12]:
# Result
res = [DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], 
                  journal_name='arXiv', 
                  publication_date=datetime.date(2024, 4, 15), 
                  keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], 
                  doi='2404.09995v1',
                  title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', 
                  subtitle=None, 
                  visible_urls=[], 
                  field_of_science='Computer Vision', 
                  concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies commonly used pixel and perceptual losses as detrimental in NeRF inpainting tasks and demonstrates state-of-the-art results through rigorous experiments.', 
                  questions_document_can_answer=['What are the challenges in Neural Radiance Field inpainting?', 'How does the proposed framework improve NeRF inpainting?', 'Why are pixel and perceptual losses harmful in NeRF inpainting tasks?', 'What methods are used to mitigate textural shifts in NeRF inpainting?'])
]

In [13]:
res

[DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], journal_name='arXiv', publication_date=datetime.date(2024, 4, 15), keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], doi='2404.09995v1', title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', subtitle=None, visible_urls=[], field_of_science='Computer Vision', concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies 

In [16]:
import json
import mysql.connector
from datetime import date

# Replace these with your connection details
config = {
    'user': 'root',
    'password': ,  # Replace with your MySQL root password
    'host': '127.0.0.1',
    'database': 'science',     # Replace with your database name
    'raise_on_warnings': True
}

# Result
res = [DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], 
                  journal_name='arXiv', 
                  publication_date=datetime.date(2024, 4, 15), 
                  keywords=['Neural Radiance Field', 'NeRF', 'inpainting', 'latent diffusion model', '3D reconstruction', 'multi-view images', 'synthetic contents', 'geometry', 'stochasticity', 'textural shift', 'auto-encoding errors', 'pixel-distance losses', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses', 'real-world scenes'], 
                  doi='2404.09995v1',
                  title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', 
                  subtitle=None, 
                  visible_urls=[], 
                  field_of_science='Computer Vision', 
                  concise_summary='This paper addresses the challenges in Neural Radiance Field (NeRF) inpainting by proposing a framework that tempers the stochasticity of diffusion models and mitigates textural shifts with masked adversarial training. It also identifies commonly used pixel and perceptual losses as detrimental in NeRF inpainting tasks and demonstrates state-of-the-art results through rigorous experiments.', 
                  questions_document_can_answer=['What are the challenges in Neural Radiance Field inpainting?', 'How does the proposed framework improve NeRF inpainting?', 'Why are pixel and perceptual losses harmful in NeRF inpainting tasks?', 'What methods are used to mitigate textural shifts in NeRF inpainting?'])
]

try:
    # Establishing the connection
    db = mysql.connector.connect(**config)
    cursor = db.cursor()

    # SQL command to insert the data
    add_document = (
        "INSERT INTO documents "
        "(authors, journal_name, publication_date, keywords, doi, title, subtitle, visible_urls, field_of_science, concise_summary, questions_document_can_answer) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )

    # Data tuple to insert
    # data_document = (
    #     '[\"Author One\", \"Author Two\"]',
    #     'Journal of Example',
    #     date(2024, 4, 17),
    #     '[\"keyword1\", \"keyword2\"]',
    #     '10.1000/j.journal.2024v2',
    #     'Example Title',
    #     'Example Subtitle',
    #     '[\"http://example.com\"]',
    #     'Physics',
    #     'This is a concise summary of the document.',
    #     '[\"What is example?\", \"How does example work?\"]'
    # )
    for doc in res:
        # Convert DocumentMetadata object to a tuple for database insertion
        data_document = (
            json.dumps(doc.authors),  # Convert list to string
            doc.journal_name,
            doc.publication_date,
            json.dumps(doc.keywords),  # Convert list to string
            doc.doi,
            doc.title,
            doc.subtitle,
            json.dumps(doc.visible_urls),  # Convert list to string
            doc.field_of_science,
            doc.concise_summary,
            json.dumps(doc.questions_document_can_answer)  # Convert list to string
        )

        # Executing the SQL command
        cursor.execute(add_document, data_document)

    # Commit your changes in the database
    db.commit()

    print("Document added successfully.")

except mysql.connector.Error as err:
    print(f"Error: {err}")

finally:
    if db.is_connected():
        cursor.close()
        db.close()
        print("MySQL connection is closed")



Document added successfully.
MySQL connection is closed


In [48]:
# json.dumps(DocumentMetadata)
DocumentMetadata.schema()

{'properties': {'authors': {'items': {'type': 'string'},
   'title': 'Authors',
   'type': 'array'},
  'journal_name': {'title': 'Journal Name', 'type': 'string'},
  'publication_date': {'title': 'Publication Date', 'type': 'string'},
  'keywords': {'items': {'type': 'string'},
   'title': 'Keywords',
   'type': 'array'},
  'doi': {'title': 'Doi', 'type': 'string'},
  'title': {'title': 'Title', 'type': 'string'},
  'subtitle': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
   'title': 'Subtitle'},
  'visible_urls': {'items': {'type': 'string'},
   'title': 'Visible Urls',
   'type': 'array'},
  'field_of_science': {'title': 'Field Of Science', 'type': 'string'},
  'concise_summary': {'title': 'Concise Summary', 'type': 'string'},
  'questions_document_can_answer': {'items': {'type': 'string'},
   'title': 'Questions Document Can Answer',
   'type': 'array'}},
 'required': ['authors',
  'journal_name',
  'publication_date',
  'keywords',
  'doi',
  'title',
  'subtitle',
  'visible_

In [64]:
### LLM PARSING
from openai import OpenAI # pip install openai>=1.0

client = OpenAI(
    api_key="irrelevant", # any non-empty string
    base_url = "https://api.ncsa.ai/llm/v1" ## 👈 ONLY CODE CHANGE ##
)

completion = client.chat.completions.create(
    # model="teknium/OpenHermes-2.5-Mistral-7B",
    # model="mistralai/Mistral-7B-Instruct-v0.2", # better than teknium model 
    model="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", # way better than mistral instruct v0.2. Works great! As good as GPT-4 so far.
    messages=[
        # {"role": "system", "content": "You are an expert at categorizing scientific papers. Please categorize the following paper."},
        {"role": "user", "content": "You are an expert at categorizing scientific papers. Please categorize the following paper.\n" + raw_text},
    ],
    extra_body={"guided_json": DocumentMetadata.schema()},
    temperature=0.2,
    stream=True,
)

# ⚡️⚡️ streaming 
final = ""
for chunk in completion:
    print(chunk.choices[0].delta.content or "", end="")
    final += chunk.choices[0].delta.content or ""

# print(completion.choices[0].message.content)

{
  "authors": ["Chieh Hubert Lin", "Changil Kim", "Jia-Bin Huang", "Qinbo Li", "Chih Yao Ma", "Johannes Kopf", "Ming-Hsuan Yang", "Hung-Yu Tseng"],
  "concise_summary": "This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of the inpainted NeRF. The authors also found that commonly used pixel and perceptual losses are not suitable for this task.",
  "doi": "2404.09995v1",
  "field_of_science": "Computer Science",
  "journal_name": "eccv",
  "keywords": ["Neural Radiance Field", "NeRF inpainting", "latent diffusion model", "stochasticity", "textural shift", "per-scene customization", "masked adversarial training", "pixel and perceptual losses"],
  "publication_date": "15 Apr 2024",
  "questions_document_can_answer": ["What is the main focus of this paper?", "What issues does the proposed framewo

In [69]:
doc = DocumentMetadata.parse_raw(final)
doc.dict()

{'authors': ['Chieh Hubert Lin',
  'Changil Kim',
  'Jia-Bin Huang',
  'Qinbo Li',
  'Chih Yao Ma',
  'Johannes Kopf',
  'Ming-Hsuan Yang',
  'Hung-Yu Tseng'],
 'journal_name': 'eccv',
 'publication_date': '15 Apr 2024',
 'keywords': ['Neural Radiance Field',
  'NeRF inpainting',
  'latent diffusion model',
  'stochasticity',
  'textural shift',
  'per-scene customization',
  'masked adversarial training',
  'pixel and perceptual losses'],
 'doi': '2404.09995v1',
 'title': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'subtitle': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'visible_urls': ['https://arxiv.org/abs/2404.09995v1'],
 'field_of_science': 'Computer Science',
 'concise_summary': 'This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of 

{'authors': ['Chieh Hubert Lin',
  'Changil Kim',
  'Jia-Bin Huang',
  'Qinbo Li',
  'Chih Yao Ma',
  'Johannes Kopf',
  'Ming-Hsuan Yang',
  'Hung-Yu Tseng'],
 'journal_name': 'eccv',
 'publication_date': '15 Apr 2024',
 'keywords': ['Neural Radiance Field',
  'NeRF inpainting',
  'latent diffusion model',
  'stochasticity',
  'textural shift',
  'per-scene customization',
  'masked adversarial training',
  'pixel and perceptual losses'],
 'doi': '2404.09995v1',
 'title': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'subtitle': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'visible_urls': ['https://arxiv.org/abs/2404.09995v1'],
 'field_of_science': 'Computer Science',
 'concise_summary': 'This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of 

DocumentMetadata(authors=['Chieh Hubert Lin', 'Changil Kim', 'Jia-Bin Huang', 'Qinbo Li', 'Chih Yao Ma', 'Johannes Kopf', 'Ming-Hsuan Yang', 'Hung-Yu Tseng'], journal_name='eccv', publication_date='15 Apr 2024', keywords=['Neural Radiance Field', 'NeRF inpainting', 'latent diffusion model', 'stochasticity', 'textural shift', 'per-scene customization', 'masked adversarial training', 'pixel and perceptual losses'], doi='2404.09995v1', title='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', subtitle='Taming Latent Diffusion Model for Neural Radiance Field Inpainting', visible_urls=['https://arxiv.org/abs/2404.09995v1'], field_of_science='Computer Science', concise_summary='This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of the inpainted NeRF. The authors also found that com

{'authors': ['Chieh Hubert Lin',
  'Changil Kim',
  'Jia-Bin Huang',
  'Qinbo Li',
  'Chih Yao Ma',
  'Johannes Kopf',
  'Ming-Hsuan Yang',
  'Hung-Yu Tseng'],
 'journal_name': 'eccv',
 'publication_date': '15 Apr 2024',
 'keywords': ['Neural Radiance Field',
  'NeRF inpainting',
  'latent diffusion model',
  'stochasticity',
  'textural shift',
  'per-scene customization',
  'masked adversarial training',
  'pixel and perceptual losses'],
 'doi': '2404.09995v1',
 'title': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'subtitle': 'Taming Latent Diffusion Model for Neural Radiance Field Inpainting',
 'visible_urls': ['https://arxiv.org/abs/2404.09995v1'],
 'field_of_science': 'Computer Science',
 'concise_summary': 'This paper proposes a framework for NeRF inpainting that addresses issues related to stochasticity and textural shift in the latent diffusion model. The framework uses per-scene customization and masked adversarial training to improve the quality of 

In [28]:
print(completion.choices[0].message.content)
# Not great response... DOI is missing, publication date is wrong. Questions qre quite bad. Authors are not separated.
# teknium/OpenHermes-2.5-Mistral-7B

{
  "authors": [
    "Chieh Hubert Lin, Changil Kim, Jia-Bin Huang, Qinbo Li, Chih Yao Ma, Johannes Kopf, Ming-Hsuan Yang, Hung-Yu Tseng"
  ],
  "concise_summary": "Taming Latent Diffusion Model for Neural Radiance Field Inpainting, the authors propose a framework that addresses the problem of synthesizing reasonable geometry in completely uncovered regions of Neural Radiance Field (NeRF) for 3D reconstruction from multi-view images. They propose tempering the diffusion model's stochasticity with per-scene customization and mitigating the textural shift with masked adversarial training. They also found that commonly used pixel and perceptual losses are harmful in the NeRF inpainting task.",
  "doi": "",
  "field_of_science": "Computer Science > Computer Vision and Pattern Recognition",
  "journal_name": "European Conference on Computer Vision",
  "keywords": [
    "Latent diffusion model",
    "Neural Radiance Field Inpainting",
    "3D reconstruction",
    "Multi-view images"
  ],
  "