In [1]:
import os
import requests
import feedparser
from urllib.parse import quote
from pypdf import PdfReader
from datetime import datetime, timedelta

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document

from neo4j import GraphDatabase


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def fetch_arxiv_papers(query, max_results=5, days=7):
    """Fetch latest arXiv papers within the last N days."""
    base_url = "http://export.arxiv.org/api/query?"
    encoded_query = quote(query)
    search_query = (
        f"search_query=all:{encoded_query}&start=0&max_results={max_results}"
        f"&sortBy=submittedDate&sortOrder=descending"
    )
    feed = feedparser.parse(base_url + search_query)

    since_date = datetime.now() - timedelta(days=days)
    papers = []
    for entry in feed.entries:
        published = datetime.strptime(entry.published, "%Y-%m-%dT%H:%M:%SZ")
        if published >= since_date:
            papers.append({
                "id": entry.id,
                "title": entry.title,
                "authors": [a.name for a in entry.authors],
                "summary": entry.summary,
                "pdf_url": entry.id.replace("abs", "pdf") + ".pdf",
                "published": published.isoformat()
            })
    return papers

# Example usage
papers = fetch_arxiv_papers("large language model", max_results=3)
papers


[{'id': 'http://arxiv.org/abs/2510.25772v1',
  'title': 'VFXMaster: Unlocking Dynamic Visual Effect Generation via In-Context\n  Learning',
  'authors': ['Baolu Li',
   'Yiming Zhang',
   'Qinghe Wang',
   'Liqian Ma',
   'Xiaoyu Shi',
   'Xintao Wang',
   'Pengfei Wan',
   'Zhenfei Yin',
   'Yunzhi Zhuge',
   'Huchuan Lu',
   'Xu Jia'],
  'summary': 'Visual effects (VFX) are crucial to the expressive power of digital media,\nyet their creation remains a major challenge for generative AI. Prevailing\nmethods often rely on the one-LoRA-per-effect paradigm, which is\nresource-intensive and fundamentally incapable of generalizing to unseen\neffects, thus limiting scalability and creation. To address this challenge, we\nintroduce VFXMaster, the first unified, reference-based framework for VFX video\ngeneration. It recasts effect generation as an in-context learning task,\nenabling it to reproduce diverse dynamic effects from a reference video onto\ntarget content. In addition, it demonstra

In [3]:
def download_pdf(paper, output_dir="downloads"):
    os.makedirs(output_dir, exist_ok=True)
    filename = f"{output_dir}/{paper['id'].split('/')[-1]}.pdf"
    response = requests.get(paper["pdf_url"])
    if response.status_code == 200:
        with open(filename, "wb") as f:
            f.write(response.content)
        return filename
    return None


def read_pdf(pdf_path):
    reader = PdfReader(pdf_path)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""
    return text

# Example: Download and read one paper
pdf_path = download_pdf(papers[0])
raw_text = read_pdf(pdf_path)
print(raw_text[:500])


VFXMaster: Unlocking Dynamic Visual Effect Generation via
In-Context Learning
Baolu Li1∗, Yiming Zhang 1∗, Qinghe Wang 1,2∗† , Liqian Ma 3B, Xiaoyu Shi 2,
Xintao Wang2, Pengfei Wan 2, Zhenfei Yin 4, Yunzhi Zhuge 1,
Huchuan Lu1, Xu Jia 1B
1Dalian University of Technology 2Kling Team, Kuaishou Technology 3ZMO AI Inc. 4Oxford University
https://libaolu312.github.io/VFXMaster
Target
Image
Ref.
Video
Target
Image
Ref.
Video
Target
Image
Ref.
Video
In-Domain Visual Effects
Out-of-Domain Visual Effects


In [4]:
def chunk_text(text, chunk_size=800, chunk_overlap=100):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ".", " "]
    )
    return splitter.split_text(text)

chunks = chunk_text(raw_text)
print(f"Total chunks: {len(chunks)}")
print(chunks[0][:400])


Total chunks: 91
VFXMaster: Unlocking Dynamic Visual Effect Generation via
In-Context Learning
Baolu Li1∗, Yiming Zhang 1∗, Qinghe Wang 1,2∗† , Liqian Ma 3B, Xiaoyu Shi 2,
Xintao Wang2, Pengfei Wan 2, Zhenfei Yin 4, Yunzhi Zhuge 1,
Huchuan Lu1, Xu Jia 1B
1Dalian University of Technology 2Kling Team, Kuaishou Technology 3ZMO AI Inc. 4Oxford University
https://libaolu312.github.io/VFXMaster
Target
Image
Ref.
Video
T


In [5]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

def store_in_faiss(chunks, paper_meta, save_path="faiss_index"):
    docs = [
        Document(page_content=chunk, metadata=paper_meta)
        for chunk in chunks
    ]
    vectorstore = FAISS.from_documents(docs, embeddings)
    os.makedirs(save_path, exist_ok=True)
    vectorstore.save_local(save_path)
    return vectorstore

# Example
vectorstore = store_in_faiss(chunks, papers[0])
print(" Stored embeddings in FAISS index.")


 Stored embeddings in FAISS index.


In [16]:
NEO4J_URI="bolt://localhost:7687"
NEO4J_USER="neo4j"
NEO4J_PASSWORD="testpassword"

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER,NEO4J_PASSWORD))

def store_paper_in_neo4j(paper,chunks):
    with driver.session() as session:
        session.run("""
            MERGE(p:Paper {id:$id})
            SET p.title = $title , p.summary= $summary,
                p.published =$published , p.authors = $authors        
        """,parameters=paper)

        for i, chunk in enumerate(chunks):
            session.run("""
                MERGE (c:Chunk {paper_id: $paper_id, chunk_id: $chunk_id})
                SET c.text = $text
                MERGE (p:Paper {id: $paper_id})-[:HAS_CHUNK]->(c)
            """, parameters={
                "paper_id": paper["id"],
                "chunk_id": i,
                "text": chunk
            })

            store_paper_in_neo4j(papers[0], chunks)
            print("✅ Paper + chunks stored in Neo4j graph.")

In [17]:
def weekly_ingest(topic="LLM"):
    print(f"\n Ingesting new arXiv papers for topic: {topic}")
    new_papers = fetch_arxiv_papers(topic, max_results=3)
    for paper in new_papers:
        pdf_path = download_pdf(paper)
        if pdf_path:
            text = read_pdf(pdf_path)
            chunks = chunk_text(text)
            store_in_faiss(chunks, paper)
            store_paper_in_neo4j(paper, chunks)
    print("✅ Weekly ingestion completed.\n")

# Run manually (in place of cron)
weekly_ingest("Graph Neural Networks")



 Ingesting new arXiv papers for topic: Graph Neural Networks


ConnectionAcquisitionTimeoutError: failed to obtain a connection from the pool within 60.0s (timeout)