# Basic metadata extraction using DSPy and a local LLM

To run this, you first need to start a local vLLM server in the background with a command like this:

    vllm serve $MODEL_ID --port 7987 --max-model-len 32768 --gpu-memory-utilization 0.9

where MODEL_ID is e.g. `Qwen/Qwen3-4B-Instruct-2507` and the port has to match the PORT setting below.

In [2]:
import dspy

# Connection settings for the locally running vLLM server.
# NOTE(review): the model ID is sent with every request, so it presumably has
# to name a model the server is actually serving — confirm against vLLM docs.
MODEL_ID = "Qwen/Qwen3-4B-Instruct-2507"
PORT = 7987  # must equal the --port value passed to `vllm serve`

# Build the DSPy language-model client and register it as the default LM.
lm = dspy.LM(
    f"openai/{MODEL_ID}",
    api_base=f"http://localhost:{PORT}/v1",  # local endpoint on the port above
    api_key="local",
    model_type="chat",
)
dspy.configure(lm=lm)

# Smoke test: send one prompt straight to the LM. The call returns a list
# holding the model's reply; the wording of that reply depends on the model.
lm("Say this is a test!", temperature=0.0)

['Sure! "Say this is a test!" — I\'m here to help with any questions or tasks you have. Is there something specific you\'d like to discuss or work on? 😊']

In [3]:
# DSPy signature for the metadata-extraction task: one input field (`text`)
# and twelve typed output fields describing a publication.
# NOTE(review): DSPy presumably passes the class docstring and every field's
# `desc` string to the LM as prompt text, so rewording them changes model
# behaviour — confirm before editing.
# NOTE(review): no output field is optional. In the sample run recorded in this
# notebook the model returned a DOI and ISSNs although the input text contains
# none; consider whether optional types would be more appropriate.
class ExtractInfo(dspy.Signature):
    """Extract structured information from text."""

    # Input: the raw text to extract metadata from.
    text: str = dspy.InputField()

    # Descriptive metadata. List-typed fields may hold several values.
    language: str = dspy.OutputField(desc="The language of the resource expressed as a BCP47 language tag.")
    title: str = dspy.OutputField(desc="The main title of the publication.")
    alt_title: list[str] = dspy.OutputField(desc="Alternative or parallel titles of the publication, suffixed with a BCP47 language tag in curly brackets.")
    creator: list[str] = dspy.OutputField(desc="The primary author(s) of the resource.")
    year: int = dspy.OutputField(desc="The year on which the resource was issued or made available.")
    publisher: list[str] = dspy.OutputField(desc="The entity/entities responsible for making the resource available.")

    # Identifiers. ISBNs are lists of strings; DOI and ISSNs are single strings.
    # The e_/p_ prefixes distinguish the electronic and the printed version.
    doi: str = dspy.OutputField(desc="The Digital Object Identifier (DOI) associated with the resource.")
    e_isbn: list[str] = dspy.OutputField(desc="The ISBN associated with the electronic resource.")
    p_isbn: list[str] = dspy.OutputField(desc="The ISBN of the printed version of this document.")
    e_issn: str = dspy.OutputField(desc="The ISSN associated with the electronic resource.")
    p_issn: str = dspy.OutputField(desc="The ISSN of the printed version of this document.")

    # Resource type, expressed in terms of the COAR Resource Types classification.
    type_coar: str = dspy.OutputField(desc="The type of the resource according to the COAR Resource Types classification.")

# Wrap the ExtractInfo signature in a basic DSPy predictor.
module = dspy.Predict(ExtractInfo)

# Sample input. Adjacent string literals are concatenated with no separator,
# so the first literal must end with a space to keep the two sentences apart
# (previously the text read "...iPhone 14 today.The CEO, ...").
text = (
    "Apple Inc. announced its latest iPhone 14 today. "
    "The CEO, Tim Cook, highlighted its new features in a press release."
)

# Run the extraction and show the resulting prediction object.
response = module(text=text)

print(response)


Prediction(
    language='en',
    title='Apple Announces Latest iPhone 14 Features',
    alt_title=[],
    creator=['Apple Inc.', 'Tim Cook'],
    year=2023,
    publisher=['Apple Inc.'],
    doi='10.1234/iphone.2023.001',
    e_isbn=[],
    p_isbn=[],
    e_issn='2572-3030',
    p_issn='0000-0000',
    type_coar='JournalArticle'
)
