In [1]:
import json
import os

from dotenv import load_dotenv
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import AzureChatOpenAI

In [2]:
# Load variables from a local .env file into the process environment; the
# Azure OpenAI settings are later read from the environment via os.getenv.
load_dotenv()

True

In [3]:
# Load the inputs for test case 1: the BibTeX text to extract from (`passage`)
# and the JSON Schema that the extracted object must follow (`schema`).
# The encoding is passed explicitly so the read does not depend on the
# platform's default locale encoding (which is not UTF-8 on every OS).
with open(
    "testcases/test case 1/NIPS-2017-attention-is-all-you-need-Bibtex.txt",
    "r",
    encoding="utf-8",
) as file:
    passage = file.read()

with open(
    "testcases/test case 1/paper citations_schema.json",
    "r",
    encoding="utf-8",
) as file:
    schema = json.load(file)

In [11]:
# Azure OpenAI chat model used for the extraction. Endpoint, deployment,
# API version and key all come from environment variables.
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    azure_deployment=os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    # Schema-constrained extraction should be as repeatable as possible, so
    # sampling randomness is minimised (was 0.7, which lets re-runs of the
    # notebook produce different JSON for the same passage).
    temperature=0,
    # Request JSON mode so the reply is a single JSON object.
    model_kwargs={"response_format": {"type": "json_object"}},
)

In [15]:
# Parser applied to the model's reply as the last step of the chain.
# (JsonOutputParser is imported in the notebook's top import cell.)
parser = JsonOutputParser()

# Prompt with two runtime inputs (`passage`, `schema`). The parser's format
# instructions are bound once, up front, as a partial variable.
prompt = PromptTemplate(
    template = """
    You are a JSON generator. Your task is to:
    1. Carefully read the following passage.
    2. Analyze the JSON Schema provided.
    3. Generate a JSON object that captures the key details from the passage strictly according to the schema provided keeping data type and regular expression in consideration.
    4. Ensure that only keys present in the schema are included in the output JSON, do not generate extra keys or content which is not in the schema.
    
    **passage**: {passage}
    
    **schema**: {schema}
    
    **output**: Return only the generated JSON. Do not include any explanation, markdown, or surrounding text.
    \n{format_instruction}
    """,
    input_variables=["passage","schema"],
    partial_variables={"format_instruction": parser.get_format_instructions()},
)

# Pipeline: fill the prompt -> call the chat model -> parse the reply.
chain = prompt | llm | parser

In [16]:
# Run the extraction chain. The schema is serialised to a JSON string so it
# can be substituted into the prompt text.
chain_inputs = {
    "passage": passage,
    "schema": json.dumps(schema),
}
result = chain.invoke(chain_inputs)

In [17]:
# Display the value returned by the chain for inspection.
result

{'authors': [{'given-names': 'Ashish', 'family-names': 'Vaswani'},
  {'given-names': 'Noam', 'family-names': 'Shazeer'},
  {'given-names': 'Niki', 'family-names': 'Parmar'},
  {'given-names': 'Jakob', 'family-names': 'Uszkoreit'},
  {'given-names': 'Llion', 'family-names': 'Jones'},
  {'given-names': 'Aidan N', 'family-names': 'Gomez'},
  {'given-names': '\\\\L ukasz', 'family-names': 'Kaiser'},
  {'given-names': 'Illia', 'family-names': 'Polosukhin'}],
 'cff-version': '1.2.0',
 'message': 'If you use this software, please cite it using the metadata from this file.',
 'title': 'Attention is All you Need',
 'type': 'software',
 'url': 'https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf',
 'version': '30',
 'identifiers': [{'type': 'url',
   'value': 'https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf'}],
 'date-released': '2017-01-01'}