In [1]:
# Prompt template for RFP extraction. It carries two placeholders:
# {schema} (the fields to extract) and {text} (the document text).
RFP_INFO_PROMPT = "\n".join(
    [
        "Attached is a text from an RFP document.",
        "Based on the given schema, extract all that you can.",
        "The schema is as follows:",
        "{schema}",
        "The text is as follows:",
        "{text}",
    ]
)

In [2]:
from langchain_core.prompts import ChatPromptTemplate

In [3]:
# from langchain.chains import create_extraction_chain
from typing import Optional

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

class ExtractionChain:
    """Thin wrapper around a prompt -> model -> output-parser pipeline.

    The pipeline is composed once in ``__init__``. Whatever is passed to
    :meth:`invoke` is forwarded to the prompt under the ``text`` key, so the
    supplied prompt is expected to use a ``{text}`` variable.
    """

    def __init__(
        self,
        prompt: ChatPromptTemplate,
        model: Optional[ChatOpenAI] = None,
        output_parser: Optional[StrOutputParser] = None,
    ):
        """Compose the extraction pipeline.

        Args:
            prompt: Chat prompt template that receives the input as ``text``.
            model: Chat model to call. When omitted, a new
                ``ChatOpenAI(model="gpt-3.5-turbo")`` is created for this
                instance.
            output_parser: Parser applied to the model output. When omitted,
                a new ``StrOutputParser()`` is created for this instance.
        """
        # Build the defaults here instead of in the signature: signature
        # defaults are evaluated once, when the class body runs, which would
        # construct the model client at definition time (and may fail if
        # credentials are not configured yet) and share that single instance
        # across every ExtractionChain.
        if model is None:
            model = ChatOpenAI(model="gpt-3.5-turbo")
        if output_parser is None:
            output_parser = StrOutputParser()

        self.chain = (
            # Wrap the raw input so the prompt sees it as the "text" variable.
            {"text": RunnablePassthrough()}
            | prompt
            | model
            | output_parser
        )

    def invoke(self, text: str) -> str:
        """Run the pipeline on ``text`` and return the output parser's result."""
        return self.chain.invoke(text)

In [4]:
class SafeDict(dict):
    """Dictionary for ``str.format_map`` that tolerates missing keys.

    Looking up a key that is not present returns the key wrapped in curly
    braces, so an unfilled ``{placeholder}`` survives formatting unchanged.
    """

    def __missing__(self, key):
        # Re-emit the placeholder text for any key that was not supplied.
        return "".join(("{", key, "}"))

# Fields to extract, mapped to the label shown to the model for each one.
rfp_info_schema = dict(
    title="Title",
    issuing_organization="Issuing Organization",
    unique_id="Unique ID",
    url="URL",
)
# Fill in only {schema}, rendered as one "key: label" line per field.
# SafeDict leaves {text} in place so the chat template can fill it later.
_prompt = RFP_INFO_PROMPT.format_map(
    SafeDict(
        schema="\n".join(
            f"{field}: {label}" for field, label in rfp_info_schema.items()
        )
    )
)
prompt = ChatPromptTemplate.from_template(_prompt)
print(_prompt)

Attached is a text from an RFP document.
Based on the given schema, extract all that you can.
The schema is as follows:
title: Title
issuing_organization: Issuing Organization
unique_id: Unique ID
url: URL
The text is as follows:
{text}


In [7]:
# Collects one extraction result per processed chunk.
info = list()
# Only the prompt is supplied; the model and output parser use
# ExtractionChain's defaults.
e = ExtractionChain(prompt=prompt)
# Sample input: a one-element list holding a single chunk of raw text from a
# solicitation form (the text itself contains "STANDARD FORM 1449").
# Labels and values appear out of reading order and the chunk starts
# mid-word; presumably this is unprocessed PDF text extraction -- TODO confirm.
# NOTE(review): the text includes contact names and a phone number; confirm
# this is acceptable to keep in a shared notebook.
chunks = ["""earch Projects Ag
CODE 16. ADMINISTERED BYCODE 
X
X
X
611430
SIZE STANDARD:
 100.00 % FOR:SET ASIDE:UNRESTRICTED ORARPA-H
REQUEST FOR 
PROPOSAL 
(RFP)
INVITATION 
FOR BID (IFB)
10. THIS ACQUISITION ISCODE 
REQUEST FOR 
QUOTE (RFQ)
14. METHOD OF SOLICITATION
13b. RATING
NORTH AMERICAN INDUSTRY 
CLASSIFICATION STANDARD 
(NAICS):
SMALL BUSINESS
04/15/2024 1600 EDT
03/21/2024
+1 000 000 0000BRYAN SIMMONS
(No collect calls)
INFORMATION CALL:
FOR SOLICITATION 8. OFFER DUE DATE/LOCAL TIMEb. TELEPHONE  NUMBERa. NAME
4. ORDER NUMBER3. AWARD/ 6. SOLICITATION 5.  SOLICITATION NUMBER
75N99224Q00001
SOLICITATION/CONTRACT/ORDER FOR COMMERCIAL ITEMS 1. REQUISITION NUMBER PAGE     OF
1  41 OFFEROR TO COMPLETE BLOCKS 12, 17, 23, 24, & 30
TELEPHONE NO.
17a. CONTRACTOR/
,
15. DELIVER TO
, MD 20892
1 Center Drive
BG NIHBC 01 Room 228
9. ISSUED BY 
7.  
2. CONTRACT NO. 
EFFECTIVE DATE
$15
18b. SUBMIT INVOICES TO ADDRESS SHOWN IN BLOCK 18a UNLESS BLOCK  BELOW
ISSUE DATE
DELIVERY FOR FREE ON BOARD 
(FOB) DESTINATION UNLESS 
BLOCK IS MARKED
11. 
SEE SCHEDULEX
12. DISCOUNT TERMS THIS CONTRACT IS A RATED 
ORDER UNDER THE DEFENSE 
PRIORITIES AND ALLOCATIONS 
SYSTEM - DPAS (15 CFR 700)
13a. 
SERVICE-DISABLED
VETERAN-OWNED
SMALL BUSINESS 
(SDVOSB)
HUBZONE SMALL 
BUSINESS
8(A)
NIH Advanced Research Projects Ag WOMEN-OWNED SMALL
BUSINESS (WOSB)
ECONOMICALLY DISADVANTAGED 
WOMEN-OWNED SMALL 
BUSINESS (EDWOSB)
24.
AMOUNT
23.
UNIT PRICE
22.
UNIT
21. 
QUANTITY
20.
SCHEDULE OF SUPPLIES/SERVICES
19.
ITEM NO.
Period of Performance: 07/01/2024 to 06/30/2029
1001    Task 1, 2, and 6 (Combined): This combined price 12 EA
should encompass all costs associated with
Employee Development, Career Alignment, and
Program and Contract Management, including
personnel costs, materials, overhead, and any
other direct and indirect costs. Please note that
this price will be billed monthly.
Product/Service Code:  R431
Continued ...
(Use Reverse and/or Attach Additional Sheets as Necessary)
HEREIN, IS ACCEPTED AS TO ITEMS: 
X
XX
DATED
STAS L. BLAUSHILD
.  YOUR OFFER ON SOLICITATION (BLOCK 5), 
INCLUDING ANY ADDITIONS OR  CHANGES WHICH ARE SET FORTH 
 1
COPIES TO ISSUING OFFICE.  CONTRACTOR AGREES TO FURNISH AND DELIVER
ARE
ARE
31c. DATE SIGNED
27b. CONTRACT/PURCHASE ORDER INCORPORATES BY REFERENCE FAR 52.212-4. FAR 52.212-5 IS ATTACHED.           ADDENDA
31a. UNITED STATES OF AMERICA (SIGNATURE OF CONTRACTING OFFICER) 
30c. DATE SIGNED 31b. NAME OF CONTRACTING OFFICER (Type or print)
ALL ITEMS SET FORTH OR OTHERWISE IDENTIFIED ABOVE AND ON ANY ADDITIONAL 
SHEETS SUBJECT TO THE TERMS AND CONDITIONS SPECIFIED.
27a. SOLICITATION INCORPORATES BY REFERENCE (FEDERAL ACQUISITION REGULATION) FAR 52.212-1, 52.212-4.  FAR 52.212-3 
AND 52.212-5 ARE ATTACHED.     ADDENDA
26. TOTAL AWARD AMOUNT (For Government Use Only)
OFFER 
STANDARD FORM 1449 (REV. 11/2021)
Prescribed by GSA - FAR (48 CFR) 53.212
ARE NOT ATTACHED.
ARE NOT ATTACHED.
AUTHORIZED FOR LOCAL REPRODUCTION
PREVIOUS EDITION IS NOT USABLE 
30b. NAME AND TITLE OF SIGNER (Type or print)
30a. SIGNATURE OF OFFEROR/CONTRACTOR
28. CONTRACTOR IS REQUIRED TO SIGN THIS DOCUMENT AND RETURN 
25. ACCOUNTING AND APPROPRIATION DATA
29. AWARD OF CONTRACT: REFERENCE
"""]
# Run the extraction chain on each chunk, keeping results in input order.
for chunk in chunks:
    extracted = e.invoke(chunk)
    info.append(extracted)
# Bare expression so the notebook displays the collected results.
info

['Title: NIH Advanced Research Projects Ag\nIssuing Organization: NIH\nUnique ID: 75N99224Q00001\nURL: Not provided']