In [1]:
# imports
import datetime
import html
import textwrap
from pathlib import Path

# packages
from IPython.display import HTML, display

# kelvin imports
from kelvin.research.fr.feed.updater import FRUpdater
from kelvin.nlp.llm.engines.openai_engine import OpenAIEngine
from kelvin.nlp.llm.summarize.recursive_split_summarizer import RecursiveSplitSummarizer
from kelvin.nlp.llm.summarize.text_memoizer import TextMemoizer

In [2]:
# Model identifiers for the two OpenAI engines used in this notebook.
GPT35_MODEL = "gpt-3.5-turbo"
GPT4_MODEL = "gpt-4"

# One engine per model: the GPT-3.5 engine backs the summarizer and the
# GPT-4 engine backs the memoizer in the following cell.
llm_gpt35 = OpenAIEngine(model=GPT35_MODEL)
llm_gpt4 = OpenAIEngine(model=GPT4_MODEL)

In [3]:
# Summarizer: runs on the GPT-3.5 engine and is prompted as a regulatory attorney.
# NOTE(review): summary_sentence_length presumably sets the target number of
# sentences in the summary -- confirm against RecursiveSplitSummarizer.
summarizer = RecursiveSplitSummarizer(
    engine=llm_gpt35,
    context_role="regulatory attorney",
    summary_sentence_length=4,
)

# Memoizer: runs on the GPT-4 engine and rewrites text as a memo using the
# role and document type given here.
memoizer = TextMemoizer(
    engine=llm_gpt4,
    context_role="super chill pirate lawyer",
    context_type="Monday morning email memo",
)

In [None]:
# setup Federal Register source and date range
fr_source = FRUpdater()
start_date=datetime.date(2023, 4, 3)
end_date=datetime.date(2023, 4, 5)

# setup summarizer
async for doc in fr_source.get_all_documents(start_date, end_date, progress=False):    
    # only process Treasury documents
    if "Treasury Department" not in doc['agency_names']:
        continue
        
    # show the document title
    display(HTML(f"<h3>{doc['title']}</h3>"))
    
    # write a summary
    summary = summarizer.get_summary(doc["text"], progress=False)
    display(HTML(f"<strong>Summary:</strong><pre>{textwrap.fill(summary, 80)}</pre><br />"))
    
    # now turn the summary into a memo
    memo = memoizer.get_summary(summary, progress=False)
    display(HTML(f"<strong>Memo:</strong><hr /><pre>{textwrap.fill(memo, 80)}</pre><br />"))   