In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
import os

load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [15]:
from goose3 import Goose

g = Goose()

html_files_dict = {}
for root, dirs, files in os.walk("notebooks"):
    for file in files:
        if file.endswith(".html"):
            with open(os.path.join(root, file), "r") as f:
                article = g.extract(raw_html=f.read())
                title = article.title
                content = article.cleaned_text
                html_files_dict[title] = content

In [16]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

chat_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=1.2)

with open("templates/extract_news_template.md", "r") as f:
    extract_news_template = f.read()


class NewsArticleSummary(BaseModel):
    title: str = Field(description="Teaser Headline")
    lede: str = Field(description="Intro line, the lede of the story")
    context: str = Field(description="Why this story matters")
    details: str = Field(
        description="Fuller details of the news story, 2 to 3 short sentences"
    )
    who_benefits: str = Field(description="Who is this story good for?")
    who_loses: str = Field(description="Who is hurt by this story?")
    bias: str = Field(
        description="Considering the tone of the writing and the subject, describe potential biases"
    )
    jargon: str = Field(description="Pick out potential jargon words and explain them")


parser = PydanticOutputParser(pydantic_object=NewsArticleSummary)

prompt_template = PromptTemplate(
    template=extract_news_template,
    input_variables=["title", "content"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

news_articles_dict = {}
for title in html_files_dict:
    prompt = prompt_template.format(title=title, content=html_files_dict[title])
    news_articles_dict[title] = chat_model.predict(prompt)

print(news_articles_dict)

InvalidRequestError: This model's maximum context length is 4097 tokens. However, your messages resulted in 18429 tokens. Please reduce the length of the messages.

In [None]:
from IPython.display import Markdown, display
import json

with open("templates/summary_format.md", "r") as f:
    summary_format = f.read()

# article = json.loads(
#     news_articles_dict[
#         "Wagner chief Prigozhin was on plane that crashed in Russia, aviation agency says"
#     ]
# )

# article = json.loads(
#     news_articles_dict["Boston police agree to eliminate public records backlog within 6 months, settling lawsuit - The Boston Globe"]
# )

article = json.loads(
    news_articles_dict[
        "Photo of ‘entitled’ tourists at national park sparks outrage online: ‘This is why we can’t have nice things’"
    ]
)

display(
    Markdown(
        summary_format.format(
            title=article["title"],
            lede=article["lede"],
            context=article["context"],
            details=article["details"],
            who_benefits=article["who_benefits"],
            who_loses=article["who_loses"],
            bias=article["bias"],
            jargon=article["jargon"],
        )
    )
)

# Photo of 'entitled' tourists at national park sparks outrage online: 'This is why we can't have nice things'

Tourists and visitors ignoring park rules and signage continue to spark headlines nationwide, from dangerous stunts and tormenting wildlife to vandalizing protected areas and other bad behaviors.

## Why This Story Matters

This story highlights the ongoing issue of tourists disregarding rules and damaging national parks, which impacts the parks' ability to rehabilitate damaged land and protect biodiversity.

## Details

The photo taken at Maroon Bells Scenic Area in Colorado shows tourists outside of the designated area despite a sign stating that the area was 'closed for revegetation.' The National Park Service, while caring for fragile ecosystems, struggles due to a lack of staff and funding to adequately enforce rules and signage throughout parklands. This negligence hinders the efforts of volunteers who are working to restore native plants and preserve the park's natural beauty.

## Who is Affected

Longtime park-goers who value outdoor etiquette and conservation efforts benefit from increased awareness of the issue at hand.

The parks themselves, as well as the volunteers working to restore native plants and protect the environment, suffer from the continued disregard of park rules and revegetation efforts.

## Potential Bias

The writing of this summary leans towards supporting the conservationist perspective, focusing on the negative impacts of tourists ignoring park rules.

## Terminology

Revegetation: The practice of replanting and rebuilding native plants and improving soil quality on disturbed lands in order to restore biodiversity and protect water quality.

