In [1]:
import requests

from typing import List

from haystack import Document, Pipeline, component
from haystack.components.builders import PromptBuilder
from haystack.components.generators.openai import OpenAIGenerator
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.converters import HTMLToDocument

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Fetch the IDs of the current Hacker News top stories, then the item record
# of the first one. requests applies no timeout unless asked to, so a stalled
# connection would block the kernel indefinitely; raise_for_status() surfaces
# HTTP errors here instead of as a confusing JSON or indexing error below.
trending_list = requests.get(
    url="https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty",
    timeout=10,
)
trending_list.raise_for_status()

post = requests.get(
    url=f"https://hacker-news.firebaseio.com/v0/item/{trending_list.json()[0]}.json?print=pretty",
    timeout=10,
)
post.raise_for_status()

print(post.json())

SyntaxError: invalid syntax. Perhaps you forgot a comma? (3790760437.py, line 5)

In [8]:
# Not every item has a "url" field (text-only posts carry "text" instead),
# so use .get() to avoid a KeyError when the top story is one of those.
post.json().get("url")

'https://apps.apple.com/us/app/tik/id6720712299'

In [9]:
@component
class HackernewsNewestFetcher:
    """Haystack component that returns the current top Hacker News stories as Documents.

    Despite the class name, story IDs are read from the ``topstories`` endpoint.
    For a story that links to an external page, the page is downloaded and
    converted through an internal ``LinkContentFetcher`` -> ``HTMLToDocument``
    pipeline. A story that only carries inline ``text`` is wrapped into a
    ``Document`` directly. Stories that cannot be retrieved are reported on
    stdout and skipped, so one bad link never aborts the whole run.

    Args:
        timeout: Seconds to wait for each Hacker News API request made with
            ``requests``. It does not configure the internal page fetcher.
    """

    TOP_STORIES_URL = "https://hacker-news.firebaseio.com/v0/topstories.json?print=pretty"
    ITEM_URL = "https://hacker-news.firebaseio.com/v0/item/{item_id}.json?print=pretty"

    def __init__(self, timeout: float = 10.0):
        self.timeout = timeout

        fetcher = LinkContentFetcher()
        converter = HTMLToDocument()

        html_conversion_pipeline = Pipeline()
        html_conversion_pipeline.add_component("fetcher", fetcher)
        html_conversion_pipeline.add_component("converter", converter)

        html_conversion_pipeline.connect("fetcher", "converter")
        self.html_pipeline = html_conversion_pipeline

    def _fetch_item(self, item_id):
        """Return the decoded JSON object for one item, or None if it is unavailable."""
        try:
            response = requests.get(
                url=self.ITEM_URL.format(item_id=item_id), timeout=self.timeout
            )
            response.raise_for_status()
            item = response.json()
        except (requests.RequestException, ValueError) as err:
            # ValueError covers JSON decoding failures on older requests versions.
            print(f"Can't download Hacker News item {item_id}, skipped ({err})")
            return None
        # The API can answer with JSON null instead of an object; treating that
        # as "no item" avoids a TypeError on the membership tests in run().
        return item if isinstance(item, dict) else None

    @component.output_types(articles=List[Document])
    def run(self, top_k: int):
        """Fetch up to ``top_k`` top stories and return them as Documents.

        Args:
            top_k: Number of story IDs, taken from the front of the top-stories
                list, to try to fetch.

        Returns:
            ``{"articles": [...]}`` with one ``Document`` per story that could be
            retrieved; the list may be shorter than ``top_k``.

        Raises:
            requests.RequestException: If the top-stories list itself cannot be
                fetched, since there is nothing useful to return in that case.
        """
        articles = []
        top_stories = requests.get(url=self.TOP_STORIES_URL, timeout=self.timeout)
        top_stories.raise_for_status()
        story_ids = top_stories.json() or []

        for story_id in story_ids[0:top_k]:
            item = self._fetch_item(story_id)
            if item is None:
                continue

            if "url" in item:
                link = item["url"]
                try:
                    converted = self.html_pipeline.run({"fetcher": {"urls": [link]}})
                    documents = converted["converter"]["documents"]
                except Exception as err:
                    # Deliberately broad: any failure on one page just skips it.
                    # Unlike a bare except, this lets KeyboardInterrupt through.
                    print(f"Can't download {link}, skipped ({err})")
                    continue
                if documents:
                    articles.append(documents[0])
                else:
                    print(f"Can't download {link}, skipped (no document produced)")
            elif "text" in item:
                articles.append(
                    Document(
                        content=item["text"],
                        meta={
                            "title": item.get("title", ""),
                            # Text-only posts have no external link; point at the
                            # discussion page so consumers reading meta["url"]
                            # still get a usable address.
                            "url": f"https://news.ycombinator.com/item?id={story_id}",
                        },
                    )
                )
        return {"articles": articles}

In [10]:
# Exercise the component on its own before wiring it into a pipeline:
# request the first three top stories and show the resulting Documents.
fetcher = HackernewsNewestFetcher()
results = fetcher.run(top_k=3)

print(results["articles"])

[Document(id=68edc1d9f8f1ff939531f702d1b29ed59e849f1225110bd4998ef1c23d32326e, content: 'Tikâª!â¬ 4+
Discreet, Anytime, Anywhere
Rohan Sachdeva
-
- $0.95
iPhone Screenshots
Description
Ther...', meta: {'content_type': 'text/html', 'url': 'https://apps.apple.com/us/app/tik/id6720712299'}), Document(id=6b5cd00496edc3f582c4f6f9489bdaf79629a7222c5371bbc63d1dfdf370c6de, content: 'The $1,000 Wheelchair
How the YouTubers from JerryRigEverything are Making Affordable Wheelchairs Wi...', meta: {'content_type': 'text/html', 'url': 'https://newmobility.com/not-a-wheelchair/'}), Document(id=341cd8b16767c39980eacba2be9cb6c65d629db8610885ed9d0696fe1fa36807, content: 'Serialization is the Secret
If a value mutates in the forest with no one to see it, does it really m...', meta: {'content_type': 'text/html', 'url': 'https://www.zachdaniel.dev/p/serialization-is-the-secret'})]


In [13]:
from haystack_integrations.components.generators.ollama import OllamaGenerator


# Prompt for the summarizer. The `default` filter keeps the "URL:" line
# meaningful for articles whose meta has no "url" entry (the fetcher only
# sets "title" for text-only posts); without it the line renders empty.
# NOTE(review): article bodies are inserted unabridged. Long pages may exceed
# the model's context window — consider truncating content; confirm against
# the model configuration.
prompt_template = """  
You will be provided a few of the top posts in HackerNews, followed by their URL.  
For each post, provide a brief summary followed by the URL the full post can be found at.  
  
Posts:  
{% for article in articles %}  
  {{ article.content }}
  URL: {{ article.meta["url"] | default("(no external URL)") }}
{% endfor %}  
"""

prompt_builder = PromptBuilder(template=prompt_template)
fetcher = HackernewsNewestFetcher()

# Local Ollama server. Sampling options such as "num_predict" or "temperature"
# can be supplied through `generation_kwargs` when tuning is needed.
llm = OllamaGenerator(
    model="llama3.1:latest",
    url="http://localhost:11434",
)

# Wire the pipeline: fetched articles fill the prompt, the prompt feeds the LLM.
summarizer_pipeline = Pipeline()
summarizer_pipeline.add_component("fetcher", fetcher)
summarizer_pipeline.add_component("prompt", prompt_builder)
summarizer_pipeline.add_component("llm", llm)

summarizer_pipeline.connect("fetcher.articles", "prompt.articles")
summarizer_pipeline.connect("prompt", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x29b233550>
🚅 Components
  - fetcher: HackernewsNewestFetcher
  - prompt: PromptBuilder
  - llm: OllamaGenerator
🛤️ Connections
  - fetcher.articles -> prompt.articles (List[Document])
  - prompt.prompt -> llm.prompt (str)

In [15]:
# Run the full pipeline; only the fetcher takes external input here.
summaries = summarizer_pipeline.run(
    {
        "fetcher": {"top_k": 2},
    }
)

# Show the first entry of the generator's "replies" output.
print(summaries["llm"]["replies"][0])

Not a Wheelchair (NaW) is an employee-owned company that aims to disrupt the legacy wheelchair industry by offering high-quality, affordable wheelchairs. The company's approach is centered around making its base model chair available at low margins, with higher profit margins on upgraded versions.

Key points about Not a Wheelchair:

1. **Employee ownership**: NaW is an employee-owned company, which allows employees to have a stake in the business and share profits.
2. **Base model chair**: The company offers a basic wheelchair at affordable prices, making it accessible to those who need a reliable and functional ride.
3. **Upgrade options**: Customers can upgrade their chairs with various components, such as carbon fiber seat pans, wooden handrims, and more complex frame bends, which increases profit margins for the company.
4. **Innovative designs**: NaW is working on developing new wheelchair models, including a four-wheel drive version of "The Rig" and a track wheelchair still in d