In [1]:
from typing import List
from dotenv import load_dotenv
import boto3

# Load variables from a .env file into the process environment before any
# client is created (presumably the AWS credentials/region that boto3 reads
# — TODO confirm which variables the .env is expected to provide).
load_dotenv()


class Post:
    """A markdown post: its raw text together with the file name it came from."""

    content: str
    filename: str

    def __init__(self, content: str, filename: str) -> None:
        """Store the post body and the name of its source file."""
        self.filename = filename
        self.content = content

    def __str__(self) -> str:
        """Return the file name followed by the full content, for display."""
        return f"File Name: {self.filename}\n\nContent:\n\n{self.content}"


def get_posts_from_bucket(bucket: str) -> List[Post]:
    """Download every markdown (``.md``) object in an S3 bucket as a ``Post``.

    Args:
        bucket: Name of the S3 bucket to read from.

    Returns:
        One ``Post`` per object whose key ends with ``.md``, in the order S3
        lists them; an empty list when the bucket holds no such objects.
    """
    s3_client = boto3.client("s3")

    # A single list_objects_v2 call returns at most 1000 keys, so walk every
    # page; otherwise posts beyond the first page would be silently dropped.
    paginator = s3_client.get_paginator("list_objects_v2")
    markdown_keys: List[str] = []
    for page in paginator.paginate(Bucket=bucket):
        # "Contents" is absent from the response when a page has no objects.
        markdown_keys.extend(
            entry["Key"]
            for entry in page.get("Contents", [])
            if entry["Key"].endswith(".md")
        )

    posts: List[Post] = []
    for key in markdown_keys:
        s3_object = s3_client.get_object(Bucket=bucket, Key=key)
        content = s3_object["Body"].read().decode("utf-8")
        posts.append(Post(content=content, filename=key))

    return posts


def read_markdown_file(file_path: str) -> str:
    """Return the entire text of the file at ``file_path``, decoded as UTF-8."""
    with open(file_path, encoding="utf-8") as handle:
        return handle.read()


# Executed as soon as this cell runs: performs live S3 requests and keeps the
# downloaded .md posts in `posts` for the summarisation loop further down.
posts = get_posts_from_bucket(bucket="be-awesome-dev-posts")


In [39]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field


# Structured-output schema for one article summary. The Field descriptions
# below are handed to the model together with the schema (this class is passed
# to with_structured_output), so they act as instructions — edit them with the
# same care as the prompt text.
# NOTE(review): documented with comments rather than a docstring on purpose —
# pydantic copies a model's docstring into its generated schema description,
# which would presumably change what the LLM receives; confirm before adding one.
class Summary(BaseModel):
    # Headline topic of the article; written out as the "Topic:" line.
    main_topic: str = Field(
        description="The overall topic of the whole article, this should be only 10 words long at maximum"
    )
    # Body of the summary; written out under the "Summary:" heading.
    summary_content: str = Field(
        description="The content of the summary, this should be very concise and should only be 10 sentences max in length."
    )


# Ollama chat model wrapped so that invoking it yields `Summary` objects
# instead of raw chat messages.
# NOTE(review): temperature=0 is presumably chosen for repeatable output, and
# num_ctx=10000 presumably sizes the context window to hold a whole article —
# confirm the longest post fits, otherwise its input may be truncated.
model = ChatOllama(
    model="llama3.1:8b", temperature=0, verbose=True, num_ctx=10000
).with_structured_output(Summary)

# Prompt text sent for every article. It has two placeholders that the
# summarisation loop fills in: {article_name} and {content}.
# NOTE(review): despite the `system_` prefix, ChatPromptTemplate.from_template
# (used below) builds a single human message, not a system message — confirm
# that this is the intended role.
system_prompt_template = """
You are a helpful assistant that can give summary over markdown documents.
Given a markdown article, summarise its content with the following requirement:

* DO NOT write code in your summary.
* DO NOT include code examples in your summary, you must keep the summary as concise as possible.
* For each article, briefly describe what it is about overall and mention the main topics without further explanation.
* Be as concise as possible with your summary.

Below is the markdown name of the article and its content:

Article name: 

{article_name}

Content:

{content}
"""

# Compile the template text into a chat prompt.
prompt = ChatPromptTemplate.from_template(system_prompt_template)

# Pipe the rendered prompt into the structured-output model; invoking `chain`
# with the two placeholder values returns the model's structured result.
chain = prompt | model

# Summarise every downloaded post with the LLM chain, one request per post.
summaries: List[Summary] = []

for post in posts:
    # Only the last path component of the key is shown to the model as the
    # article name (e.g. "folder/intro.md" -> "intro.md").
    article_name = post.filename.split("/")[-1]
    response = chain.invoke(
        {"content": post.content, "article_name": article_name}
    )

    # A structured-output chain can yield None (e.g. when the model's reply
    # contains no parsable tool call). Skip such posts here so that a single
    # bad reply does not crash the code that later reads `.main_topic`,
    # after all the LLM work has already been done.
    if response is None:
        print(f"No structured summary returned for {post.filename}, skipping")
        continue

    summaries.append(response)

# Persist each summary to summary.md as a "Topic / Summary" section followed
# by a separator rule, reporting progress on stdout after each one.
with open("summary.md", "w", encoding="utf-8") as output_file:
    for s in summaries:
        section = (
            f"Topic: {s.main_topic}\n"
            f"Summary:\n{s.summary_content}\n\n"
            "\n\n===================================\n\n"
        )
        output_file.write(section)
        print(f"Written summary content of {s.main_topic} to file")

Written summary content of Modern Javascript Syntax to file
Written summary content of Array Methods in JavaScript to file
Written summary content of Understanding Object Methods in JavaScript to file
Written summary content of Synchronous vs Asynchronous Programming in JavaScript to file
Written summary content of Asynchronous programming in JavaScript using callbacks to file
Written summary content of JavaScript Promises for Asynchronous Operations to file
Written summary content of Async/Await in Javascript to file
Written summary content of JavaScript Event Loop and Asynchronous Programming to file
