In [1]:
from ollama import Client
from langchain_core.prompts import PromptTemplate
import json
from langchain_ollama.llms import OllamaLLM
from langchain.chains.llm import LLMChain
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableConfig
from langchain_core.output_parsers import StrOutputParser
from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
import requests
import time
import random

In [2]:
# Subtopics chain
#
# Asks the model for a JSON object that maps each essay subheading to a
# description of what that section should contain.

subtopics_template = PromptTemplate.from_template(
    """Generate a list of {number_of_subtopics} subheadings for a highschool essay on {topic}.
    Order the subheadings such that they have a logical sequence.
    Each subheading should be accompanied with a detailed discription on what the contents should be about.
    Provide output in json format as follows:

    {{'<heading1>':'<description1>',
    '<heading2>':'<description2>',...}}
    
    Only return the json object without any additional text."""
)

# format='json' requests JSON output so that later stages can json.loads() it.
# NOTE(review): `run_local` is not among the documented OllamaLLM parameters --
# confirm it has any effect.
model = OllamaLLM(model="llama3.2:latest", run_local=True, format='json')

# Built with LCEL (`|`) like the later chains in this notebook instead of the
# deprecated LLMChain. RunnablePassthrough.assign keeps the input keys
# (`topic`, `number_of_subtopics`) and adds the raw JSON string under
# `subtopics`, which is the same dict shape LLMChain returned.
subtopic_chain = RunnablePassthrough.assign(
    subtopics=subtopics_template | model | StrOutputParser()
)

  subtopic_chain = LLMChain(


In [3]:
# subtopic_chain.invoke(
#     {
#         "topic": "History of the Internet",
#         "number_of_subtopics": 3
#     }
# )

In [4]:
def _load_subtopics(state):
    """Decode the JSON string stored under ``state['subtopics']``."""
    return json.loads(state['subtopics'])


# Extends the running state with two parallel lists taken from the decoded
# subtopics object: its keys (headings) and its values (descriptions).
passthrough_chain = RunnablePassthrough().assign(
    subtopic_headings=lambda state: [*_load_subtopics(state).keys()],
    subtopic_descriptions=lambda state: [*_load_subtopics(state).values()],
)

In [5]:
# passthrough_chain.invoke(subtopic_chain.invoke(
#     {
#         "topic": "History of the Internet",
#         "number_of_subtopics": 3
#     }
# ))

In [6]:
# def print_output(output):
#     print(output)
#     return output

In [7]:
# sequence_chain = subtopic_chain | RunnableLambda(print_output) | passthrough_chain
# result = sequence_chain.invoke(
#     {
#         "topic": "History of the Internet",
#         "number_of_subtopics": 3
#     }
# )
# result

In [8]:
def get_search_query(description):
    """Ask the local LLM for a concise web-search query for ``description``.

    Args:
        description: Free-text description of what an essay section should
            cover.

    Returns:
        The model's reply as a string, with single and double quotes removed
        and leading/trailing whitespace trimmed.
    """
    search_query_prompt = PromptTemplate.from_template(
        """Generate a search query for the following description: {description}.
        The search query should be concise and relevant to the topic.
        Return only the search query without any additional text.
        The search query should be in English and should not contain any special characters."""
    )
    model = OllamaLLM(model="llama3.2:latest", run_local=True)
    # Composed with `|` (as the summary and essay chains are) rather than the
    # deprecated LLMChain; StrOutputParser makes invoke() return a plain string.
    search_query_chain = search_query_prompt | model | StrOutputParser()
    raw_query = search_query_chain.invoke({"description": description})
    # Models often wrap the query in quotes or append a newline; neither
    # belongs in the text sent to the search engine.
    return raw_query.replace('"', '').replace("'", "").strip()

In [9]:
def _build_search_queries(state):
    """Call get_search_query on every entry of ``state['subtopic_descriptions']``."""
    descriptions = state['subtopic_descriptions']
    return RunnableLambda(get_search_query).batch(descriptions)


# Adds `search_queries` (one generated query per subtopic description) to the state.
search_query_chain = RunnablePassthrough.assign(search_queries=_build_search_queries)

In [10]:
# sequence_chain = subtopic_chain |  passthrough_chain | search_query_chain
# result = sequence_chain.invoke({"topic": "History of the Internet", "number_of_subtopics": 3})
# result


In [11]:
# a0 = ['Foundations of the Internet',
#   'Expansion and Growth',
#   "The Internet's Impact on Society"]

# a1 = ['ARPANET history packet switching Vint Cerf Bob Kahn internet development',
#   'history of the internet from its origins to modern day development',
#   'Effects of the internet on modern society and its impact on communication, e-commerce, education, culture, cybercrime, disinformation, and online inequality.']

# a2 = ['This section will explore the origins and development of the internet, including the ARPANET project, its creators, and early innovations such as packet switching. It should also discuss the role of key figures like Vint Cerf and Bob Kahn in shaping the modern internet.',
#   "In this section, we'll examine how the internet expanded beyond its military roots to become a global network, driven by technological advancements, economic factors, and social movements. Topics may include the widespread adoption of personal computers, dial-up connections, and the emergence of the World Wide Web.",
#   "This final section will delve into the internet's profound effects on modern society, including its role in shaping global communication, e-commerce, education, and culture. We'll also discuss potential risks, challenges, and controversies surrounding the internet's growth, such as cybercrime, disinformation, and online inequality."]

# a3 = zip(a0, a1, a2)
# list(a3)[0][0]

In [21]:
def search_and_summarize(search_item, delay_seconds=40, max_results=5, request_timeout=15):
    """Search the web for one subtopic and summarise the pages found.

    Args:
        search_item: Pair ``(query, description)``; the query is sent to
            DuckDuckGo and the description tells the model what to extract.
        delay_seconds: Pause before searching, to avoid hitting rate limits.
        max_results: Maximum number of search hits to download.
        request_timeout: Per-page HTTP timeout in seconds, so one unresponsive
            site cannot stall the whole pipeline.

    Returns:
        The model's summary of the downloaded page text, as a string.
    """
    query, description = search_item[0], search_item[1]
    time.sleep(delay_seconds)  # To avoid hitting rate limits
    links = DDGS().text(query, max_results=max_results)

    page_texts = []
    for link in links:
        url = link['href']
        try:
            response = requests.get(url, timeout=request_timeout)
            # Skip 4xx/5xx responses so error pages are not summarised as content.
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            page_texts.append(soup.get_text(separator="\n", strip=True))
        except Exception as e:
            # Best effort: a page that cannot be fetched is reported and skipped.
            print(f"Error fetching {url}: {e}")
    content = "".join(text + "\n\n" for text in page_texts)

    search_summary_prompt = PromptTemplate.from_template(
        """Obtain the information described in the description from the content.
        Use only what is find in the content.
        Do not add anything in addition.
        Summarize the most useful or interesting information for a highschool report.
        Description: {description}
        Content: {content}""")

    model = OllamaLLM(model="llama3.2:latest", run_local=True)

    search_summary_chain = search_summary_prompt | model | StrOutputParser()
    return search_summary_chain.invoke({
        "description": description,
        "content": content
    })

In [13]:
# RunnableLambda(search_and_summarize).invoke(list(zip(a1, a2)))

In [14]:
# RunnableConfig has no retry-related keys, so the former retry_on_failure /
# retry_count / retry_delay entries had no effect. Only the concurrency limit
# belongs here.
run_config = RunnableConfig(
    max_concurrency=1,  # Adjust based on your system's capabilities
)

# Retries are configured on the runnable itself: each (query, description)
# item gets up to 3 attempts in total, with jittered exponential backoff
# between attempts.
search_with_retry = RunnableLambda(search_and_summarize).with_retry(
    stop_after_attempt=3,
    wait_exponential_jitter=True,
)

# Adds `search_results`: one summary per (search query, description) pair.
parallel_search_chain = RunnablePassthrough().assign(
    search_results=lambda x: search_with_retry.batch(
        list(zip(x['search_queries'], x['subtopic_descriptions'])),
        config=run_config,
    )
)

In [15]:
# Run the pipeline up to (and including) the web-search summaries and show
# the accumulated state.
pipeline_input = {"topic": "History of the Internet", "number_of_subtopics": 3}
sequence_chain = (
    subtopic_chain
    | passthrough_chain
    | search_query_chain
    | parallel_search_chain
)
result = sequence_chain.invoke(pipeline_input)
result

('history of the internet arpanet tcp ip vint cerf bob kahn', "This section should discuss the origins and development of the internet from its inception in the 1960s, including the ARPANET project, the creation of the TCP/IP protocol, and the early networking concepts. It will also explore the role of key figures such as Vint Cerf and Bob Kahn in shaping the internet's infrastructure.")
('Emergence of World Wide Web 1990s history HTTP browsers online communities', 'This section should focus on the emergence of the world wide web (WWW) in the mid-1990s, including the invention of HTTP, the development of browsers like Mosaic and Netscape, and the growth of online communities. It will also examine the impact of the web on global communication, commerce, and culture.')
('recent internet technological advancements social media e commerce streaming cybersecurity online privacy digital divide', 'This section should discuss the significant technological advancements that have shaped the inte

{'topic': 'History of the Internet',
 'number_of_subtopics': 3,
 'subtopics': '{"Establishing the Foundation: The Early Years of the Internet": "This section should discuss the origins and development of the internet from its inception in the 1960s, including the ARPANET project, the creation of the TCP/IP protocol, and the early networking concepts. It will also explore the role of key figures such as Vint Cerf and Bob Kahn in shaping the internet\'s infrastructure.", \n"The Internet Expands: The Rise of the World Wide Web": "This section should focus on the emergence of the world wide web (WWW) in the mid-1990s, including the invention of HTTP, the development of browsers like Mosaic and Netscape, and the growth of online communities. It will also examine the impact of the web on global communication, commerce, and culture.", \n"The Modern Internet: Evolving Technologies and Challenges": "This section should discuss the significant technological advancements that have shaped the inte

In [41]:
def get_essay_content(subtopic_headings, search_results):
    """Combine headings and their research summaries into one text block.

    Each section is rendered as ``<heading>:<summary>`` and sections are
    separated by a blank line. Only the arguments are used; the function no
    longer reads the notebook-level ``result`` variable, so it reflects the
    current pipeline run and works on a fresh kernel.

    Args:
        subtopic_headings: Sequence of heading strings.
        search_results: Sequence of summary strings, in the same order as
            ``subtopic_headings``.

    Returns:
        The combined text with leading/trailing whitespace stripped; an empty
        string when there are no sections. If the two sequences differ in
        length, the extra items of the longer one are ignored.
    """
    sections = (
        f"{heading}:{summary}"
        for heading, summary in zip(subtopic_headings, search_results)
    )
    return "\n\n".join(sections).strip()

In [42]:
# Adds `essay_content`: the headings and their search summaries merged into a
# single text block by get_essay_content.
essay_content_chain = RunnablePassthrough().assign(
    essay_content=lambda state: get_essay_content(
        state['subtopic_headings'],
        state['search_results'],
    )
)

In [38]:
# essay_content = ""
# for i in range(len(result['search_results'])):
#     essay_content += result['subtopic_headings'][i]
#     essay_content += ":"
#     essay_content += result['search_results'][i]
#     essay_content += "\n\n"
# essay_content = essay_content.strip()

In [43]:
# Final stage: turn the collected per-section content into a markdown essay.
ESSAY_TEMPLATE = """1. Write a highschool essay on the topic {topic}.
2. The essay should be well structured and have the following subheadings: {subtopic_headings}.
3. The essay should be written in a formal tone and should be suitable for a highschool student.
4. Output should be in a markdown format.
5. Summarize the essay only from the content provided.
Content: {essay_content}"""

essay_prompt = PromptTemplate.from_template(ESSAY_TEMPLATE)

# Plain-text model (no format='json'); this rebinds the notebook-level `model`.
model = OllamaLLM(run_local=True, model="llama3.2:latest")

# prompt -> model -> string
essay_chain = essay_prompt | model | StrOutputParser()

In [44]:
# Full pipeline: subtopics -> headings/descriptions -> search queries ->
# search summaries -> merged content -> essay.
# essay_chain ends in StrOutputParser, so `result` is rebound to a string here.
essay_request = {"topic": "History of the Internet", "number_of_subtopics": 3}
sequence_chain = (
    subtopic_chain
    | passthrough_chain
    | search_query_chain
    | parallel_search_chain
    | essay_content_chain
    | essay_chain
)
result = sequence_chain.invoke(essay_request)
result

"# History of the Internet\n## Early Beginnings of the Internet\n\nThe history of TCP/IP (Transmission Control Protocol/Internet Protocol) is a fascinating story that spans over four decades. In the late 1960s, the United States Department of Defense's Advanced Research Projects Agency (ARPA) funded a project to create a network that could connect different computers across the country. This project was called ARPANET.\n\nIn 1972, Vint Cerf and Bob Kahn developed the first internet protocol, which they called IP. They also designed TCP, a transport-layer protocol that would ensure reliable data transfer between devices on the network.\n\nTCP/IP emerged as a cohesive system in the early 1970s, combining both protocols into one. The Internet Engineering Task Force (IETF) introduced the RFC format as a way to standardize internet protocols in 1969.\n\nAs ARPANET expanded, TCP/IP became the de facto standard for the network in the late 1970s. Other networks, such as the National Science Fo

In [None]:
# result['essay_content'] = essay_content

# final = essay_chain.invoke(result)

In [47]:
from IPython.display import display, Markdown

# Render the essay string as formatted markdown instead of its raw repr.
essay_markdown = Markdown(result)
display(essay_markdown)

# History of the Internet
## Early Beginnings of the Internet

The history of TCP/IP (Transmission Control Protocol/Internet Protocol) is a fascinating story that spans over four decades. In the late 1960s, the United States Department of Defense's Advanced Research Projects Agency (ARPA) funded a project to create a network that could connect different computers across the country. This project was called ARPANET.

In 1972, Vint Cerf and Bob Kahn developed the first internet protocol, which they called IP. They also designed TCP, a transport-layer protocol that would ensure reliable data transfer between devices on the network.

TCP/IP emerged as a cohesive system in the early 1970s, combining both protocols into one. The Internet Engineering Task Force (IETF) introduced the RFC format as a way to standardize internet protocols in 1969.

As ARPANET expanded, TCP/IP became the de facto standard for the network in the late 1970s. Other networks, such as the National Science Foundation Network (NSFNET), adopted TCP/IP as well.

## Expansion and Evolution

In the 1980s, the internet grew rapidly, and TCP/IP became the dominant protocol on the network. The first Domain Name System (DNS) was introduced in 1983, making it easier for users to access websites using easy-to-remember domain names instead of IP addresses.

Modern TCP/IP has undergone numerous updates and improvements over the years, including the introduction of IPv6 in 1998, which provides a larger address space and improved security features.

Key players who contributed to the development and evolution of TCP/IP include Vint Cerf, Bob Kahn, Jon Postel, and Larry Roberts.

## The Modern Internet: Implications and Challenges

The modern internet has had a profound impact on global communication, e-commerce, and countless other applications that have transformed modern society. However, it also poses significant challenges to digital privacy, security, and regulation.

The rise of surveillance technologies, data breaches, and hacking threatens the integrity of personal data. Encryption is a powerful tool for protecting personal data, but it is not foolproof and can be circumvented by governments or hackers.

A balanced approach between security and privacy is needed to safeguard personal data. Individuals should take steps to protect their digital privacy, corporations and governments should prioritize transparency and accountability in how personal data is used, and stronger regulations and laws are needed to protect individual rights.

The modern internet also presents opportunities for innovation and growth, but it requires a nuanced approach that balances security, privacy, and regulation to ensure its continued development and success.

In [None]:
# Note: had to pin duckduckgo_search==8.0.2 because of rate-limit issues.

In [20]:
# result["search_results"]


In [16]:
# links = DDGS().text('ARPANET history packet switching Vint Cerf Bob Kahn internet development', max_results=2)
# links


In [17]:
# content = ""
# for i in range(4):
#     if i>0:
#         content += "\n\n"
#     search = requests.get(links[0]['href']).text
#     search = BeautifulSoup(search, 'html.parser')
#     search = search.get_text(separator="\n", strip=True)
#     content += search
# print(content)
    

In [None]:
# for link in links:
# content = requests.get(links[0]['href']).text

In [None]:
# content = BeautifulSoup(content, 'html.parser')
# content = content.get_text(separator="\n", strip=True)

In [None]:
# content

In [18]:
# model = OllamaLLM(model="llama3.2:latest", run_local=True)

In [19]:
# model.invoke("Obtain the information described in the description from the content. Use only what is found in the content. Do not add anything in addition. Summarize it for a highschool report." \
# "Description:" \
# "The history of the internet began with the development of communication networks in the 1960s, focusing on military and academic uses. This section should discuss the origins of ARPANET, the first operational packet switching network, and its impact on the early internet era."\
# ""\
# "Content:" \
# + content)