In [None]:
# from openai import OpenAI
import toml
# from ollama import chat
# from ollama import ChatResponse
from ollama import Client
from langchain_core.prompts import PromptTemplate
import json
from langchain_ollama.llms import OllamaLLM
from langchain.chains.llm import LLMChain
from duckduckgo_search import DDGS
# from langchain_core.output_parsers import JsonOutputParser

In [2]:

# Load a TOML file
with open('../secrets.toml', 'r') as f:
    openai_key = toml.load(f).get('api_keys').get('openai')

assert openai_key, "OpenAI API key not found in secrets.toml"


In [3]:
class LLMModel:
    def __init__(self, model_name: str, run_local: bool = True, system_prompt: str = "you are a helpful assistant"):
        self.run_local = run_local
        self.model_name = model_name
        self.system_prompt = system_prompt
        self.messages = [{"role": "system", "content": self.system_prompt}]
        
        if self.run_local:
            self.client = Client(
                host='http://localhost:11434',
                # headers={'x-some-header': 'some-value'}
                )

    def add_message(self, role: str, content: str) -> bool:
        if role not in ["system", "user", "assistant"]:
            raise ValueError("Role must be 'system', 'user', or 'assistant'")
        self.messages.append({"role": role, "content": content})
        return True

    def clear_messages(self) -> bool:
        self.messages = []
        return True

    def local_generate(self, prompt: str, max_tokens: int = 1500, **kwargs) -> str:
        self.messages.append({"role": "user", "content": prompt})
        response = self.client.chat(model=self.model_name, messages=self.messages, **kwargs)
        self.messages.append({"role": "assistant", "content": response.message.content.strip()})
        return response.message.content.strip()
    
    # question: how do I set max token in ollama python api?
    


In [4]:
model = LLMModel(model_name="llama3.2:latest", run_local=True)

In [5]:
# model.local_generate("What is the capital of India?")

In [6]:
# model.local_generate("What is the weather like there?")

In [7]:
# model.local_generate("What is the best month to visit?")

In [8]:
# model.local_generate("What is the best month to visit? Just give me the month name, no other text.")

In [9]:
# model.messages

In [None]:
# model.clear_messages()

True

In [None]:
# topic = "The Emergency in India (1975-1977)"
# number_of_subtopics = 5

In [None]:
# subtopics_template = PromptTemplate.from_template(
#     """Generate a list of {number_of_subtopics} subheadings for a highschool essay on {topic}.
#     Order the subheadings such that they have a logical sequence.
#     Each subheading should have a one-line discription.
#     Provide output in json format as follows:

#     {{<heading1>:<description1>,
#     <heading2>:<description2>,...}}"""
# )

In [6]:
# subtopics_prompt = subtopics_template.format(topic=topic, number_of_subtopics=number_of_subtopics)
# subtopics_prompt

In [7]:
# topics = model.local_generate(subtopics_prompt, format='json')
# topics = json.loads(topics)
# topics

In [8]:
# for k,v in topics.items():
#     print(k)
#     print(v)

In [9]:
def generate_subtopics(topic: str, number_of_subtopics: int, model: LLMModel) -> dict:
    subtopics_template = PromptTemplate.from_template(
    """Generate a list of {number_of_subtopics} subheadings for a highschool essay on {topic}.
    Order the subheadings such that they have a logical sequence.
    Each subheading should have a one-line discription.
    Provide output in json format as follows:

    {{<heading1>:<description1>,
    <heading2>:<description2>,...}}"""
    )
    
    subtopics_prompt = subtopics_template.format(topic=topic, number_of_subtopics=number_of_subtopics)

    topics = model.local_generate(subtopics_prompt, format='json')
    topics = json.loads(topics)
    return topics

In [11]:
generate_subtopics("History of the Internet", 6, model)

{'Foundations of Networking': 'Exploring the early beginnings of computer networking and its precursors',
 'Birth of ARPANET and TCP/IP': 'Introducing the development of the first network that would later become the foundation of the internet',
 'The Emergence of the World Wide Web': 'Describing how Tim Berners-Lee created the fundamental technologies for modern web browsing',
 'Growth and Expansion of the Internet (1990s-2000s)': 'Discussing the rapid expansion of the internet during this period and its impact on society',
 'Challenges and Security Concerns in the Modern Internet': 'Examining the issues that arose with the increasing popularity of social media, e-commerce, and cybersecurity threats',
 'The Evolving Digital Landscape Today': 'Describing the current state of the internet, including emerging trends and technologies'}

In [12]:
generate_subtopics("Theory of Evolution", 10, model)

{'Introduction to Evolutionary Theory': 'Introducing the fundamental concept of evolution and its significance in biology',
 'History of Evolutionary Thought': 'Exploring the key milestones and contributors in the development of evolutionary theory',
 "Charles Darwin's Contributions": "Discussing the groundbreaking work of Charles Darwin and his book 'On the Origin of Species'",
 'The Mechanisms of Natural Selection': 'Describing the process by which natural selection shapes the evolution of species',
 'Adaptation and Variation': 'Exploring how populations adapt to their environments through genetic variation',
 'Mating and Reproduction in Evolutionary Contexts': 'Examining how mating strategies influence evolutionary outcomes',
 'Species Diversification and Radiation': 'Discussing the mechanisms that lead to the creation of new species',
 'Evolution vs. Creationism: Scientific Consensus': 'Comparing and contrasting the scientific views on evolution with creationist perspectives',
 'Cr

In [None]:
def subtopic_chain() -> LLMChain:
    subtopics_template = PromptTemplate.from_template(
        """Generate a list of {number_of_subtopics} subheadings for a highschool essay on {topic}.
        Order the subheadings such that they have a logical sequence.
        Each subheading should be accompanied with a detailed discription on what the contents should be about.
        Provide output in json format as follows:

        {{'<heading1>':'<description1>',
        '<heading2>':'<description2>',...}}
        
        Only return the json object without any additional text."""
    )
    
    model = OllamaLLM(model="llama3.2:latest", run_local=True, format='json')

    return LLMChain(llm=model, prompt=subtopics_template)

In [52]:
chain = subtopic_chain()
subtopics = chain.run(topic="History of the Internet", number_of_subtopics=6)
subtopics = json.loads(subtopics)
subtopics

{'Early Beginnings of the Internet': "This section will explore the origins and early development of the internet, including key events such as ARPANET's creation in 1969 and the Network Control Protocol (NCP) that enabled communication between different networks.",
 'The Dawn of the World Wide Web': 'This subsection will discuss how Tim Berners-Lee invented the world wide web in 1989, and how it revolutionized the way people access and share information online.',
 'Internet Expansion and Growth': 'This section will cover the rapid expansion and growth of the internet in the 1990s, including the development of dial-up connections, the rise of email, and the emergence of online communities.',
 "The Internet's Impact on Society": 'This subsection will examine how the internet has transformed the way people communicate, access information, and conduct business, as well as its impact on education, healthcare, and other areas of society.',
 'Security Concerns and Cybercrime': 'This section 

In [59]:
def search_query_chain() -> LLMChain:
    search_query_template = PromptTemplate.from_template(
        """Generate a search query for Duckduckgo on the topic: {search_details}.
        The query should be concise and relevant to the topic.
        Return only the search query without any additional text."""
    )
    
    model = OllamaLLM(model="llama3.2:latest", run_local=True)

    return LLMChain(llm=model, prompt=search_query_template)

In [57]:
def get_search_queries(subtopics: dict, search_query_chain: LLMChain) -> dict:
    search_queries = {}
    for subtopic, desc in subtopics.items():
        search_query = search_query_chain.run(search_details=desc)
        search_queries[subtopic] = search_query
    return search_queries

In [60]:
search_query_chain = search_query_chain()
search_queries = get_search_queries(subtopics, search_query_chain)
search_queries

{'Early Beginnings of the Internet': '"ARPANET creation and Network Control Protocol early development"',
 'The Dawn of the World Wide Web': '"Tim Berners-Lee invention of the World Wide Web"',
 'Internet Expansion and Growth': '"Internet 1990s expansion and growth"',
 "The Internet's Impact on Society": '"Internet\'s impact on communication, information, business, education, healthcare, and society"',
 'Security Concerns and Cybercrime': '"security measures against hacking and cybercrime"',
 'The Modern Internet: Challenges and Future Directions': '"internet trends 2023 regulation blockchain misinformation"'}

In [None]:
# def information_chain() -> LLMChain:
#     information_template = PromptTemplate.from_template(
#         """Generate details for the"""
#     )
    
#     model = OllamaLLM(model="llama3.2:latest", run_local=True)

#     return LLMChain(llm=model, prompt=information_template)

In [None]:
# results = DDGS().text("the emergency")

In [None]:
# import requests
# from bs4 import BeautifulSoup

In [None]:
# query = "what is the best search engine"
# url = "https://www.duckduckgo.com/?t=h_&q=" + query.replace(" ", "+") +"&t=h_&ia=web"
# response = requests.get(url)
# response.raise_for_status()
# soup = BeautifulSoup(response.text, 'html.parser')

In [13]:
# response.content

In [None]:
# def get_

In [None]:
# chain for subtopics - chain for search queries - chain for information retrieval from search results - chain for summarization