In [None]:
import asyncio
import json
import os
import re
import uuid
from typing import Any, List, Mapping, Optional

import langchain
import requests
from chromadb.config import Settings
from chromadb.utils import embedding_functions
from dotenv import load_dotenv
from langchain.agents import initialize_agent, load_tools
from langchain.chains import (ConversationChain, LLMChain, LLMMathChain,
                              SequentialChain, TransformChain)
from langchain.chat_models import ChatOpenAI
from langchain.docstore import InMemoryDocstore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.base import LLM
from langchain.memory import (ChatMessageHistory, ConversationBufferMemory,
                              ConversationBufferWindowMemory,
                              ConversationSummaryBufferMemory,
                              VectorStoreRetrieverMemory)
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import messages_from_dict, messages_to_dict
from langchain.vectorstores import Chroma
from helpers.custom_memory import CustomBufferWindowMemory
from textwrap import dedent

from koboldllm import KoboldApiLLM

In [None]:
# NOTE(review): `llm` is only assigned in a later cell of this notebook, so on a
# fresh kernel (Restart & Run All) this call raises NameError. The same prompt is
# sent again further down, after `llm` has been created.
llm("The following is the manifesto of the unibomber")

In [3]:
from langchain.llms import KoboldApiLLM

# Endpoint of the Kobold API server. It can be overridden with the
# KOBOLD_API_ENDPOINT environment variable; the fallback is the LAN address that
# was previously hard-coded here, so existing setups keep working unchanged.
llm = KoboldApiLLM(
    endpoint=os.environ.get("KOBOLD_API_ENDPOINT", "http://192.168.1.144:5000/")
)

In [4]:
# Send a raw completion prompt to the `llm` created in the previous cell; as the
# cell's last expression, the returned text is displayed as the cell output.
llm("The following is the manifesto of the unibomber")

{'use_story': False, 'use_authors_note': False, 'use_world_info': False, 'use_memory': False, 'max_context_length': 1600, 'max_length': 512, 'rep_pen': 1.12, 'rep_pen_range': 1024, 'rep_pen_slope': 0.9, 'temperature': 0.6, 'tfs': 0.9, 'top_a': 0.9, 'top_p': 0.95, 'top_k': 0, 'typical': 0.5, 'stop_sequence': [], 'prompt': 'The following is the manifesto of the unibomber'}
The following is the manifesto of the unibomber, Ted Kaczynski.
Ted Kaczynski was a mathematician who taught at Harvard University. He left his teaching position and went into hiding in 1978. He was known as the Unabomber. He sent letters to newspapers and made demands that they publish his manifesto. He believed that technology was destroying society and that people should return to a simpler way of life. He killed three people and injured 23 others in a series of bombings. He was arrested in 1996 and sentenced to life in prison.
Theodore Kaczynski (born May 22, 1942), also known as "Ted Kaczynski" and the "Unabomber"

', Ted Kaczynski.\nTed Kaczynski was a mathematician who taught at Harvard University. He left his teaching position and went into hiding in 1978. He was known as the Unabomber. He sent letters to newspapers and made demands that they publish his manifesto. He believed that technology was destroying society and that people should return to a simpler way of life. He killed three people and injured 23 others in a series of bombings. He was arrested in 1996 and sentenced to life in prison.\nTheodore Kaczynski (born May 22, 1942), also known as "Ted Kaczynski" and the "Unabomber", is an American domestic terrorist and mathematician. A mathematics prodigy, he abandoned an academic career in 1969 to pursue a primitive lifestyle. Then, between 1978 and 1995, he killed three people and injured 23 others in an attempt to start a revolution by disrupting the industrial economy through a campaign of mail bombings. In conjunction, he issued a social critique calling for the collapse of industrial 

In [None]:
class Chatbot:
    """Character chatbot that keeps a separate conversation memory per channel.

    The character card is read from a JSON file, a prompt template ending in
    ``"<char_name>:"`` is built from it, and every channel gets its own
    ``CustomBufferWindowMemory`` plus its own list of stop sequences.
    """

    # Value passed as ``k`` to every CustomBufferWindowMemory created here
    # (presumably the window size - confirm in helpers.custom_memory).
    MEMORY_WINDOW = 10

    def __init__(self, char_filename="chardata.json", llm=None):
        """Load the character card and build the default conversation chain.

        Args:
            char_filename: Path of the JSON character file. It must contain the
                keys ``char_name``, ``char_persona``, ``char_greeting``,
                ``world_scenario`` and ``example_dialogue``.
            llm: Optional language model to use. When omitted, a
                ``KoboldApiLLM()`` with default arguments is created.

        Raises:
            OSError: If the character file cannot be opened.
            KeyError: If a required key is missing from the character file.
        """
        self.histories = {}  # channel_id -> memory object for that channel
        self.stop_sequences = {}  # channel_id -> list of "<name>:" stop tokens

        # Read character data from the JSON file
        with open(char_filename, "r", encoding="utf-8") as f:
            data = json.load(f)
        self.char_name = data["char_name"]
        self.char_persona = data["char_persona"]
        self.char_greeting = data["char_greeting"]
        self.world_scenario = data["world_scenario"]
        self.example_dialogue = data["example_dialogue"]

        self.memory = self._new_memory()
        self.history = "[Beginning of Conversation]"
        self.llm = llm if llm is not None else KoboldApiLLM()
        # {{history}} / {{input}} stay as literal placeholders for PromptTemplate;
        # only the character name is interpolated right now.
        self.template = f"""Instructions: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
        
Current conversation:
{{history}}
{{input}}
{self.char_name}:"""
        self.PROMPT = PromptTemplate(input_variables=["history", "input"], template=self.template)
        self.conversation = ConversationChain(
            prompt=self.PROMPT,
            llm=self.llm,
            verbose=True,
            memory=self.memory,
        )

    def _new_memory(self):
        """Create an empty window memory that labels AI turns with the character name."""
        return CustomBufferWindowMemory(k=self.MEMORY_WINDOW, ai_prefix=self.char_name)

    def get_memory_for_channel(self, channel_id):
        """Get the memory for the channel with the given ID. If no memory exists yet, create one."""
        if channel_id not in self.histories:
            self.histories[channel_id] = self._new_memory()
            # Kept from the original: a newly created memory also becomes self.memory.
            self.memory = self.histories[channel_id]
        return self.histories[channel_id]

    def get_stop_sequence_for_channel(self, channel_id, name):
        """Register ``"<name>:"`` as a stop token for the channel and return the channel's list.

        The returned list is the stored list itself (not a copy); it accumulates
        one entry per distinct speaker name seen in the channel.
        """
        name_token = f"{name}:"
        tokens = self.stop_sequences.setdefault(channel_id, [])
        if name_token not in tokens:
            tokens.append(name_token)
        return tokens

    def generate_response(self, name, message_content, channel_id) -> str:
        """Run one conversation turn for ``name`` in ``channel_id`` and return the reply.

        Args:
            name: Display name of the speaker; used as the message prefix and
                registered as a stop sequence.
            message_content: Text of the speaker's message.
            channel_id: Key selecting the per-channel memory and stop sequences.

        Returns:
            The ``"response"`` entry of the conversation chain's output.
        """
        memory = self.get_memory_for_channel(channel_id)
        stop_sequence = self.get_stop_sequence_for_channel(channel_id, name)
        formatted_message = f"{name}: {message_content}"

        # Create a conversation chain using the channel-specific memory
        conversation = ConversationChain(
            prompt=self.PROMPT,
            llm=self.llm,
            verbose=True,
            memory=memory,
        )

        input_dict = {
            "input": formatted_message,
            "stop": stop_sequence,
        }
        response = conversation(input_dict)

        return response["response"]

    def add_history(self, name, message_content, channel_id) -> None:
        """Record a message in the channel's memory without generating a reply.

        Args:
            name: Display name of the speaker.
            message_content: Text of the speaker's message.
            channel_id: Key selecting the per-channel memory.
        """
        memory = self.get_memory_for_channel(channel_id)
        # Called only for its side effect: the speaker's "<name>:" token is
        # registered as a stop sequence for this channel, as before.
        self.get_stop_sequence_for_channel(channel_id, name)
        formatted_message = f"{name}: {message_content}"

        memory.add_input_only(formatted_message)
        print(f"added to history: {formatted_message}")


In [None]:
# Instantiate the bot with its defaults; by default the constructor reads
# "chardata.json" from the current working directory, so that file must exist.
chatbot = Chatbot()

In [None]:
import requests
import json

def get_module_status(timeout=5):
    """Report whether the local extras API lists the 'summarize' module.

    Args:
        timeout: Seconds to wait for the API before giving up.

    Returns:
        True if the API answers with HTTP 200 and 'summarize' is among the
        reported modules; False otherwise, including when the API cannot be
        reached at all.
    """
    try:
        response = requests.get('http://localhost:5100/api/modules', timeout=timeout)
    except requests.RequestException as exc:
        # A refused connection or timeout raises instead of returning a status
        # code; treat it like any other "API unavailable" outcome.
        print(f'Error: Could not connect to the API. ({exc})')
        return False

    if response.status_code != 200:
        print('Error: Could not connect to the API.')
        return False

    return 'summarize' in response.json().get('modules', [])


def summarize_text(text, timeout=120):
    """Summarize ``text`` through the local extras API.

    Args:
        text: The text to summarize.
        timeout: Seconds to wait for the summarize request before giving up.

    Returns:
        The 'summary' field of the API response ('' if the field is absent), or
        None when the summarize module is inactive, the request fails, or the
        API answers with a non-200 status.
    """
    # Checking if the summarize module is active
    if not get_module_status():
        print('Summarization module is not active.')
        return None

    try:
        response = requests.post(
            'http://localhost:5100/api/summarize',
            json={'text': text},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Network failures are reported the same way as a bad status code
        # instead of propagating out of the notebook cell.
        print(f'Error: Could not summarize the text. ({exc})')
        return None

    if response.status_code != 200:
        print('Error: Could not summarize the text.')
        return None

    return response.json().get('summary', '')

# Testing the summarize_text function
text_to_summarize = """On 18 June 2023, Titan, a submersible operated by American tourism and expeditions company OceanGate, imploded during an expedition to view the wreck of the Titanic in the North Atlantic Ocean off the coast of Newfoundland, Canada. On board the submersible were Stockton Rush, the CEO of OceanGate; Paul-Henri Nargeolet, a French deep sea explorer and Titanic expert; Hamish Harding, a British billionaire businessman; Shahzada Dawood, a Pakistani-British billionaire businessman; and Dawood's son Suleman.

Communication with Titan was lost 1 hour and 45 minutes into its dive. Authorities were alerted when it failed to resurface at the scheduled time later that day. After the submersible had been missing for four days, a remotely operated underwater vehicle (ROV) discovered a debris field containing parts of Titan, about 500 metres (1,600 ft) from the bow of the Titanic. The search area was informed by the United States Navy's (USN) sonar detection of an acoustic signature consistent with an implosion around the time communications with the submersible ceased, suggesting the pressure hull had imploded while Titan was descending, resulting in the instantaneous deaths of all five occupants.

The search and rescue operation was conducted by an international team led by the United States Coast Guard (USCG), USN, and Canadian Coast Guard.[1] Support was provided by aircraft from the Royal Canadian Air Force and United States Air National Guard, a Royal Canadian Navy ship, as well as several commercial and research vessels and ROVs.[2][3]

Numerous industry experts had raised concerns about the safety of the vessel. OceanGate executives, including Rush, had not sought certification for Titan, arguing that excessive safety protocols hindered innovation.[4]

Background
OceanGate
Main article: OceanGate

OceanGate CEO Stockton Rush, pictured in 2015
OceanGate is a private company, founded in 2009 by Stockton Rush and Guillermo Söhnlein. Since 2010, it has transported paying customers in leased commercial submersibles off the coast of California, in the Gulf of Mexico, and in the Atlantic Ocean.[5] The company is based in Everett, Washington, U.S.[6]

Rush realised that visiting shipwreck sites was a way to get media attention. OceanGate had previously conducted trips to other shipwrecks including its 2016 dive to the wreck of the Andrea Doria aboard their other submersible Cyclops 1. In 2019, Rush told Smithsonian magazine "There's only one wreck that everyone knows ... If you ask people to name something underwater, it's going to be sharks, whales, Titanic".[5]

Titanic
Main article: Wreck of the Titanic
The Titanic was a British ocean liner that sank in the North Atlantic Ocean on 15 April 1912, after colliding with an iceberg. More than 1,500 people died, making it the deadliest sinking of a single ship at the time.[7][8] In 1985, Robert Ballard located the wreck of the Titanic on the ocean floor, around 400 nautical miles (740 km; 460 mi) from the coast of Newfoundland.[9] The wreck lies at a depth of about 3,810 metres (12,500 feet; 2,080 fathoms).[10] Since its discovery, it has been a destination for research expeditions and tourism. By 2012, a century after its sinking, 140 people had visited the wreck site.[11]"""
# Run the sample article through the summarizer. summarize_text returns None on
# failure and may return an empty string, both of which are falsy, so nothing is
# printed in those cases.
summary = summarize_text(text_to_summarize)
if summary:
    print(f'Summary: {summary}')


In [None]:
#!python3 -m pip install --upgrade langchain deeplake openai

In [None]:
!pip install requests

In [None]:
import os
from getpass import getpass

# The key is typed in at run time instead of being stored in the notebook. The
# prompt is labelled (like the Activeloop token prompt) so it is clear which
# secret is being requested.
os.environ["OPENAI_API_KEY"] = getpass("OpenAI API Key:")

In [None]:
# Ask for the Activeloop token interactively and expose it through the
# ACTIVELOOP_TOKEN environment variable for the rest of the session.
os.environ["ACTIVELOOP_TOKEN"] = getpass("Activeloop Token:")

In [None]:
from langchain.document_loaders import TextLoader

# Root of the tree whose Python sources are indexed (relative to the notebook).
root_dir = "../../../.."

docs = []
skipped = []  # (path, error) pairs for files that could not be loaded
for dirpath, dirnames, filenames in os.walk(root_dir):
    # Prune virtualenv directories in place: os.walk then never descends into
    # them, which also excludes files that sit directly inside ".venv".
    dirnames[:] = [d for d in dirnames if d != ".venv"]
    for file in filenames:
        if not file.endswith(".py"):
            continue
        path = os.path.join(dirpath, file)
        try:
            loader = TextLoader(path, encoding="utf-8")
            docs.extend(loader.load_and_split())
        except Exception as e:
            # Loading stays best-effort, but failures are recorded instead of
            # being silently discarded.
            skipped.append((path, repr(e)))
print(f"{len(docs)}")
if skipped:
    print(f"skipped {len(skipped)} file(s) that could not be loaded, e.g. {skipped[0]}")

In [None]:
from langchain.text_splitter import CharacterTextSplitter

# Re-split the loaded documents with chunk_size=1000 and no overlap between
# consecutive chunks, then print how many chunks were produced.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(docs)
print(f"{len(texts)}")

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding model handed to the vector store cells below. The bare expression on
# the last line displays the object's repr as the cell output.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment set above - confirm.
embeddings = OpenAIEmbeddings()
embeddings

In [None]:
from langchain.vectorstores import DeepLake

# DEEPLAKE_ACCOUNT_NAME is not assigned anywhere else in this notebook, so it is
# resolved here: the environment variable wins, otherwise the user is asked.
DEEPLAKE_ACCOUNT_NAME = os.environ.get("DEEPLAKE_ACCOUNT_NAME") or input(
    "Deep Lake account name: "
)

# Build the "langchain-code" dataset under that account from the split chunks,
# using `embeddings` as the embedding model. The trailing expression displays
# the resulting store.
db = DeepLake.from_documents(
    texts, embeddings, dataset_path=f"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code"
)
db

In [None]:
# Re-open the same "langchain-code" dataset in read-only mode; this rebinds `db`.
# NOTE(review): relies on DEEPLAKE_ACCOUNT_NAME already being defined in the
# kernel - no assignment to it is visible in this notebook.
db = DeepLake(
    dataset_path=f"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code",
    read_only=True,
    embedding_function=embeddings,
)

In [None]:
# Turn the vector store into a retriever and set all search options in a single
# update, in the same order as they would be assigned one by one.
retriever = db.as_retriever()
retriever.search_kwargs.update(
    {
        "distance_metric": "cos",
        "fetch_k": 20,
        "maximal_marginal_relevance": True,
        "k": 20,
    }
)

In [None]:
def filter(x):
    """Decide whether a dataset sample should be kept by the retriever.

    ``x`` is indexed with ``"text"`` and ``"metadata"``; each entry exposes
    ``.data()["value"]``. Samples whose text contains "something" are rejected.
    Otherwise a sample is kept only if its metadata ``"source"`` contains
    "only_this" or "also_that".

    Note: this name shadows the built-in ``filter`` in the notebook namespace.
    """
    # Reject on source code content first; metadata is only read afterwards.
    source_code = x["text"].data()["value"]
    if "something" in source_code:
        return False

    # Then keep only samples whose path matches one of the wanted markers.
    source_path = x["metadata"].data()["value"]["source"]
    return any(marker in source_path for marker in ("only_this", "also_that"))


### turn on below for custom filtering
# retriever.search_kwargs['filter'] = filter

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

# Chat model used to answer questions; the trailing comment keeps the author's
# list of model names to try.
model = ChatOpenAI(model_name="gpt-3.5-turbo")  # 'ada' 'gpt-3.5-turbo' 'gpt-4',
# Conversational question-answering chain that draws its context from `retriever`.
qa = ConversationalRetrievalChain.from_llm(model, retriever=retriever)

In [None]:
# Questions to put to the QA chain; uncomment further entries to ask them too.
questions = [
    "What is the class hierarchy?",
    # "What classes are derived from the Chain class?",
    # "What classes and functions in the ./langchain/utilities/ folder are not covered by unit tests?",
    # "What one improvement do you propose in code in relation to the class hierarchy for the Chain class?",
]
# (question, answer) pairs accumulated so far and passed back in on every call.
chat_history = []

for question in questions:
    result = qa({"question": question, "chat_history": chat_history})
    # Record the exchange before printing so the next question sees it.
    chat_history.append((question, result["answer"]))
    print(f"-> **Question**: {question} \n")
    print(f"**Answer**: {result['answer']} \n")

In [None]:
!pip install llama-hub

In [None]:
from llama_index import download_loader

# Obtain the BeautifulSoupWebReader loader class by name via download_loader.
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")

# Load the listed page into `documents`.
loader = BeautifulSoupWebReader()
documents = loader.load_data(urls=['https://google.com'])

In [None]:
# The annotations below are evaluated when the function is defined, so these
# names must be in scope in this cell.
from typing import Any, Dict, Tuple


def _substack_reader(soup: Any) -> Tuple[str, Dict[str, Any]]:
    """Extract text from Substack blog post.

    Args:
        soup: Parsed page exposing ``select_one(css_selector)``, whose result
            offers ``getText()`` (presumably a BeautifulSoup object - confirm
            against the caller).

    Returns:
        A ``(text, extra_info)`` pair: the text of ``div.available-content`` and
        a dict holding the post title, subtitle and author. Any element missing
        from the page yields an empty string instead of raising.
    """

    def _text_of(selector: str) -> str:
        # select_one returns None when nothing matches (e.g. no subtitle).
        node = soup.select_one(selector)
        return node.getText() if node is not None else ""

    extra_info = {
        "Title of this Substack post": _text_of("h1.post-title"),
        "Subtitle": _text_of("h3.subtitle"),
        "Author": _text_of("span.byline-names"),
    }
    text = _text_of("div.available-content")
    return text, extra_info

In [None]:
from llama_index import GPTVectorStoreIndex, download_loader

# Same loader setup as the earlier web-reader cell, repeated so this cell is
# self-contained.
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")

loader = BeautifulSoupWebReader()
documents = loader.load_data(urls=['https://google.com'])
# Build a vector index over the loaded page and ask it a question; the answer is
# displayed as the cell output.
index = GPTVectorStoreIndex.from_documents(documents)
index.query('What language is on this website?')

In [None]:
from llama_index import GPTVectorStoreIndex, download_loader
from langchain.agents import initialize_agent, Tool
from langchain.llms import OpenAI
from langchain.chains.conversation.memory import ConversationBufferMemory

# Load the page and index it again, then expose the index to a LangChain agent.
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")

loader = BeautifulSoupWebReader()
documents = loader.load_data(urls=['https://google.com'])
index = GPTVectorStoreIndex.from_documents(documents)

# Single tool: forwards the agent's query string to the vector index. The lambda
# looks up `index` when it is called, not when the tool is created.
tools = [
    Tool(
        name="Website Index",
        func=lambda q: index.query(q),
        description=f"Useful when you want answer questions about the text on websites.",
    ),
]
# NOTE: this rebinds the notebook-level name `llm`, which an earlier cell bound
# to a KoboldApiLLM instance.
llm = OpenAI(temperature=0)
memory = ConversationBufferMemory(memory_key="chat_history")
# NOTE(review): confirm that the "zero-shot-react-description" agent type actually
# consumes the `chat_history` memory passed here.
agent_chain = initialize_agent(
    tools, llm, agent="zero-shot-react-description", memory=memory
)

output = agent_chain.run(input="What language is on this website?")

In [None]:
# Reuses whatever `loader` is currently bound in the kernel (a BeautifulSoupWebReader
# when the cells are run in order) and rebinds `documents`.
# NOTE(review): `custom_hostname` presumably selects a site-specific extractor - confirm in the loader's docs.
documents = loader.load_data(urls=["https://langchain.readthedocs.io/en/latest/"], custom_hostname="readthedocs.io")

In [None]:
from pathlib import Path
from llama_index import download_loader

# Obtain the ImageVisionLLMReader loader class by name via download_loader.
ImageVisionLLMReader = download_loader("ImageVisionLLMReader")

# NOTE: rebinds `loader` and `documents` from the web-reader cells above; the
# image path is relative to the notebook's working directory.
loader = ImageVisionLLMReader()
documents = loader.load_data(file=Path('./cat.jpg'))

In [None]:
# function to approximate the amount of tokens in a string of text by counting
# whitespace-separated words (this does NOT use tiktoken)
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``.

    This is only a rough stand-in for a model token count: no tokenizer is
    involved, so the value will generally differ from what a tokenizer reports.

    Args:
        text: The string to measure.

    Returns:
        The number of items produced by ``text.split()``; 0 for an empty or
        whitespace-only string.
    """
    return len(text.split())

# function to count the amount of characters in a string of text using len()
def count_characters(text):
    """Return the length of ``text`` as reported by ``len()``."""
    return len(text)
    

