In [3]:
import os
from dotenv import load_dotenv

load_dotenv()

# openai
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# huggingface
HF_TOKEN = os.getenv("HF_ACCESS_TOKEN")

# google
GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
SERPAPI_KEY = os.getenv("SERPAPI_API_KEY")

# Things to try

In [None]:
https://python.langchain.com/docs/modules/data_connection/retrievers/web_research
https://www.mlq.ai/gpt-3-enabled-research-assistant-langchain-pinecone/
https://python.langchain.com/docs/integrations/tools/wikipedia

# German laws to english summary

HTML2Text converts HTML into plain text (with markdown-like formatting) without any specific tag manipulation.
To extract human-readable text without needing to manipulate specific HTML elements.

In [None]:
import openai
from langchain.document_loaders import AsyncHtmlLoader
from langchain.document_transformers import Html2TextTransformer
from translate import Translator

urls = ["https://www.gesetze-im-internet.de/baf_gzuschlagsv/BJNR009350986.html",
        "https://www.gesetze-im-internet.de/baf_g/BJNR014090971.html"
        ]

loader = AsyncHtmlLoader(urls)
docs = loader.load()

html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)


# Language Translation
translator = Translator(to_lang='en', from_lang='de')
german_text = docs_transformed[0].page_content  # Assuming there's one document
english_text = translator.translate(german_text[:500])

# summarization
def summarize_text(text, person_type):
    res = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", max_tokens=1000, temperature=0.7, top_p=0.5, frequency_penalty=0.5,
        messages=
       [ { "role": "system", "content": "You are a helpful assistant for text summarization.", },
         { "role": "user", "content": f"Summarize this for a {person_type}: {text}", }, ],
    )
    return res["choices"][0]["message"]["content"]

english_summary = summarize_text(english_text, 'person without legal training')
english_summary

# Identify Paragraph References
# You might need a parser specific to German legal texts to extract paragraph references.

# Include References in the Summary
# Incorporate the references into the English summary.

# Output
print(english_summary)

# Web retriever

In [1]:
import logging

from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models.openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.web_research import WebResearchRetriever
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.vectorstores import Chroma
import nest_asyncio
nest_asyncio.apply()

# Vectorstore
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db_oai",
)

# LLM
llm = ChatOpenAI( temperature=0 )

# Custom Search with Google Programmable Search Engine
search = GoogleSearchAPIWrapper()

# Initialize
web_research_retriever = WebResearchRetriever.from_llm(
    vectorstore=vectorstore,
    llm=llm,
    search=search,
)

# logging
logging.basicConfig()
logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)

# retrieve docs and provide citations
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm, retriever=web_research_retriever
)

user_input = "Where can I find laws regulating the duties of landlords in Berlin Germany?"
docs = web_research_retriever.get_relevant_documents(user_input)

result = qa_chain({"question": user_input})
result

INFO:langchain.retrievers.web_research:Generating questions for Google Search ...
INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'Where can I find laws regulating the duties of landlords in Berlin Germany?', 'text': LineList(lines=['1. What are the legal obligations of landlords in Berlin, Germany?\n', '2. Where can I access information on landlord responsibilities in Berlin, Germany?\n', '3. What are the laws governing landlord duties in Berlin, Germany?'])}
INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What are the legal obligations of landlords in Berlin, Germany?\n', '2. Where can I access information on landlord responsibilities in Berlin, Germany?\n', '3. What are the laws governing landlord duties in Berlin, Germany?']
INFO:langchain.retrievers.web_research:Searching for relevant urls...
INFO:langchain.retrievers.web_research:Searching for relevant urls...
INFO:langchain.retrievers.web_research:Search results: [{

{'question': 'Where can I find laws regulating the duties of landlords in Berlin Germany?',
 'answer': 'You can find laws regulating the duties of landlords in Berlin, Germany in the "Arbeitsschutzgesetz" (Occupational Safety and Health Act) and the "Grundgesetz" (Basic Law). Specifically, the "Arbeitsschutzgesetz" outlines the powers of the competent authorities in requesting information and documents from employers or responsible persons for the purpose of monitoring their surveillance tasks. The "Grundgesetz" guarantees property rights and the right of inheritance, and also allows for expropriation for the public good under certain conditions. Additionally, the "Grundgesetz" protects civil and political rights and ensures that they are not dependent upon or restricted by the exercise of religious freedom. \n',
 'sources': ''}

In [71]:
result

[Document(page_content='ohne Einverstandnis der Bewohner oder Nutzungsberechtigten nur treffen, soweit\nsie zur Verhutung dringender Gefahren fur die offentliche Sicherheit und\nOrdnung erforderlich sind. Die auskunftspflichtige Person hat die Maßnahmen\nnach den Satzen 1, 2, 5 und 6 zu dulden. Die Satze 1 und 5 gelten\nentsprechend, wenn nicht feststeht, ob in der Arbeitsstatte Personen\nbeschaftigt werden, jedoch Tatsachen gegeben sind, die diese Annahme\nrechtfertigen. Das Grundrecht der Unverletzlichkeit der Wohnung (Artikel 13\ndes Grundgesetzes) wird insoweit eingeschrankt.', metadata={'source': 'https://www.gesetze-im-internet.de/arbschg/BJNR124610996.html'})]

In [76]:
docs = [
{
    'page_content': 'ohne ...',
    'metadata': {'source': 'https://www.gesetze-im-internet.de/arbschg/BJNR124610996.html'}
},
]

# Extract unique sources
sources = set()
for d in docs:
    sources.add(d['metadata']['source'])

print(sources)

{'https://www.gesetze-im-internet.de/arbschg/BJNR124610996.html'}


## WebRetriever with custom prompt and output parsing.

In [19]:
import os
import re
from typing import List
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers.pydantic import PydanticOutputParser

# LLMChain
search_prompt = PromptTemplate(
    input_variables=["question"],
    template="""You are an assistant tasked with improving Google search 
    results. Generate FIVE Google search queries that are similar to
    this question. Transalte your queries into german as your searching for german official documents.
    Return the sources of your search.
    The output should be a numbered list of questions and each
    should have a question mark at the end: {question}""",
)

class LineList(BaseModel):
    """List of questions."""

    lines: List[str] = Field(description="Questions")

class QuestionListOutputParser(PydanticOutputParser):
    """Output parser for a list of numbered questions."""

    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = re.findall(r"\d+\..*?\n", text)
        return LineList(lines=lines)
    
llm_chain = LLMChain(
            llm=llm,
            prompt=search_prompt,
            output_parser=QuestionListOutputParser(),
        )

search = GoogleSearchAPIWrapper()

# Initialize
web_research_retriever_llm_chain = WebResearchRetriever(
    vectorstore=vectorstore,
    llm_chain=llm_chain, 
    search=search, 
)


# logging
logging.basicConfig()
logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)

# retrieve docs and provide citations
qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm, retriever=web_research_retriever, 
)


# Run
user_input = "Where can I find laws regulating the duties of landlords in Berlin Germany?"
docs = web_research_retriever_llm_chain.get_relevant_documents(user_input)
result = qa_chain({"question": user_input})
result

Fetching pages: 100%|##########| 1/1 [00:00<00:00,  1.72it/s]
INFO:langchain.retrievers.web_research:Generating questions for Google Search ...
INFO:langchain.retrievers.web_research:Questions for Google Search (raw): {'question': 'Where can I find laws regulating the duties of landlords in Berlin Germany?', 'text': LineList(lines=['1. What are the legal obligations of landlords in Berlin, Germany?\n', '2. Where can I access information on landlord responsibilities in Berlin, Germany?\n', '3. What are the laws governing landlord duties in Berlin, Germany?'])}
INFO:langchain.retrievers.web_research:Questions for Google Search: ['1. What are the legal obligations of landlords in Berlin, Germany?\n', '2. Where can I access information on landlord responsibilities in Berlin, Germany?\n', '3. What are the laws governing landlord duties in Berlin, Germany?']
INFO:langchain.retrievers.web_research:Searching for relevant urls...
INFO:langchain.retrievers.web_research:Searching for relevant url

{'question': 'Where can I find laws regulating the duties of landlords in Berlin Germany?',
 'answer': 'You can find laws regulating the duties of landlords in Berlin, Germany in the following ',
 'sources': ''}

# Google Programmable Search 
https://cse.google.com/cse.js?cx=8566b4b2cb2364df3

In [4]:
from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper



def search(query):
    search = GoogleSearchAPIWrapper(google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID)
    return search.results(query, 10)


tool = Tool(
    name="Google Search Snippets",
    description="Search Google for recent results.",
    func=search,
)


tool.run("Where can I find laws concerning the duties of landlords in berlin, germany?")

[{'title': 'Basic Law for the Federal Republic of Germany',
  'link': 'https://www.gesetze-im-internet.de/englisch_gg/englisch_gg.html',
  'snippet': 'Germans in the Länder of Baden-Württemberg, Bavaria, Berlin, Brandenburg, Bremen, Hamburg, Hesse, Lower Saxony, Mecklenburg-Western Pomerania, North Rhine-\xa0...'},
 {'title': 'Connecticut Law About Landlord/Tenant Law',
  'link': 'https://www.jud.ct.gov/lawlib/law/landlord.htm',
  'snippet': 'Connecticut Law About Landlord/Tenant Law: Useful links to information on summary process and landlord and tenant laws.'},
 {'title': 'German Criminal Code (Strafgesetzbuch – StGB)',
  'link': 'https://www.gesetze-im-internet.de/englisch_stgb/englisch_stgb.html',
  'snippet': 'Regardless of which law is applicable at the place where the offence was committed, German criminal law applies to offences committed on a ship or an aircraft\xa0...'},
 {'title': 'Renting, Security Deposits, and Evictions - Consumer Sourcebook ...',
  'link': 'https://www.d

# Plan-and-execute

In [8]:
from langchain import LLMMathChain, SerpAPIWrapper
from langchain.agents.tools import Tool
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.utilities import GoogleSearchAPIWrapper
from langchain_experimental.plan_and_execute import (
    PlanAndExecute,
    load_agent_executor,
    load_chat_planner,
)


def search(query):
    search = GoogleSearchAPIWrapper(google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID)
    return search.results(query, 10)

llm = OpenAI(temperature=0)

llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)

tools = [
    Tool(
        name="Search",
        func=search,
        description="Find offical german documents. translate the user input into german and search for pdf, doc, docx files and provide their sources?",
    ),
]

model = ChatOpenAI(temperature=0)
planner = load_chat_planner(model)
executor = load_agent_executor(model, tools, verbose=True)
agent = PlanAndExecute(planner=planner, executor=executor, verbose=True)

agent.run(
    "What law regulates the duties of a landlord in berlin?"
)



[1m> Entering new PlanAndExecute chain...[0m
steps=[Step(value='Research the legal system in Berlin to understand how landlord-tenant relationships are regulated.'), Step(value='Identify the specific law or laws that govern the duties of landlords in Berlin.'), Step(value='Review the relevant law(s) to understand the specific duties and responsibilities imposed on landlords.'), Step(value='Summarize the key points of the law(s) that regulate the duties of landlords in Berlin.'), Step(value='Given the above steps taken, provide the user with the information about the specific law that regulates the duties of landlords in Berlin.')]

[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction:
{
  "action": "Search",
  "action_input": "Legal system in Berlin landlord-tenant regulations"
}[0m

[1m> Finished chain.[0m
*****

Step: Research the legal system in Berlin to understand how landlord-tenant relationships are regulated.

Response: Action:
{
  "action": "Search",
  "act

"The search for the specific law that regulates the duties of landlords in Berlin did not yield relevant results. However, I can provide you with some general information about landlord-tenant regulations in Germany.\n\nIn Germany, the duties of landlords are primarily governed by the German Civil Code (Bürgerliches Gesetzbuch or BGB). The BGB contains provisions that outline the rights and obligations of both landlords and tenants. It covers various aspects such as rent, maintenance, repairs, termination of the tenancy, and more.\n\nTo access the full text of the BGB in English, you can visit the German Civil Code BGB website at [https://www.gesetze-im-internet.de/englisch_bgb/](https://www.gesetze-im-internet.de/englisch_bgb/). Additionally, you can refer to the Landlord-tenant law - Wikipedia page for a general overview of landlord-tenant law in Germany.\n\nAnother helpful resource is the Your guide to German rental laws and tenant rights website, which provides a guide to German re

# Tools

## PSE & Wiki 

In [24]:
from langchain import OpenAI
from langchain.agents import AgentType, Tool, initialize_agent, load_tools
from langchain.llms.openai import OpenAI
from langchain.tools import Tool, WikipediaQueryRun
from langchain.utilities import (
    GoogleSearchAPIWrapper,
    WikipediaAPIWrapper,

)


def pse(query):
    search = GoogleSearchAPIWrapper(google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID)
    return search.results(query, 5)

wiki = WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper())


tools = [
    Tool(
        name="pse",
        description="use this tool first. SEARCH IN GERMAN. Find offical german documents and laws concerning the users problem.\
            Retrieve sources and links to the document and give a short summary of its content.\
            Find official documents of Germany, preferably in pdf fromat",
        func=pse,
    ),

    Tool(
        name="wiki",
        description="use this tool when the first failed. \
            Find official documents of Germany, preferably in pdf fromat\
            Look in the footnotes and links of wikipedia pages for links to pdf's and sources\
            follow the links and find the source where the file can be downloaded",
        func=wiki.run,
    ),
]

agent = initialize_agent(
    tools,
    OpenAI(temperature=0),
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)


agent.run(
    "What law regulates the duties of a landlord in berlin?"
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should look for official documents related to this topic
Action: pse
Action Input: "landlord duties berlin"[0m
Observation: [36;1m[1;3m[{'title': 'Landlord and tenant obligations in Germany - Berlin', 'link': 'https://aden-immo.com/en/landlord-guide/landlord-and-tenant-obligations-in-germany/', 'snippet': 'Discover the rights and duties of tenants and their landlord ✓✓✓ Find out everything there is to know before renting your property!'}, {'title': 'Rent price cap: you should observe these obligations as a landlord ...', 'link': 'https://www.engelvoelkers.com/en-de/berlincommercial/rent-price-cap-obligations-for-owners-landlords/', 'snippet': 'UPDATE: The Berlin rent cap was declared invalid by a decision of the Federal Constitutional Court in Karlsruhe on April 15, 2021.'}, {'title': "What you need to know about German renters' rights", 'link': 'https://www.lemonade.com/de/blog/renters-rights/', 'snippet': 'Not only la

'The law that regulates the duties of a landlord in Berlin is the German rental law, which is outlined in the rental contract (Mietvertrag) that must be signed by both the landlord and the tenant. The contract must contain the rights and obligations of both parties, as well as the terms and conditions of the rental agreement.'

# Serper, serapi, google-search, searchNG

In [79]:
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI

llm = OpenAI(temperature=0)


tools = load_tools(["google-serper", "serpapi", "google-search", "searx-search"], searx_host="http://localhost:8888", llm=llm)

agent = initialize_agent(
    tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True
)

agent.run("Where can I find the law regulating the duties of a landlord in berlin germany?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I should search for the law online
Action: google_search
Action Input: "landlord duties berlin germany"[0m
Observation: [38;5;200m[1;3mFull text in format: HTML PDF · text in German ; Section 239Surety ; Section 240Duty to supplement security ; Book 2. Law of obligations ; Division 1. Subject ... Legal obligations of secrecy, especially tax secrecy pursuant to Section 30 of the German Fiscal Code, as well as professional or special official secrecy ... 1 Act of 30.6.1959 101-3; Entry into force in Berlin on 10.8.1951 pursuant ... Where these duties are performed by an expert, the provisions of the General ...[0m
Thought:[32;1m[1;3m I should look for a specific law
Action: google_search
Action Input: "landlord duties berlin germany law"[0m
Observation: [38;5;200m[1;3mFull text in format: HTML PDF · text in German ; Section 239Surety ; Section 240Duty to supplement security ; Book 2. Law of obligations ; Division 1. S

'The law regulating the duties of a landlord in Berlin, Germany is found in Book 2 of the German Code of Civil Procedure, Section 239Surety, Section 240Duty to supplement security, and Book 10 of the German Code of Civil Procedure. Additionally, the German Civil Code and the Act of 30.6.1959 101-3 are also relevant.'

# Langchain + Streamlit Searchbar

In [None]:
import streamlit as st

from langchain.agents import initialize_agent, AgentType
from langchain.callbacks import StreamlitCallbackHandler
from langchain.chat_models import ChatOpenAI
from langchain.tools import DuckDuckGoSearchRun

with st.sidebar:
    openai_api_key = st.text_input(
        "OpenAI API Key", key="langchain_search_api_key_openai", type="password"
    )
    "[Get an OpenAI API key](https://platform.openai.com/account/api-keys)"
    "[View the source code](https://github.com/streamlit/llm-examples/blob/main/pages/2_Chat_with_search.py)"
    "[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/streamlit/llm-examples?quickstart=1)"

st.title("🔎 LangChain - Chat with search")

"""
In this example, we're using `StreamlitCallbackHandler` to display the thoughts and actions of an agent in an interactive Streamlit app.
Try more LangChain 🤝 Streamlit Agent examples at [github.com/langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent).
"""

if "messages" not in st.session_state:
    st.session_state["messages"] = [
        {
            "role": "assistant",
            "content": "Hi, I'm a chatbot who can search the web. How can I help you?",
        }
    ]

for msg in st.session_state.messages:
    st.chat_message(msg["role"]).write(msg["content"])

if prompt := st.chat_input(placeholder="Who won the Women's U.S. Open in 2018?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    if not openai_api_key:
        st.info("Please add your OpenAI API key to continue.")
        st.stop()

    llm = ChatOpenAI(
        model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, streaming=True
    )
    search = DuckDuckGoSearchRun(name="Search")
    search_agent = initialize_agent(
        [search],
        llm,
        agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
        handle_parsing_errors=True,
    )
    with st.chat_message("assistant"):
        st_cb = StreamlitCallbackHandler(st.container(), expand_new_thoughts=False)
        response = search_agent.run(st.session_state.messages, callbacks=[st_cb])
        st.session_state.messages.append({"role": "assistant", "content": response})
        st.write(response)

# Webretriver

In [None]:
import logging

import nest_asyncio
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chat_models.openai import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.web_research import WebResearchRetriever
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.vectorstores import Chroma

nest_asyncio.apply()

# Vectorstore
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db_oai",
)

class DocumentRetrievalSystem:
    def __init__(self):
        # Initialize LLM
        self.llm = ChatOpenAI(temperature=0)

        # Initialize Custom Search with Google Programmable Search Engine
        self.search = GoogleSearchAPIWrapper()

        # Initialize Web Research Retriever
        self.web_research_retriever = WebResearchRetriever.from_llm(
            vectorstore=vectorstore,
            llm=self.llm,
            search=self.search,
        )

        # Set up logging
        logging.basicConfig()
        logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)

        # Set up QA Chain
        self.qa_chain = RetrievalQAWithSourcesChain.from_chain_type(
            self.llm, retriever=self.web_research_retriever
        )

    def get_answer(self, user_input):
        # Retrieve relevant documents
        docs = self.web_research_retriever.get_relevant_documents(user_input)

        # Get answer from QA chain
        result = self.qa_chain({"question": user_input})

        # Extract unique sources
        sources = set()
        for d in docs:
            sources.add(d.metadata['source'])
        
        return result['answer'], sources

