<a href="https://colab.research.google.com/github/Naveenand/Langchain/blob/main/buiding_research_assistent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install requests beautifulsoup4 openai langchain langsmith duckduckgo-search

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import requests
from bs4 import BeautifulSoup
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
from langchain.utilities import DuckDuckGoSearchAPIWrapper
import json

In [None]:
import os
import getpass

# Prompt for the OpenAI API key at run time (input is not echoed) and expose it
# through the OPENAI_API_KEY environment variable.
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

In [None]:
import os
from uuid import uuid4

# Short random tag (first 8 hex characters of a UUID4) used in the project name below.
unique_id = uuid4().hex[:8]

# Export the tracing settings as environment variables in one step.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": f"Tracing Walkthrough - {unique_id}",
        "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    }
)

In [None]:
# Prompt for the LangSmith key instead of hardcoding it in the notebook, the same
# way the OpenAI key is read, so a real secret is never saved or committed with the file.
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API Key:")

In [None]:
# Sample page used to try out scrape_text on a single URL.
url = "https://blog.langchain.dev/pinecone-serverless/"

In [None]:
# Number of search results requested per query; web_search uses it as the
# default for its num_results parameter.
RESULTS_PER_QUESTION = 2

# Single DuckDuckGo search wrapper instance shared by every web_search call.
ddg_search = DuckDuckGoSearchAPIWrapper()


In [None]:
def web_search(query: str, num_results: int = RESULTS_PER_QUESTION) -> list:
    """Search DuckDuckGo for ``query`` and return the URLs of the results.

    Args:
        query: Search string passed to ``ddg_search.results``.
        num_results: Number of results to request from the search wrapper.

    Returns:
        The ``"link"`` value of every result entry that has one, in the order
        the wrapper returned them. The list is empty when no entry carries a link.
    """
    results = ddg_search.results(query, num_results)
    # NOTE(review): some langchain releases appear to return a placeholder entry
    # without a "link" key when nothing is found -- confirm against the installed
    # version. Skipping such entries avoids a KeyError that would abort the chain.
    return [r["link"] for r in results if "link" in r]

In [None]:
def scrape_text(url: str, timeout: float = 10.0) -> str:
    """Download a web page and return its text content.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests.get``
            has no default timeout, so without one an unresponsive host could
            block the notebook indefinitely.

    Returns:
        The text extracted from the page's HTML, with fragments separated by a
        single space. If the response status is not 200, or any exception occurs
        while fetching or parsing, a message starting with
        ``"Failed to retrieve the webpage:"`` is returned instead of raising.
    """
    try:
        response = requests.get(url, timeout=timeout)

        # Anything other than 200 is reported to the caller as a failure message.
        if response.status_code != 200:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"

        # Parse the HTML and return only its text.
        soup = BeautifulSoup(response.text, "html.parser")
        return soup.get_text(separator=" ", strip=True)
    except Exception as e:
        # Deliberately best-effort: one bad URL should not abort a whole batch of
        # scrapes, so the error is logged and returned as text.
        print(e)
        return f"Failed to retrieve the webpage: {e}"

In [None]:
# Prompt used to summarise one scraped page with respect to a question.
# Placeholders: {text} is the page content, {question} is the query being answered.
# (Fixed the typo "imply summarize" -> "simply summarize" in the fallback instruction.)
SUMMARY_TEMPLATE = """{text}
-----------
Using the above text, answer in short the following question:
> {question}
-----------
if the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats etc if available."""


In [None]:
# Build a chat prompt from SUMMARY_TEMPLATE so it can be piped into a chat model.
SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)

In [None]:
# Manual check of the scraper on the sample URL; only the first 10,000
# characters of the result are kept.
page_content = scrape_text(url)[:10000]

In [None]:
# For an input dict carrying "url" and "question":
#   1. scrape the page and keep its first 10,000 characters as "text",
#   2. feed text + question through SUMMARY_PROMPT and the chat model,
#   3. store the model's reply under "summary",
#   4. render everything as a single "URL: ... / SUMMARY: ..." string.
scrape_and_summarize_chain = (
    RunnablePassthrough.assign(
        summary=(
            RunnablePassthrough.assign(text=lambda x: scrape_text(x["url"])[:10000])
            | SUMMARY_PROMPT
            | ChatOpenAI(model="gpt-3.5-turbo-1106")
            | StrOutputParser()
        )
    )
    | (lambda x: f"URL: {x['url']}\n\nSUMMARY: {x['summary']}")
)

In [None]:
# For an input dict carrying "question":
#   1. look up result URLs with web_search and store them under "urls",
#   2. fan out into one {"question", "url"} dict per URL,
#   3. run scrape_and_summarize_chain on each of those dicts.
web_search_chain = (
    RunnablePassthrough.assign(urls=lambda x: web_search(x["question"]))
    | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]])
    | scrape_and_summarize_chain.map()
)

In [None]:
# Prompt that asks the model to turn {question} into search queries.
# The reply is requested as a JSON-style list of strings because the chain that
# consumes this prompt parses the model output with json.loads.
SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "user",
            "Write 1 google search queries to search online that form an "
            "objective opinion from the following: {question}\n"
            "You must respond with a list of strings in the following format: "
            '["query 1"].',
        ),
    ]
)

In [None]:
# Question -> list of search queries: prompt the model, take the reply as a
# string, then decode it as JSON.
search_question_chain = (
    SEARCH_PROMPT
    | ChatOpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)
    | StrOutputParser()
    | json.loads
)

In [None]:
# Generate the search queries, wrap each one as {"question": query}, and run
# web_search_chain on every wrapped query.
full_research_chain = (
    search_question_chain
    | (lambda x: [{"question": q} for q in x])
    | web_search_chain.map()
)

In [None]:
# System message that sets the report writer's persona.
WRITER_SYSTEM_PROMPT = (
    "You are an AI critical thinker research assistant. "
    "Your sole purpose is to write well written, critically acclaimed, "
    "objective and structured reports on given text."
)

In [None]:
# User message for the final report.
# Placeholders: {research_summary} is the collected research text, {question} is
# the topic the report must answer. The trailing backslashes join the wrapped
# lines into a single line of the prompt.
# (Fixed the typo "Do NOT deter to" -> "Do NOT defer to".)
RESEARCH_REPORT_TEMPLATE = """Information:
--------
{research_summary}
--------
Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
The report should focus on the answer to the question, should be well structured, informative, \
in depth, with facts and numbers if available and a minimum of 1,200 words.
You should strive to write the report as long as you can using all relevant and necessary information provided.
You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
You must write the report in apa format.
Please do your best, this is very important to my career."""

In [None]:
# Report-writing prompt: the writer persona as the system message, followed by
# the report instructions as the user message.
prompt = ChatPromptTemplate.from_messages(
    [("system", WRITER_SYSTEM_PROMPT), ("user", RESEARCH_REPORT_TEMPLATE)]
)


In [None]:
def collapse_list_of_lists(list_of_lists):
    """Flatten a list of lists of strings into one string.

    The strings of each inner list are joined with a blank line (two newline
    characters) between them, and the resulting groups are joined to each
    other in the same way.
    """
    separator = "\n\n"
    return separator.join(separator.join(group) for group in list_of_lists)

In [None]:
# End-to-end pipeline: run the research chain and collapse its nested results
# into "research_summary", then fill the report prompt, call the chat model,
# and return the reply as a string.
chain = (
    RunnablePassthrough.assign(
        research_summary=full_research_chain | collapse_list_of_lists
    )
    | prompt
    | ChatOpenAI(model="gpt-3.5-turbo-1106")
    | StrOutputParser()
)

In [None]:
# Run the full pipeline for one example question; the report is the cell's output.
chain.invoke({"question": "how is the difference between langsmith and langchain"})

"# LangSmith vs LangChain: Understanding the Difference\n\nLangSmith and LangChain are two integral components in the realm of LLM (Language, Logic, and Meaning) applications. Both developed by the same company, these platforms serve distinct purposes while also complementing each other to enhance the development and deployment of LLM applications. In this comprehensive report, we will delve into the differences between LangSmith and LangChain, exploring their individual functionalities, features, and how they work in tandem to provide a comprehensive ecosystem for LLM application development.\n\n## LangSmith: A Platform for Debugging, Testing, and Monitoring LLM Applications\n\nLangSmith, as described, is a platform specifically designed for debugging, testing, evaluating, and monitoring LLM applications. It serves as a comprehensive toolkit for developers and engineers working with LLM frameworks, providing essential features to streamline the development process. Some of the key fun