In [None]:
# Install the notebook's dependencies into the environment the kernel is running in.

import subprocess
import sys

# Requirement specifiers handed to pip, one install per entry.
packages_to_install = [
    'google-search-results',    # SerpApi client; provides the `serpapi` module.
    'langchain',                # Framework for building applications around language models.
    'sentence_transformers',    # Sentence-embedding models.
    'protobuf==3.20.0',         # Protocol Buffers - Google's data interchange format.
    'llama-cpp-python',         # Python bindings for llama.cpp.
    'gpt4all',                  # Runs GPT4All language models locally.
    'faiss-cpu',                # Similarity search and clustering of dense vectors.
    'pydantic==1.10.11',        # Data validation and settings management using type annotations.
    'typing-inspect==0.8.0',    # Runtime inspection of typing constructs.
    'typing_extensions==4.5.0', # Back-ported and experimental type hints.
    'requests',                 # HTTP client.
    'beautifulsoup4'            # HTML/XML parsing.
]

# Run pip through the kernel's own interpreter: a bare `pip` on PATH may belong
# to a different Python installation than the one this notebook imports from.
# check=True raises CalledProcessError when pip exits non-zero, so the success
# message below is only reached if every install actually succeeded.
for package in packages_to_install:
    subprocess.run([sys.executable, '-m', 'pip', 'install', package], check=True)

print("All specified packages have been successfully installed.")

In [None]:
# Importing required modules and classes for the LangChain application.

# Importing embeddings module for handling Hugging Face embeddings.
from langchain.embeddings import HuggingFaceEmbeddings

# Importing a callback handler for streaming standard output during execution.
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Importing the LLM (large language model) wrapper classes.
from langchain.llms import GPT4All, LlamaCpp

# Importing required built-in modules.
import os             # For interacting with the operating system.
import argparse       # For parsing command-line arguments.
import time           # For working with time-related operations.

# Importing modules related to different components of the LangChain application.
from langchain.chains import RetrievalQA                # Chain module for Retrieval-based Question Answering.
from langchain.document_loaders import TextLoader       # For loading text documents.
from langchain.text_splitter import CharacterTextSplitter  # For splitting text into characters.
from langchain.memory import ConversationBufferMemory   # Memory module for conversation buffers.
from langchain.vectorstores import FAISS                # For utilizing FAISS vector store.

# Importing a module for interfacing with Google Search API.
from serpapi import GoogleSearch

# Importing the requests module for making HTTP requests.
import requests

# Importing BeautifulSoup for parsing HTML documents.
from bs4 import BeautifulSoup

# Importing the CallbackManager class for managing callback functions.
from langchain.callbacks.manager import CallbackManager

# Importing the PromptTemplate class for managing prompt templates.
from langchain import PromptTemplate

In [None]:
# Splitter configured for a chunk size of 1000 with no overlap between chunks.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)

# Read the source text file into document objects.
loader = TextLoader("source_documents/raghav.txt")
documents = loader.load()

# Cut the loaded documents into chunks; the embedding cell consumes `texts`.
texts = text_splitter.split_documents(documents)

In [None]:
# Embedding model used to vectorise the text chunks.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed every chunk and index the resulting vectors in a FAISS store.
db = FAISS.from_documents(documents=texts, embedding=embeddings)

# Retriever view over the store; the QA chain queries it for relevant chunks.
retriever = db.as_retriever()

In [None]:
# Callback manager holding a single handler that streams output to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Location of the GPT4All model file. Set the GPT4ALL_MODEL_PATH environment
# variable to run the notebook on another machine; the fallback is the path the
# notebook was originally written against.
model_path = os.environ.get(
    "GPT4ALL_MODEL_PATH",
    "/Users/raghavsrivastava/Desktop/privateGPT-main/groovy.bin",
)

# Local GPT4All language model used by the question-answering chain.
llm = GPT4All(
    model=model_path,                   # Path to the GPT4All model file.
    callback_manager=callback_manager,  # Streams output through the handler above.
    verbose=True,                       # Enable verbose output.
)

In [None]:
# Prompt text sent to the model. {context} and {question} are placeholders that
# are filled in when the prompt is rendered; the model is told to reply "IDK"
# when the supplied text does not contain the answer.
template = """Answer the question based on the text (.txt) file given to you.
If the required answer is not specified in the
text then respond with "IDK".

{context}

Question: {question}

Answer: """

# Wrap the raw string in a PromptTemplate, declaring the two placeholders it uses.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["question", "context"],
)

In [None]:
# Extra keyword arguments for the chain type: supply the custom prompt defined
# above under the "prompt" key.
chain_type_kwargs = dict(prompt=prompt_template)

In [None]:
# Assemble the retrieval question-answering chain.
#
#   chain_type              - "stuff" chain type configuration.
#   llm                     - the GPT4All model instance created earlier.
#   retriever               - the FAISS-backed retriever used to fetch documents.
#   chain_type_kwargs       - extra arguments for the chain type (the custom prompt).
#   return_source_documents - include the source documents in the output.
qa = RetrievalQA.from_chain_type(
    chain_type="stuff",
    llm=llm,
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [None]:
# NOTE(review): bare attribute reference - this only displays the bound method
# and never invokes the chain; it looks like leftover exploration. Presumably
# calling `qa.run(...)` would be rejected because the chain was built with
# return_source_documents=True (more than one output key) - verify before use.
qa.run

In [None]:
# Ask the user for a question; the text they type is kept in `prompt`.
# "prompt:" is the label shown next to the input box.
prompt = input("prompt:")

In [None]:
# Run the question through the QA chain and log the exchange.

# File that receives the prompt/answer log (created if missing, appended otherwise).
output_file_path = "raghav.txt"

with open(output_file_path, "a") as output_file:
    # Query the chain with the user's prompt and pull out the answer text.
    res = qa(prompt)
    answer = res['result']

    # Emit the four log pieces in order: prompt header, prompt, answer header, answer.
    output_file.writelines((
        "Prompt:\n",
        prompt + "\n\n",
        "Answer:\n",
        answer + "\n\n",
    ))

In [None]:
def getdata(url, timeout=10):
    """Fetch ``url`` with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Value passed to ``requests.get`` as ``timeout``. Without one
            the request may wait indefinitely on an unresponsive server.

    Returns:
        The ``text`` attribute of the response (the page content as a string).
    """
    response = requests.get(url, timeout=timeout)
    return response.text

In [None]:
def _scrape_top_result(query, api_key):
    """Return the ``<p>`` elements of the top-ranked Google result for ``query``.

    Args:
        query: Search text sent to SerpApi.
        api_key: SerpApi key used to authenticate the search.

    Returns:
        A list with one HTML string per ``<p>`` element found on the page of the
        result whose ``position`` is 1, or an empty list when the search
        returned no such result.
    """
    # NOTE(review): the previous `"output": "json|html"` entry was dropped - it
    # names two formats at once, while get_dict() is used to obtain a parsed
    # dict; confirm against the SerpApi documentation.
    search = GoogleSearch({
        "engine": "google",
        "q": query,
        "location": "Seattle-Tacoma, WA, Washington, United States",
        "api_key": api_key,
    })
    results = search.get_dict()

    # .get() avoids a KeyError when the response carries no "organic_results".
    for hit in results.get("organic_results", []):
        if hit.get("position") == 1 and hit.get("link"):
            soup = BeautifulSoup(getdata(hit["link"]), "html.parser")
            # NOTE(review): str() keeps the surrounding <p> markup, as before.
            return [str(paragraph) for paragraph in soup.find_all("p")]
    return []


# Interactive question loop. Submit an empty line, "exit" or "quit" (or
# interrupt the kernel at the input box) to leave the loop.
while True:
    try:
        prompt = input("prompt:")
    except (EOFError, KeyboardInterrupt):
        break
    if prompt.strip().lower() in {"", "exit", "quit"}:
        break

    # Retrieve the answer from the previously configured QA chain.
    res = qa(prompt)
    answer = res['result']

    # When the model answers "IDK", fall back to scraping the top Google hit.
    scraped = []
    if "IDK" in answer:
        # The SerpApi key comes from the environment rather than being embedded
        # in the notebook.
        # NOTE(review): the key that used to be hardcoded here is exposed in the
        # notebook history and should be rotated.
        api_key = os.environ.get("SERPAPI_API_KEY")
        if not api_key:
            print("SERPAPI_API_KEY is not set; skipping the web-search fallback.")
        else:
            try:
                scraped = _scrape_top_result(prompt, api_key)
            except requests.RequestException as exc:
                # A failed search or page download should not end the session.
                print(f"Web-search fallback failed: {exc}")

    # Write everything through one handle so the prompt always precedes the
    # text that belongs to it. (Reopening the same file per paragraph while this
    # handle was still buffering could put scraped text ahead of the prompt.)
    with open(output_file_path, "a") as output_file:
        output_file.write("Prompt:\n")
        output_file.write(prompt + "\n\n")

        if "IDK" not in answer:
            output_file.write("Answer:\n")
            output_file.write(answer + "\n\n")
        else:
            for paragraph in scraped:
                output_file.write(paragraph + "\n")

    # Report once per prompt instead of once per paragraph.
    if scraped:
        print(f"Data scraped and appended to '{output_file_path}' successfully.")