## Requirements

In [None]:
%pip install pypdf
%pip install pandas
%pip install chromadb
%pip install langchain
%pip install "unstructured[all-docs]"
%pip install python-dotenv
%pip install google-generativeai
%pip install langchain-google-genai 
%pip install "playwright"
%pip install selenium

## Imports

In [30]:
import os
from pathlib import Path as p
from dotenv import load_dotenv

import warnings
from pprint import pprint
from IPython.display import display

import textwrap
import pandas as pd

import google.generativeai as genai

from langchain import PromptTemplate
from langchain.vectorstores import Chroma
from langchain.chains import ConversationChain, LLMChain, ConversationalRetrievalChain
from langchain.document_loaders import SeleniumURLLoader
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.conversation.memory import ConversationSummaryMemory

warnings.filterwarnings("ignore")

## Parameters

In [78]:
load_dotenv('./keys/.env')

data_list = [
    "https://borgenproject.org/tag/homelessness-in-india/",
    "https://www.noblenonprofit.org/homelessness-in-india-an-unsolved-problem/",
    "https://caufsociety.com/homelessness-in-india/",
    "https://en.wikipedia.org/wiki/Homelessness_in_India",
    "https://www.hlrn.org.in/homelessness",
    "https://blog.ipleaders.in/prevention-homelessness-problem-treated-legally/",
    "https://borgenproject.org/homelessness-in-india/",
    "https://www.aljazeera.com/gallery/2023/1/3/photos-new-delhis-homeless-shiver-in-biting-cold",
    "https://www.quora.com/In-India-if-a-person-suddenly-becomes-homeless-what-are-some-tips-you-will-give-him",
    "https://leaglesamiksha.com/2022/06/19/homelessness-in-india-causes-and-remedies/",
    "https://www.orfonline.org/expert-speak/sheltering-the-urban-homeless"
]

## Configure LLM

In [6]:
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

In [7]:
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3
)

In [8]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

## Loading Data

In [9]:
data_loader = SeleniumURLLoader(urls=data_list)
pages = data_loader.load()

## Prompt Design - In Context

In [50]:
prompt_template = """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context also refreter summary of past Conversation, say "answer not available in context" \n\n
                    History: \n {history}\n
                    Context: \n {context}?\n
                    Question: \n {question} \n
                    Answer:
                  """

prompt = PromptTemplate(
    input_variables=[
      "history",
      "context", 
      "question",
    ],
    template=prompt_template
)

In [70]:
prompt = PromptTemplate(
    input_variables=[
      "history",
      "context", 
      "question",

    ],
    template=(
        """Answer the question as precise as possible using the provided context. If the answer is
                    not contained in the context also refreter summary of past Conversation, say "answer not available in context":\n

        History:{history}\n

        Context:{context}\n

        Question:{question}\n
        """
    )
)

In [69]:
summary_memory=ConversationSummaryMemory(
    llm=model,
    memory_key="history",
    input_key="question"
)

In [71]:
conversation = LLMChain(
    llm=model, 
    verbose=False,
    memory=summary_memory,
    prompt=prompt
)
chain = LLMChain(
    llm=model,
    prompt=prompt,
    memory=summary_memory, # Contains the input_key
    verbose=False
)

In [53]:
# stuff_chain = load_qa_chain(
#     model, 
#     chain_type="stuff", 
#     prompt=prompt
# )

## RAG pipeline

In [54]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
context = "\n\n".join(str(p.page_content) for p in pages)
texts = text_splitter.split_text(context)

In [55]:
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

In [56]:
question = input('Question: ') 
docs = vector_index.get_relevant_documents(question)

In [75]:
docs_str = [str(x.page_content) for x in docs]

In [80]:
stuff_answer = conversation(
    {"context": docs_str, "question": question}
)

In [None]:
# stuff_answer = conversation.predict(
#     context=docs_str,
#     question=question,
# )

In [83]:
pprint(stuff_answer)

{'context': ['Posts\n'
             '\n'
             'Global Poverty, Homeslessness\n'
             '\n'
             'Homeless in India: Causes and Aid Available\n'
             '\n'
             'Homelessness in India is on the rise. Many people are turning to '
             'the streets as a place to sleep and find income by performing '
             'hard labor. Children living on the street are also becoming very '
             'common for many reasons, including abuse and family '
             'abandonment.\n'
             '\n'
             'Causes\n'
             '\n'
             'Factors that contribute to homelessness include impairment, a '
             'shortage of housing affordability, irregular or long-term '
             'unemployment and shifts in business. Policymakers state that the '
             'cause of homelessness is substance addiction, mental illness, '
             'relationship failures and domestic abuse. Prime Minister Modi '
             'has set a goal