In [47]:
from crewai_tools import ScrapeWebsiteTool
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv

In [27]:
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this provides the Google API key needed by the
# langchain_google_genai clients created further down — confirm.
load_dotenv()

True

In [28]:
# Page that serves as the knowledge source for the question answering below.
ARTICLE_URL = 'https://animemangatoon.com/castle-swimmer-unveiling-new-prophecy/'

# Bind the scraper to a single URL so it only reads the content of that page.
# Only one tool instance is needed; an argument-less ScrapeWebsiteTool() would
# just be overwritten by this assignment.
tool = ScrapeWebsiteTool(website_url=ARTICLE_URL)

# Extract the text from the site (this performs the actual scrape).
text = tool.run()

Using Tool: Read website content


In [33]:
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split raw text into overlapping chunks suitable for embedding.

    Args:
        text: The full text to split.
        chunk_size: ``chunk_size`` forwarded to ``RecursiveCharacterTextSplitter``.
            Defaults to 10000, the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 1000.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_text(text)``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)

In [34]:
def get_vector_store(text_chunks):
    """Embed text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Sequence of strings to embed and index.

    Returns:
        The object returned by ``FAISS.from_texts``.

    Raises:
        IndexError: If ``text_chunks`` is empty, or if ``FAISS.from_texts``
            raises ``IndexError`` while building the store. The same exception
            type is used in both cases so existing ``except IndexError``
            handlers keep working.
    """
    # Fail fast with a readable message instead of an opaque indexing error
    # from deeper in the stack, and before constructing the embeddings client.
    if not text_chunks:
        raise IndexError(
            "An error occurred during vector store creation: no text chunks were provided"
        )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    try:
        return FAISS.from_texts(text_chunks, embedding=embeddings)
    except IndexError as e:
        # Chain explicitly so the original error is preserved as __cause__.
        raise IndexError(f"An error occurred during vector store creation: {e}") from e

In [54]:
def get_conversational_chain(model_name="gemini-pro", temperature=0.3):
    """Build the question-answering chain used to answer user questions.

    The chain is created with ``load_qa_chain(..., chain_type="stuff")`` and a
    prompt that exposes two input variables, ``context`` and ``question``.

    Args:
        model_name: Model identifier passed to ``ChatGoogleGenerativeAI``.
            Defaults to ``gemini-pro``.
        temperature: Sampling temperature passed to the chat model.
            Defaults to 0.3.

    Returns:
        The chain object returned by ``load_qa_chain``.
    """
    # The template text is sent to the model verbatim (including indentation),
    # so it is intentionally left unchanged.
    prompt_template = """
    Please extract and summarize the relevant information from the provided context. 
    Context: 
    {context}
    Question: 
    {question}
    Answer:
    """

    model = ChatGoogleGenerativeAI(model=model_name, temperature=temperature)
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    return load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [55]:
# Split the scraped page text into chunks using get_text_chunks' splitter settings.
chunks = get_text_chunks(text)

In [56]:
# Embed the chunks and build the FAISS store; user_input() reads this global.
vector_store = get_vector_store(chunks)

In [59]:
def user_input(user_question):
    """Answer a question about the indexed page and print the answer.

    Looks up the documents most similar to the question in the module-level
    ``vector_store``, passes them together with the question to the chain
    built by ``get_conversational_chain()``, and prints the chain's
    ``output_text``.

    Args:
        user_question: The question to answer.

    Returns:
        None. The answer is written to standard output.
    """
    # No embeddings client is created here: the lookup is delegated entirely
    # to the global `vector_store` built in an earlier cell.
    docs = vector_store.similarity_search(user_question)
    chain = get_conversational_chain()

    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    print(response["output_text"])

In [60]:
# Example query against the indexed article; user_input prints the answer.
user_input('What is Castle Swimmer about?')

Castle Swimmer is a webtoon that follows the journey of Siren, a prince who is cursed to live a life of torment. The story explores the themes of prophecy, destiny, and the struggle against fate.
