In [1]:
from langchain_ollama import OllamaEmbeddings

embeddings = OllamaEmbeddings(model="llama3")

In [None]:
from langchain.embeddings import GROQEmbeddings 
from langchain.vectorstores import FAISS
from langchain.docstore.in_memory import InMemoryDocstore
import faiss

# Initialize GROQ embeddings
try:
    embeddings = GROQEmbeddings(api_key="GROQ_API_KEY")  # Replace with GROQ API key
    embedding_dimension = len(embeddings.embed_query("hello world"))
    print(f"Embedding dimension: {embedding_dimension}")
except Exception as e:
    print(f"Error initializing GROQ embeddings: {e}")
    raise

# Initialize FAISS
try:
    index = faiss.IndexFlatL2(embedding_dimension)
    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore({}),
        index_to_docstore_id={},
    )
    print("FAISS vector store initialized successfully.")
except Exception as e:
    print(f"Error initializing FAISS: {e}")
    raise


ImportError: cannot import name 'GROQEmbeddings' from 'langchain.embeddings' (/home/codespace/.python/current/lib/python3.12/site-packages/langchain/embeddings/__init__.py)

In [13]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Specify the URL of the website
url = "https://en.wikipedia.org/wiki/Football"

# Use WebBaseLoader to extract data
loader = WebBaseLoader(web_path=url)
bs_kwargs = dict(parse_only = bs4.SoupStrainer(class_ = "mw-content-ltr mw-parser-output" ),)
documents = loader.load(**bs_kwargs)

print(f"Number of documents loaded: {len(documents)}")


TypeError: BaseLoader.load() got an unexpected keyword argument 'parse_only'

In [3]:
documents

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Football', 'title': 'Football - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nFootball - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nCommon elements\n\n\n\n\n\n\n

In [4]:
# Creating a LangChain documents
from langchain.schema import Document

langchain_docs = [Document(page_content=doc.page_content, metadata=doc.metadata) for doc in documents]
print(f"Created {len(langchain_docs)} LangChain documents.")

Created 1 LangChain documents.


In [5]:
# text splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Max size of each chunk
    chunk_overlap=100  # Overlap between chunks for context
)

# Spliting documents into smaller chunks
split_docs = splitter.split_documents(langchain_docs)
print(f"Number of split documents: {len(split_docs)}")


Number of split documents: 160


In [6]:
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
import os
import streamlit as st
from langchain.embeddings.base import Embeddings

os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]
llm = ChatGroq(model="llama3-8b-8192", temperature=0.2)

class CustomSentenceTransformer(Embeddings):
    def __init__(self, model_name = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text:str) -> list:
        return self.model.encode(text).tolist()
    
    def embed_documents(self, texts:list) -> list:
        # return [self.model.encode(texts).tolist() for text in texts]
        return self.model.encode(texts).tolist()
        
        
embedder = CustomSentenceTransformer()

vector_store = FAISS.from_documents(split_docs, embedder)
print("FAISS vector store created.")

  from .autonotebook import tqdm as notebook_tqdm


FAISS vector store created.


In [11]:
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x796b6001a3f0>

In [8]:
retriever = vector_store.as_retriever()
retriever.search_kwargs = {"k": 5}

In [9]:
query = "What is the main purpose of this website?"

relevant_docs = retriever.get_relevant_documents(query)

context = '\n'.join([doc.page_content for doc in relevant_docs])
prompt = f"Using the following context, anser the question ::\n\n{context}\n\n Ouestion: {query}\nAnswer:"
print(prompt)

Using the following context, anser the question ::

This page was last edited on 10 January 2025, at 20:14 (UTC).
Text is available under the Creative Commons Attribution-ShareAlike 4.0 License;
additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.


Privacy policy
About Wikipedia
Disclaimers
Contact Wikipedia
Code of Conduct
Developers
Statistics
Cookie statement
Mobile view
11
References


















Toggle the table of contents







Football



38 languages




العربيةবাংলাCatalàDanskDeutschΕλληνικάEspañolEsperantoفارسیGaelg한국어HausaIsiZuluಕನ್ನಡLatinaLietuviųمصرى日本語Novialଓଡ଼ିଆਪੰਜਾਬੀPatoisភាសាខ្មែរPolskiРусскийसंस्कृतम्සිංහලSimple Englishسنڌيதமிழ்ไทยTürkçeУкраїнськаاردوئۇيغۇرچە / UyghurcheTiếng Việtייִדיש中文

Edit links











ArticleTalk





English

















ReadView sourceView history







Tools





Tools
move to sidebar

  relevant_docs = retriever.get_relevant_documents(query)


In [8]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_groq import ChatGroq

llm = ChatGroq(model="llama3-8b-8192", temperature=0.2)

# Filter relevant content (remove irrelevant text like footer or policies)
def filter_content(documents, keywords):
    filtered_docs = []
    for doc in documents:
        # if "Privacy policy" not in doc.page_content and "Terms of Use" not in doc.page_content:
        if keywords.lower() in doc.page_content.lower():
            filtered_docs.append(doc)
    return filtered_docs

query = "The earliest known matches involving non-public school clubs or institutions are"

# Retrieve relevant documents
relevant_docs = retriever.get_relevant_documents(query)

# Filter out irrelevant content
filtered_docs = filter_content(relevant_docs,query)

# Prepare the prompt using only relevant content
context = "\n".join([doc.page_content for doc in filtered_docs])

prompt = f"Using the following context, answer the question concisely:\n\n{context}\n\nQuestion: {query}\nAnswer:"

# prompt_template = PromptTemplate(input_variables=["context", "query"], template=prompt)
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="""Using the following context mentioned below in backticks, answer the question concisely and accurately:
    
    ```{context}```
    
    Question: {query}
    Answer:"""
)

# Run the LLM chain
chain = LLMChain(llm=llm, prompt=prompt_template)
response = chain.run({"context": context, "query": query})

# Print the response
print(response)

  chain = LLMChain(llm=llm, prompt=prompt_template)
  response = chain.run({"context": context, "query": query})


The earliest known matches involving non-public school clubs or institutions are as follows.


In [7]:
import os
import streamlit as st
from langchain.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain.schema import Document

# Set the environment variable for GROQ API key
os.environ["GROQ_API_KEY"] = st.secrets["GROQ_API_KEY"]

# Initialize ChatGroq language model
llm = ChatGroq(model="llama3-8b-8192", temperature=0.2)

# Define a custom embedding class for SentenceTransformer
class CustomSentenceTransformer(Embeddings):
    def __init__(self, model_name="all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text: str) -> list:
        return self.model.encode(text).tolist()

    def embed_documents(self, texts: list) -> list:
        return self.model.encode(texts).tolist()

# Initialize the custom embedder
embedder = CustomSentenceTransformer()

# Step 1: Extract data from website
url = "https://en.wikipedia.org/wiki/Football"  # Replace with your desired URL
loader = WebBaseLoader(web_path=url)
documents = loader.load()

# Step 2: Create LangChain documents
langchain_docs = [Document(page_content=doc.page_content, metadata=doc.metadata) for doc in documents]

# Step 3: Split documents into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
split_docs = splitter.split_documents(langchain_docs)

# Step 4: Create FAISS vector store using SentenceTransformer
vector_store = FAISS.from_documents(split_docs, embedder)
print("FAISS vector store created.")

# Step 5: Create retriever
retriever = vector_store.as_retriever()
retriever.search_kwargs = {"k": 5}

# Step 6: Fetch relevant documents and prepare the prompt
query = "in which year Ontario Rugby Football Union adopted the Burnside rules" 

relevant_docs = retriever.get_relevant_documents(query)
context = "\n".join([doc.page_content for doc in relevant_docs])
prompt = f"Using the following context, answer the question in detail based strictly on \
    the content from the website. Do not add any information beyond the content from the provided \
        context. Answer should be as specific and relevant as possible \
        to the query:\n\n{context}\n\nQuestion: {query}\nAnswer:"


# Step 7: Generate a response using ChatGroq
response = llm.predict(prompt)
print("Chatbot response:", response)


FAISS vector store created.
Chatbot response: According to the provided context, the Ontario Rugby Football Union adopted the Burnside rules in 1903.
