In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader 
from langchain_huggingface import HuggingFaceEmbeddings


# Sentence-transformers model used to turn text into dense vectors.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Splitter configured with chunk_size=1000 and chunk_overlap=200; separators are
# listed from coarsest (blank line) to finest (empty string).
text_spliter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\n\n", "\n", " ", ""],
)

# Download the Senate's Constitution page and load it as documents.
loader = WebBaseLoader("https://www.senate.gov/about/origins-foundations/senate-and-constitution/constitution.htm")
documents = loader.load()

chunks = text_spliter.split_documents(documents)

print("Number of chunks ", len(chunks))

# embed_documents expects a list of texts. Passing the bare string would be
# treated as a sequence of one-character texts, so the first "embedding" would
# describe a single character rather than the chunk. Wrap it in a list.
chunks_embeding = embeddings.embed_documents([chunks[0].page_content])

# Dimensionality of the embedding vector for the first chunk.
len(chunks_embeding[0])




Number of chunks  81


768

In [14]:
from langchain_openai import OpenAIEmbeddings

# Rebind `embeddings` to the OpenAI embedding model; the FAISS cell below
# picks up this object.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# Collect the raw text of every chunk, then embed them all in one call.
chunk_texts = [piece.page_content for piece in chunks]
chunks_embed = embeddings.embed_documents(chunk_texts)

# Show the embedding vector of the first chunk.
chunks_embed[0]

[0.017226191237568855,
 0.0037627771962434053,
 0.052760299295186996,
 0.02907864935696125,
 0.001788336900062859,
 0.03238198161125183,
 -0.04508353769779205,
 -0.029636958613991737,
 -0.0022332402877509594,
 0.042850296944379807,
 0.028031816706061363,
 -0.059227392077445984,
 -0.007647684775292873,
 -0.002832260448485613,
 0.022844186052680016,
 -0.0036348311696201563,
 0.04547900706529617,
 0.04452522471547127,
 0.023076815530657768,
 0.05369081720709801,
 0.0019162829266861081,
 -0.02035505324602127,
 -0.02312334068119526,
 0.018203234300017357,
 -0.014899899251759052,
 0.048526447266340256,
 -0.03870949521660805,
 -0.05564490333199501,
 0.03282397985458374,
 -0.01035199873149395,
 0.034336067736148834,
 -0.020343422889709473,
 -0.02375143952667713,
 -0.013713490217924118,
 0.01264339592307806,
 0.017040088772773743,
 -0.00774073600769043,
 -0.011619827710092068,
 0.028357498347759247,
 0.0076942103914916515,
 0.011224358342587948,
 -0.010811441577970982,
 0.03435933217406273,
 -0

In [15]:
from langchain_community.vectorstores import FAISS

# Build a FAISS index from the chunks, using the current `embeddings` object
# to vectorize them.
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

# Number of vectors stored in the index.
vector_store.index.ntotal


81

In [25]:
# Natural-language question to look up in the indexed text.
query = "what article is necessary for freedom of speech ?"

# Retrieve the three chunks the vector store ranks as most similar to the query.
docs = vector_store.similarity_search(query, k=3)

# Print each hit as: an 80-dash rule, the chunk text, then a "\n -" footer.
# A single print with sep="\n" emits the same bytes as three separate prints.
for doc in docs:
    print("-"*80, doc.page_content, "\n -", sep="\n")

--------------------------------------------------------------------------------
AMENDMENTS

Amendment I (1791)   Amendment II (1791)   
Amendment III (1791)   Amendment IV (1791)   
Amendment V (1791)   Amendment VI (1791)   
Amendment VII (1791)   Amendment VIII (1791)   
Amendment IX (1791)   Amendment X (1791)   
Amendment XI (1795/1798)   Amendment XII (1804)   
Amendment XIII (1865)   Amendment XIV (1868)   
Amendment XV (1870)   Amendment XVI (1913)   
Amendment XVII (1913)   Amendment XVIII (1919)   
Amendment XIX (1920)   Amendment XX (1933)   
Amendment XXI (1933)   Amendment XXII (1951)   
Amendment XXIII (1961)   Amendment XXIV (1964)   
Amendment XXV (1967)   Amendment XXVI (1971)   
Amendment XXVII (1992)   

















Amendment I (1791)






Congress shall make no law respecting an establishment of religion, or prohibiting the free exercise thereof; or abridging the freedom of speech, or of the press; or the right of the people peaceably to assemble, and to petiti