In [6]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [7]:
text_loader = TextLoader("doc.txt")
text_doc = text_loader.load()

text_doc

[Document(metadata={'source': 'doc.txt'}, page_content='The dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Transformer,\nbased solely on attention mechanisms, dispensing with recurrence and convolutions\nentirely. Experiments on two machine translation tasks show these models to\nbe superior in quality while being more parallelizable and requiring significantly\nless time to train. Our model achieves 28.4 BLEU on the WMT 2014 English to-German translation task, improving over the existing best results, including\nensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task,\nour model establishes a new single-model state-of-the-art BLEU score of 41.8 after\ntraining for 3.5 days on eight GPUs, a small fraction of the training cost

In [8]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=128, chunk_overlap=64)
pdf_split = text_splitter.split_documents(text_doc)
pdf_split, len(pdf_split)

([Document(metadata={'source': 'doc.txt'}, page_content='The dominant sequence transduction models are based on complex recurrent or'),
  Document(metadata={'source': 'doc.txt'}, page_content='convolutional neural networks that include an encoder and a decoder. The best'),
  Document(metadata={'source': 'doc.txt'}, page_content='performing models also connect the encoder and decoder through an attention'),
  Document(metadata={'source': 'doc.txt'}, page_content='mechanism. We propose a new simple network architecture, the Transformer,'),
  Document(metadata={'source': 'doc.txt'}, page_content='based solely on attention mechanisms, dispensing with recurrence and convolutions'),
  Document(metadata={'source': 'doc.txt'}, page_content='entirely. Experiments on two machine translation tasks show these models to'),
  Document(metadata={'source': 'doc.txt'}, page_content='be superior in quality while being more parallelizable and requiring significantly'),
  Document(metadata={'source': 'doc

In [9]:
%%time

embedding = OllamaEmbeddings(model="llama3")
embds = embedding.embed_documents(pdf_split)
embds, len(embds), len(embds[0])

CPU times: total: 93.8 ms
Wall time: 5min 41s


([[1.2669237852096558,
   -4.56882381439209,
   -1.991895079612732,
   1.3661861419677734,
   -0.007045166566967964,
   -3.5238568782806396,
   0.9267991781234741,
   1.243589162826538,
   0.26260289549827576,
   -1.877408504486084,
   1.8037965297698975,
   -2.1863086223602295,
   -5.4294328689575195,
   3.1615519523620605,
   1.4575881958007812,
   3.374591588973999,
   0.07364723831415176,
   3.312180280685425,
   -3.99194598197937,
   2.8065428733825684,
   -1.9493095874786377,
   1.3369154930114746,
   2.9204673767089844,
   1.1514989137649536,
   -1.1691871881484985,
   1.8696010112762451,
   -1.5919852256774902,
   3.6007235050201416,
   4.655327320098877,
   -3.203712224960327,
   -0.9739953875541687,
   4.474459171295166,
   -0.5537883043289185,
   5.124572277069092,
   1.7814079523086548,
   -1.7783219814300537,
   -0.20980723202228546,
   -0.9989951848983765,
   2.519618034362793,
   4.735248565673828,
   0.47169408202171326,
   -2.4756906032562256,
   0.37630847096443176,
 

In [10]:
db = Chroma.from_documents(documents=pdf_split, embedding=embedding)
db

<langchain_community.vectorstores.chroma.Chroma at 0x19c79f85be0>

In [11]:
query = [
    "Transformers are best when it comes to sequence related tasks.",
    "Attention mechanism is the basis of transformers."
]

db.similarity_search(query=query)

[Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
 Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.'),
 Document(metadata={'source': 'doc.txt'}, page_content='other tasks by applying it successfully to English constituency parsing both with\nlarge and limited training data.'),
 Document(metadata={'source': 'doc.txt'}, page_content='layers. On both WMT 2014 English-to-German and WMT 2014 English-to-French translation tasks, we achieve a new state of the art.')]

In [14]:
query = "Transformers are best when it comes to sequence related tasks."

db.similarity_search(query=query)

[Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
 Document(metadata={'source': 'doc.txt'}, page_content='the former task our best model outperforms even all previously reported ensembles.'),
 Document(metadata={'source': 'doc.txt'}, page_content='images, audio and video. Making generation less sequential is another research goals of ours.'),
 Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.')]

In [15]:
db.similarity_search_with_relevance_scores(query)

  db.similarity_search_with_relevance_scores(query)


[(Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
  -5593.559799351364),
 (Document(metadata={'source': 'doc.txt'}, page_content='the former task our best model outperforms even all previously reported ensembles.'),
  -8430.867190900346),
 (Document(metadata={'source': 'doc.txt'}, page_content='images, audio and video. Making generation less sequential is another research goals of ours.'),
  -8722.502472364074),
 (Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.'),
  -8903.704109314855)]

In [16]:
embdx = embedding.embed_query(query)
db.similarity_search_by_vector_with_relevance_scores(embdx)

[(Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
  7911.90234375),
 (Document(metadata={'source': 'doc.txt'}, page_content='the former task our best model outperforms even all previously reported ensembles.'),
  11924.4609375),
 (Document(metadata={'source': 'doc.txt'}, page_content='images, audio and video. Making generation less sequential is another research goals of ours.'),
  12336.8955078125),
 (Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.'),
  12593.1533203125)]

In [18]:
# Saving the model, saved as sqllite3

chroma_store = Chroma.from_documents(documents=pdf_split, embedding=embedding, persist_directory="./chroma-stor")

In [21]:
db2 = Chroma(persist_directory="./chroma-stor", embedding_function=embedding)

In [22]:
db2.similarity_search_with_relevance_scores(query=query)

  db2.similarity_search_with_relevance_scores(query=query)


[(Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
  -5569.534418986773),
 (Document(metadata={'source': 'doc.txt'}, page_content='the former task our best model outperforms even all previously reported ensembles.'),
  -8426.593396461043),
 (Document(metadata={'source': 'doc.txt'}, page_content='images, audio and video. Making generation less sequential is another research goals of ours.'),
  -8759.375082108107),
 (Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.'),
  -8933.240402240719)]

In [23]:
retriever=db2.as_retriever()
retriever.invoke(query)

[Document(metadata={'source': 'doc.txt'}, page_content='the recurrent layers most commonly used in encoder-decoder architectures with multi-headed self-attention.'),
 Document(metadata={'source': 'doc.txt'}, page_content='the former task our best model outperforms even all previously reported ensembles.'),
 Document(metadata={'source': 'doc.txt'}, page_content='images, audio and video. Making generation less sequential is another research goals of ours.'),
 Document(metadata={'source': 'doc.txt'}, page_content='The code we used to train and evaluate our models is available at https://github.com/\ntensorflow/tensor2tensor.')]