Imports

In [None]:
import getpass
import os

# For token tracking
from langchain.callbacks import get_openai_callback # For tracking LLM calls
from num_tokens_from_string import num_tokens_from_string # For tracking embedding

# For document loading
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

# For splitting document into chunks
from langchain.text_splitter import CharacterTextSplitter

# For embedding
from langchain.embeddings import OpenAIEmbeddings

# For vector store
import weaviate
from langchain.vectorstores import Weaviate

Load text files from the documents directory and split them into chunks

In [None]:
# Collect every .txt file matched by the glob under ./documents, read via TextLoader
loader = DirectoryLoader(
    './documents',
    glob="**/*.txt",
    loader_cls=TextLoader,
    show_progress=True,
)
documents = loader.load()

# Cut the loaded documents into chunks of up to 1000 units with a 200-unit
# overlap; chunk length is measured with the built-in len
splitter_options = dict(chunk_size=1000, chunk_overlap=200, length_function=len)
text_splitter = CharacterTextSplitter(**splitter_options)
docs = text_splitter.split_documents(documents)

In [None]:
# Declaring OpenAI Key
# Keep a key that is already present in the environment; only when it is
# missing or empty, ask for it interactively. getpass does not echo the input,
# so the secret never ends up in the notebook source or its saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

# NOTE(review): OpenAIEmbeddings is constructed without arguments, so it
# presumably picks the key up from OPENAI_API_KEY — confirm for the installed
# langchain version.
embedding = OpenAIEmbeddings()

Initialize Vector Store Instance

The Weaviate schema is created separately in `weaviate_vectorstore.py`.

In [None]:
# Connection and index settings for the Weaviate instance used below
WEAVIATE_URL = "http://localhost:8080"
INDEX_NAME = "Paragraph"
TEXT_KEY = "content"

# Set-Up Weaviate (done in weaviate_vectorstore.py)
client = weaviate.Client(WEAVIATE_URL)

# NOTE(review): the `embedding` object is not passed to the store here, so
# vectorization is presumably left to the Weaviate server — confirm against
# the schema defined in weaviate_vectorstore.py.
vectorstore = Weaviate(client, INDEX_NAME, TEXT_KEY)

Store the docs

In [None]:
# Split every chunk into its text and its metadata; both lists are built from
# the same iteration over `docs`, so entry i of one matches entry i of the other.
texts = [doc.page_content for doc in docs]
meta = [doc.metadata for doc in docs]

if texts:
    # Pass the metadata along with the texts so it is stored with each chunk
    # instead of being silently discarded.
    # NOTE(review): metadata keys become extra properties on the stored
    # objects — confirm the schema from weaviate_vectorstore.py accepts them.
    vectorstore.add_texts(texts, metadatas=meta)
else:
    # Nothing was loaded/split; skip the upload rather than failing.
    print("No document chunks to store.")

Perform a semantic search

In [None]:
query = "Tell me about the iPhone10"
response = vectorstore.similarity_search(query)

# Show the text of each returned chunk, each followed by a row of 80 asterisks
separator = "*" * 80
for chunk in response:
    print(chunk.page_content, separator, sep="\n")