# 2.3 Vectorstores and Embeddings - part 2

## Using other embedding models

In [None]:
%pip install python-dotenv langchain langchain-openai chromadb docarray --upgrade --quiet

In [None]:
# Direct OpenAI client configuration, left disabled in this notebook:
#import os
#import openai
#openai.api_key  = os.environ['OPENAI_API_KEY']

from dotenv import find_dotenv, load_dotenv

# Locate the local .env file, then read it; the return value is discarded.
env_file = find_dotenv()
_ = load_dotenv(env_file)

In [None]:
from langchain.llms import OpenAI

# !pip install -U langchain-community
# !pip install -U gpt4all
from langchain_community.embeddings import GPT4AllEmbeddings
# !pip install -U chromadb
from langchain_community.vectorstores import Chroma

# !pip install -U sentence-transformers
from langchain.embeddings import HuggingFaceEmbeddings

# !pip install -U bs4
# from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import CharacterTextSplitter

# !pip install -U unstructured
from langchain_community.document_loaders import UnstructuredMarkdownLoader

In [None]:
# Open-source embedding function used for the rest of the notebook.
# Alternatives that were tried, kept for reference:
#embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
#embedding_function = GPT4AllEmbeddings()
embedding_model_name = "all-MiniLM-L6-v2"
embedding_function = HuggingFaceEmbeddings(model_name=embedding_model_name)

In [None]:
# Markdown files that hold the listings to load.
listing_paths = [
    "../data/listing1.md",
    "../data/listing2.md",
    "../data/listing3.md",
]

# One loader per listing file.
loaders = [UnstructuredMarkdownLoader(path) for path in listing_paths]

# Load each file in order and gather everything into a single flat list.
documents = [doc for md_loader in loaders for doc in md_loader.load()]


In [None]:
# Import the splitter from the dedicated `langchain_text_splitters` package,
# the same source the notebook's main import cell uses, instead of the legacy
# `langchain.text_splitter` path.
from langchain_text_splitters import CharacterTextSplitter

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=32).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=32)
splitDocs = text_splitter.split_documents(documents)

# Optional: embed each chunk by hand to inspect the raw vectors.
# embeddings = [
#     embedding_function.embed_query(sp.page_content) for sp in splitDocs
# ]

print(f"splitDocs count: {len(splitDocs)}")

In [None]:
# On-disk persistence is disabled; to enable it, uncomment the lines below
# and the `persist_directory` argument in the call.
#persist_directory = '../db/chroma-hugging-1/'
#!rm -rf ../db/chroma-hugging-1  # remove old database files if any

print('Loading the vector store...')

# Build the Chroma store from the split documents and the embedding function.
vectorstore = Chroma.from_documents(
    documents=splitDocs,
    embedding=embedding_function,
    # persist_directory=persist_directory,
)

In [None]:
# Query to run against the vector store; alternatives kept for reference.
#question = "I'm looking for an apartment with a stackable washer."
#question = "I'm looking for an electric car with autopilot"
question = "I'm looking for a 2-bedroom apartment"

# Number of matches requested from the store.
top_k = 1

print("Similarity search...")
docs = vectorstore.similarity_search(question, k=top_k)

# Report how many documents came back, then print each one.
length = len(docs)
print(f"Result: {length}")

for match in docs:
    print(match)