In [1]:
from langchain_community.llms import Ollama

# LLM handle built from the Ollama wrapper, selecting the 'gemma:2b-text'
# model tag. Later cells pass `llm` into the document chain.
llm = Ollama(
    model="gemma:2b-text",
)

#### load the data that we want to index

In [2]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Location of the CSV to index (relative path, so it is resolved against the
# notebook's working directory).
file_path = "data/position_titles.csv"

# Build the loader for that file, then read it; the loaded result is what the
# text splitter consumes in a later cell.
loader = CSVLoader(file_path)
titles_data = loader.load()

#### Create the embedding model that will be used to index the loaded documents into a vectorstore

In [3]:
from langchain_community.embeddings import OllamaEmbeddings

# Embedding client using the same 'gemma:2b-text' model tag as the LLM cell;
# it is handed to FAISS when the vector store is built.
embeddings = OllamaEmbeddings(
    model="gemma:2b-text",
)

#### use the embedding model to ingest documents into a vectorstore

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Each stage announces itself before it starts, so a stalled run shows which
# stage it is stuck in.

# Stage 1: splitter constructed with its default settings.
print("Initializing text splitter...")
text_splitter = RecursiveCharacterTextSplitter()

# Stage 2: split the loaded CSV documents into chunks.
print("Starting text splitting...")
documents = text_splitter.split_documents(titles_data)

# Stage 3: embed the chunks and build the FAISS index that later cells query
# through `vector.as_retriever()`.
print("Building Vector Store...")
vector = FAISS.from_documents(documents=documents, embedding=embeddings)

print("Done.")

Initializing text splitter...
Starting text splitting...
Building Vector Store...


### Now that the data is indexed in a vectorstore, we will create a retrieval chain. This chain takes an incoming question, looks up relevant documents, and then passes those documents along with the original question to an LLM, asking it to answer the original question.

##### First, let's set up the chain that takes a question and the retrieved documents and generates an answer.

In [None]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Task instructions for the model. NOTE: despite its name, this string is NOT
# the retrieved-documents context; the name `context` is kept only so that any
# other cell referring to it keeps working. If literal braces are ever added to
# this text they must be doubled ({{ }}), because it becomes part of the
# prompt template below.
context = """You are an AI assistant that works for linkedin.
Your job is to match between a given position title to the best matching title existing in the vector store database.
If you can't find a good match, return null.
"""

# The template must be a plain string, NOT an f-string: `{context}` and
# `{input}` are placeholders that the chain fills in at invoke time
# (`{context}` with the retrieved documents, `{input}` with the question).
# With an f-string, Python substitutes them immediately -- `{context}` with the
# instructions above and `{input}` with the repr of the `input` callable --
# leaving a template with no variables, so the retrieved documents and the
# question never reach the model.
prompt = ChatPromptTemplate.from_template(context + """
Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# "Stuff" chain: formats the retrieved documents into the prompt's `context`
# variable and sends the filled prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

In [None]:
from langchain.chains import create_retrieval_chain

# Wrap the FAISS index as a retriever and combine it with the document chain:
# for each query the retriever looks up documents, which the document chain
# then passes to the LLM together with the query.
retriever = vector.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

# Sample raw position titles to match against the indexed titles.
titles = ["assembly crew chief at H.M. Dunn Aerospace",
 "RMA | Customer Service Experience |",
 "EXPORT SPECIALIST at DHL Express Nederland",
 "Came out of retirement!",
 "Graduate Student at University Of Iowa",
 "Nurse Garreth",
 "Graduated from Remington College-Shreveport Campus",
 "Senior Program Manager at Credo",]

# One {"title", "answer"} record per query, kept so the answers can be
# inspected after the loop instead of only being printed.
results = []
for title in titles:
    # Send the actual title as the query (previously an empty string was sent
    # on every iteration, so no title was ever matched).
    response = retrieval_chain.invoke({"input": title})
    answer = response["answer"]
    results.append({"title": title, "answer": answer})
    print(answer)