In [13]:
import numpy as np
import transformers
import getpass
import os
import langchain
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain_mistralai import MistralAIEmbeddings, ChatMistralAI
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import DocArrayInMemorySearch

In [14]:
# Loads text files as langchain documents
loader = TextLoader('./data/history_wiki_1.txt')
text_documents = loader.load()

In [15]:
# Need to split documents into smaller context chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 100) # Will have to play around with these
split_documents = text_splitter.split_documents(text_documents)

In [16]:
from langchain_pinecone import PineconeEmbeddings
os.environ["PINECONE_API_KEY"] = "d453f359-8849-4d17-b4bd-d5a2e398b8d8"
embeddings = PineconeEmbeddings(model="multilingual-e5-large")
vector_store = Chroma.from_documents(documents = split_documents, embedding = embeddings)


In [17]:
from langchain import PromptTemplate
prompt_template = """
Answer the question in one sentence based on the context below.

Context:
{context}

Question:
{question}
"""

prompt = PromptTemplate(
    input_variables = ["question", "context"],
    template = prompt_template
)

In [48]:
print(prompt.format(context = "Carnegie Mellon University is a Pittsburgh based school known for their CS research.", question = "Where is Carnegie Mellon University located?"))

Human: 
Answer the question in one sentence based on the context below.

Context:
Carnegie Mellon University is a Pittsburgh based school known for their CS research.

Question:
Where is Carnegie Mellon University located?



In [18]:
parser = StrOutputParser()

In [19]:
model = ChatMistralAI(model="mistral-large-latest")

In [23]:
chain = (
    {"context" : vector_store.as_retriever(), "question" : RunnablePassthrough()}
    | prompt
    | model
    | parser
)

In [24]:
chain.invoke({"text" : "Where is Carnegie Mellon University located?"})

'Carnegie Mellon University is located in the Oakland neighborhood of Pittsburgh.'

In [39]:
api_key = os.environ["MISTRAL_API_KEY"]

In [40]:
api_key

''