In [42]:
import os
import wikipedia
from decouple import config, AutoConfig
from langchain_mistralai.chat_models import ChatMistralAI
from langchain.schema import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.documents import Document
# Directory from which python-decouple starts looking for its settings file.
# It can be overridden with the CHATBOT_CONFIG_DIR environment variable so the
# notebook is not tied to one machine; the original location stays the default.
# NOTE: this rebinds the name `config` imported from `decouple` above, so every
# later `config(...)` call in this notebook goes through this AutoConfig instance.
config = AutoConfig(
    search_path=os.environ.get("CHATBOT_CONFIG_DIR", "/home/harry/Chatbot")
)

In [43]:
from mistralai import Mistral

In [44]:
# --- Credentials and endpoints, all read through `config` ---

# API key handed to the Mistral chat model further down.
MISTRAL_API_KEY = config("MISTRAL_API_KEY")

# Keep the Hugging Face token as a Python constant and also export it to the
# process environment (presumably so Hugging Face tooling can pick it up).
HF_TOKEN = os.environ["HF_TOKEN"] = config("HF_TOKEN")

# REST endpoint and token of the Upstash Vector index.
UPSTASH_VECTOR_REST_URL, UPSTASH_VECTOR_REST_TOKEN = (
    config("UPSTASH_VECTOR_REST_URL"),
    config("UPSTASH_VECTOR_REST_TOKEN"),
)

In [45]:
from langchain_core.documents import Document

# Wikipedia page titles to fetch; each one becomes a single LangChain Document.
cities = ["Tehran, Tehran", "shiraz, fars"]


def _city_to_document(city):
    """Fetch the Wikipedia page for ``city`` and wrap it in a ``Document``.

    The page text becomes ``page_content``. The metadata records the page URL
    under ``"source"`` and the requested title (not the resolved page title)
    under ``"title"``.
    """
    page = wikipedia.page(title=city)
    return Document(
        page_content=page.content,
        metadata={"source": f"{page.url}", "title": city},
    )


documents = [_city_to_document(city) for city in cities]

In [46]:
# Spot-check the metadata recorded for the second entry in `documents`.
documents[1].metadata

{'source': 'https://en.wikipedia.org/wiki/Shiraz', 'title': 'shiraz, fars'}

In [47]:
# Number of pages loaded: one Document is appended per entry in `cities`.
len(documents)

2

In [48]:
# None of the visible cells import `MistralAIEmbeddings`, so on a fresh kernel
# this cell would fail with a NameError; import it where it is first used.
from langchain_mistralai import MistralAIEmbeddings

# Embedding client later handed to the vector store.
# It talks to the Mistral API, so it must authenticate with the Mistral key
# (the same one the chat model uses), not with the Hugging Face token.
embeddings = MistralAIEmbeddings(
    model="mistral-embed",
    api_key=MISTRAL_API_KEY,
)

In [49]:
from langchain_community.vectorstores import UpstashVectorStore

# Handle on the Upstash Vector index: connection details come from the values
# read via `config`, and `embeddings` is supplied as the store's embedding object.
store = UpstashVectorStore(
    index_url=UPSTASH_VECTOR_REST_URL,
    index_token=UPSTASH_VECTOR_REST_TOKEN,
    embedding=embeddings,
)

In [50]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, TokenTextSplitter, CharacterTextSplitter

In [51]:
# Mistral chat model client, authenticated with the key read from config.
# No model name is passed, so whatever default the library picks is used.
model = ChatMistralAI(api_key=MISTRAL_API_KEY)

In [52]:
# Splitter settings pulled out as named constants so they are easy to tune.
CHUNK_SIZE = 100    # maximum size of each chunk
CHUNK_OVERLAP = 0   # no content shared between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

In [53]:
# Split every loaded page into chunks with the splitter configured above.
docs = text_splitter.split_documents(documents)

In [54]:
# Number of chunks produced by the split.
len(docs)

935

In [None]:
import uuid

# When no ids are supplied the store generates a fresh random id per chunk, so
# every re-run of this cell (e.g. Restart & Run All) would upload the same
# chunks again as duplicates. Deriving each id from the chunk's source URL and
# its position makes a re-run overwrite the existing vectors instead.
chunk_ids = [
    str(uuid.uuid5(uuid.NAMESPACE_URL, f"{chunk.metadata.get('source', '')}#{position}"))
    for position, chunk in enumerate(docs)
]

# Embeds and uploads the chunks; the returned value is the list of stored ids.
inserted_vectors = store.add_documents(docs, ids=chunk_ids)

In [None]:
# Ask the vector store for the two chunks closest to the question and print
# each hit's metadata next to its similarity score.
# The original query contained a stray "the the the"; those extra tokens only
# add noise to the embedded query, so they are removed here.
query = "what is the tallest tower in Tehran?"
result = store.similarity_search_with_score(query, k=2)
for doc, score in result:
    print(f"{doc.metadata} - {score}")