In [34]:
import os
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load API key from .env file
load_dotenv()
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# HUGGINGFACE_API_KEY = "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"
model_name = "mistralai/Mistral-7B-v0.3"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}


if not HUGGINGFACE_API_KEY:
    raise ValueError("API key not found! Set it in a .env file or environment variables.")

def load_and_prepare_data(file_path):
    """Loads and prepares text data for embedding."""
    loader = TextLoader(file_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    return texts

def create_vector_store(texts):
    """Creates a FAISS vector store from text data."""
    embeddings = HuggingFaceEmbeddings(model_name, model_kwargs, encode_kwargs)
        
    vector_store = FAISS.from_documents(texts, embeddings)
    return vector_store

def create_chatbot(file_path):
    """Initializes the chatbot using Hugging Face models."""
    texts = load_and_prepare_data(file_path)
    vector_store = create_vector_store(texts)
    retriever = vector_store.as_retriever()
    
    llm = HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-v0.3",
        huggingfacehub_api_token=HUGGINGFACE_API_KEY
    )

    # Create the chatbot using RetrievalQA with proper arguments
    chatbot = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, chain_type="stuff")
    return chatbot


SyntaxError: invalid syntax (3203895160.py, line 32)

In [36]:
chatbot = create_chatbot("data/starknet_docs.txt")
response = chatbot.run("What is Starknet?")
print(response)
# Expected output: "Starknet is a permissionless, decentralized platform for building scalable, privacy-preserving applications on Ethereum."


TypeError: HuggingFaceEmbeddings.__init__() takes 1 positional argument but 4 were given