In [None]:
# pip install langchain sentence-transformers huggingface_hub transformers ctransformers llama-cpp-python

In [None]:
# from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer
import torch

In [None]:

# Local checkpoint directory; pick a model small enough for the available RAM.
model_name = "D:/mr_document/all_models/gemma-keras-gemma_1.1_instruct_2b_en-v4/"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the weights in half precision. device_map="auto" leaves device
# placement to the library (the GPU is used when one is available).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto",
)

In [None]:
# HuggingFacePipeline has to be imported in this cell: the import in the first
# cell is commented out, so a fresh kernel would otherwise raise NameError below.
from langchain.llms import HuggingFacePipeline

# Wrap the already-loaded model/tokenizer in a text-generation pipeline,
# capping each completion at 256 newly generated tokens.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=256)

# Expose the pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=pipe)

# Smoke test: send one prompt and print the generated text.
print(llm("What are the benefits of using a GPU for deep learning?"))

In [None]:
import os
from langchain.llms import CTransformers
from langchain.embeddings import LlamaCppEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

def load_local_model_and_qa(model_path, tokenizer_path, vectorstore_path, checklist_path, params_path):
    """
    Build a RetrievalQA chain from a local model file and a saved FAISS index.

    The model is loaded with CTransformers first; if that raises, LlamaCpp is
    tried instead. The same model file is also handed to LlamaCppEmbeddings,
    and those embeddings are used to open the vector store.

    Args:
        model_path (str): Path to the model file. Must end in ``.bin`` or
            ``.gguf`` (compared case-insensitively).
        tokenizer_path (str): Accepted for interface compatibility; not read
            by this function.
        vectorstore_path (str): Location passed to ``FAISS.load_local``.
        checklist_path (str): Accepted for interface compatibility; not read
            by this function.
        params_path (str): Accepted for interface compatibility; not read by
            this function.

    Returns:
        RetrievalQA: The assembled chain, or ``None`` if any step failed (the
        error is printed rather than raised).
    """

    try:
        # Reject unsupported files up front. The extension is lower-cased first
        # so that e.g. "MODEL.GGUF" is accepted as well.
        if not model_path.lower().endswith(('.bin', '.gguf')):
            raise ValueError(f"Unsupported model file type: {model_path}")

        try:
            llm = CTransformers(model=model_path, model_type="llama")  # or other model_type
        except Exception as ctransformers_error:
            # Still a best-effort fallback, but report why the first backend
            # was rejected instead of hiding the reason.
            print(f"CTransformers could not load the model ({ctransformers_error}); falling back to LlamaCpp.")
            from langchain.llms import LlamaCpp
            llm = LlamaCpp(model_path=model_path)

        # Embeddings are computed with the same local model file.
        embeddings = LlamaCppEmbeddings(model_path=model_path)

        # NOTE(review): newer LangChain releases refuse to unpickle a FAISS index
        # unless allow_dangerous_deserialization=True is passed. The flag is not
        # added here because older releases reject it -- confirm against the
        # installed version, and only load indexes you created yourself.
        vectorstore = FAISS.load_local(vectorstore_path, embeddings)

        # "stuff" chain type: build the QA chain on top of the store's retriever.
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever())

        return qa

    except Exception as e:
        # Callers test the result for None, so failures are reported, not raised.
        print(f"Error loading local model and QA system: {e}")
        return None

def main():
    """
    Run a single example query against the local question-answering system.

    Verifies that every configured path exists, builds the chain with
    load_local_model_and_qa, and prints the answer to a sample question.
    Always returns None; problems are reported on stdout.
    """

    # load_local_model_and_qa only accepts .bin/.gguf model files, so the
    # placeholder must use one of those extensions (the previous
    # "consolidated.00.pth" value was always rejected by the loader).
    model_path = "model.gguf"  # Replace with the actual path to your model file.
    tokenizer_path = "tokenizer.model" # Replace with actual path.
    vectorstore_path = "vectorstore_faiss" #Replace with actual path.
    checklist_path = "checklist.chk" #replace with actual path.
    params_path = "params" # replace with actual path.

    # Check that every path exists and name the ones that do not, so the
    # user knows which placeholder still needs fixing.
    required_paths = [model_path, tokenizer_path, vectorstore_path, checklist_path, params_path]
    missing = [path for path in required_paths if not os.path.exists(path)]
    if missing:
        print(f"One or more files not found: {', '.join(missing)}")
        return

    qa = load_local_model_and_qa(model_path, tokenizer_path, vectorstore_path, checklist_path, params_path)

    # The loader prints its own error and returns None on failure.
    if qa is None:
        return

    query = "What is the purpose of this model?" #Example query.
    result = qa.run(query)
    print(result)



In [None]:
# Entry-point guard: call main() only when __name__ is "__main__".
if __name__ == "__main__":
    main()