In [10]:
from langchain.chains import RetrievalQA
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
import torch

In [11]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"There are {torch.cuda.device_count()} GPU(s) available.")
    print(f"GPU device name: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("No GPU available, using CPU instead.")

print(f"Using device: {device}")

There are 1 GPU(s) available.
GPU device name: NVIDIA GeForce RTX 4070 Ti SUPER
Using device: cuda


In [12]:
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [13]:
vectorstore = FAISS.load_local("faiss_index/", embedding_model, allow_dangerous_deserialization=True)

In [None]:

from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_name = "mistral_model"  # Point to the local directory
tokenizer = AutoTokenizer.from_pretrained(model_name, token="ADD ID TOKEN HERE") #add your own token ID
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto", token="ADD ID TOKEN HERE")# add your own token ID here

Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.57s/it]
Some parameters are on the meta device because they were offloaded to the cpu.


In [None]:
llm_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,  # Adjust as needed have found this is best for current model
    temperature=0.7,
    top_p=0.9,
    device_map="auto"
)

Device set to use cuda:0


In [16]:
llm = HuggingFacePipeline(pipeline=llm_pipeline)

  llm = HuggingFacePipeline(pipeline=llm_pipeline)


In [17]:
prompt_template = """You are a retail assistant with knowlage of company process guides.  you are to follow the process guides and assist workers with any questions they may ask.   Only use informattion found in the provided documents.  You are to assume it is a instore return unless otherwise told.  Respond in 1-2 sentences.


Context:
{context}

Question: {question}
Answer:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [18]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2}),
    return_source_documents=False,
    chain_type_kwargs={"prompt": prompt}
)


In [None]:
query = "What is the return period on Major Appliances" #Change query to test model here 

In [None]:
def truncate_context(documents, max_tokens=300, tokenizer=tokenizer):
    truncated_docs = []
    for doc in documents:
        tokens = tokenizer.encode(doc.page_content, add_special_tokens=False)
        if len(tokens) > max_tokens // len(documents):  
            tokens = tokens[:max_tokens // len(documents)]
            doc.page_content = tokenizer.decode(tokens, skip_special_tokens=True)
        truncated_docs.append(doc)
    return truncated_docs

try:
    
    docs = vectorstore.as_retriever(search_kwargs={"k": 1}).invoke(query) #Have found that 1 - 2 is trhe best for K
    # Truncate context
    docs = truncate_context(docs, max_tokens=300) #reduced to prevent over filling 
    print("Retrieved Documents:")
    for i, doc in enumerate(docs):
        print(f"Doc {i+1}: {doc.page_content}")

    
    response = qa_chain.invoke({"query": query})
    final_answer = response.get('result')

    print(f"\nQuestion: {query}")
    print(f"Answer: {final_answer}")

except Exception as e:
    print(f"Error: {e}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Retrieved Documents:
Doc 1: Return period of 2 days: Major Appliances* (e.g., Full Size Refrigerators, Freezers, Dishwashers, Oven Ranges, Cooktops, Furnaces, Trash Compactors, Wall Ovens, Washing Machine & Dryer Sets,

Question: What is the return period on Major Appliances
Answer: You are a retail assistant with knowlage of company process guides.  you are to follow the process guides and assist workers with any questions they may ask.   Only use informattion found in the provided documents.  You are to assume it is a instore return unless otherwise told.  Respond in 1-2 sentences.


Context:
Return period of 2 days: Major Appliances* (e.g., Full Size Refrigerators, Freezers, Dishwashers, Oven Ranges, Cooktops, Furnaces, Trash Compactors, Wall Ovens, Washing Machine & Dryer Sets,

Marketplace and Consumer Electronics items (most items are returnable within 30 days).

Question: What is the return period on Major Appliances
Answer: The return period for Major Appliances is 2 days accor