In [1]:
import os
import json
import logging
import time
import re
import tempfile
import pandas as pd
from typing import List
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
#from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
import os
from typing import List, Tuple
from langchain.document_loaders import TextLoader, PDFMinerLoader
from langchain.docstore.document import Document

In [2]:
import logging
import os
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed

import click
import torch
from langchain.docstore.document import Document
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceInstructEmbeddings  

In [3]:
def load_single_document(file_path: str) -> Tuple[Document, str]:
    # Loads a single document from a file path and returns the Document and extracted text
    if file_path.endswith(".txt"):
        loader = TextLoader(file_path, encoding="utf8")
        doc = loader.load()[0]
        extracted_text = doc.page_content
    elif file_path.endswith(".pdf"):
        loader = PDFMinerLoader(file_path)
        doc = loader.load()[0]
        extracted_text = doc.page_content
    elif file_path.endswith(".csv"):
        loader = ICSVLoader(file_path)
        doc = loader.load()[0]
        extracted_text = doc.page_content
    else:
        doc = None
        extracted_text = ""
    return doc, extracted_text

In [4]:
def load_documents(source_dir: str) -> Tuple[List[Document], List[str], List[str]]:
    # Loads all documents from source documents directory
    all_files = os.listdir(source_dir)
    documents = []
    file_names = []
    extracted_texts = []
    for file_path in all_files:
        doc, extracted_text = load_single_document(f"{source_dir}/{file_path}")
        if doc is not None:
            documents.append(doc)
            file_names.append(file_path)
            extracted_texts.append(extracted_text)
    return documents, file_names, extracted_texts

In [5]:
def clean_document(extracted_texts):
    # Clean each extracted text
    cleaned_texts = []
    for text in extracted_texts:
        # Remove any unnecessary escape sequences
        cleaned_text = re.sub(r'\\n|\\[|\\]', '', text)
        # Remove any extra whitespace
        cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
        # Remove any remaining unnecessary characters or formatting
        cleaned_text = re.sub(r'[^a-zA-Z0-9\s.,?!]', '', cleaned_text)
        cleaned_texts.append(cleaned_text)
    return cleaned_texts

In [7]:
print(f'Loading documents from {"/Users/ar-kushal.bs/Downloads/dataset"}')
documents, file_names, extracted_texts = load_documents("/Users/ar-kushal.bs/Downloads/dataset")
print("List of loaded files:")
for file_name in file_names:
    print(file_name)

Loading documents from /Users/ar-kushal.bs/Downloads/dataset
List of loaded files:
ICG-CCS-001-Rakuten.pdf
RakutenPresentation.pdf


In [8]:
cleaned_texts = clean_document(extracted_texts)
cleaned_texts

['Rakuten CCS 001  Certified Case Study May 2014  Internal Consulting Group 2015 CCS 001  Certified Case Study  Rakuten COMMERCIAL IN CONFIDENCE Confidentiality Our clients industries are extremely competitive. The confidentiality of companies plans and data is obviously critical. ICG will protect the confidentiality of all such client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to our clients to protect ICGs interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be shared with any third party without the explicit written permission of ICG. Disclaimer ICG has made good faith efforts to ensure that this material is a highquality publication. However, ICG does not warrant completeness or accuracy, and does not warrant that use of the material ICGs provisioning service will be uninterrupted or errorfree, or that the res

In [9]:
for i, extracted_text in enumerate(extracted_texts):
    print(f"Extracted text from {file_names[i]}:\n{extracted_text}\n")

Extracted text from ICG-CCS-001-Rakuten.pdf:
Rakuten 
CCS 001 – Certified Case Study 

May 2014 

© Internal Consulting Group 2015 

CCS 001 - Certified Case Study - Rakuten 

COMMERCIAL IN CONFIDENCE 

Confidentiality 
Our clients’ industries are extremely competitive. The confidentiality of companies’ plans and data is obviously critical. ICG will protect the confidentiality of all 
such client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to 
our clients to protect ICG’s interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be 
shared with any third party without the explicit written permission of ICG. 

Disclaimer 
ICG has made good faith efforts to ensure that this material is a high-quality publication. However, ICG does not warrant completeness or accuracy, and does 
not warrant that use of the material ICG’s 

In [10]:
extracted_texts

["Rakuten \nCCS 001 – Certified Case Study \n\nMay 2014 \n\n© Internal Consulting Group 2015 \n\nCCS 001 - Certified Case Study - Rakuten \n\nCOMMERCIAL IN CONFIDENCE \n\n\x0cConfidentiality \nOur clients’ industries are extremely competitive. The confidentiality of companies’ plans and data is obviously critical. ICG will protect the confidentiality of all \nsuch client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to \nour clients to protect ICG’s interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be \nshared with any third party without the explicit written permission of ICG. \n\nDisclaimer \nICG has made good faith efforts to ensure that this material is a high-quality publication. However, ICG does not warrant completeness or accuracy, and does \nnot warrant that use of the material ICG’s provisioning service 

In [11]:
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
SEPARATOR = '\n'
def create_chunked_documents(cleaned_texts: List[str]) -> List[Document]:
  chunked_documents = []
  for text in cleaned_texts:
    # Create a splitter with desired chunk size and overlap (remove separator argument)
    splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
    chunks = splitter.split_text(text)

    # Create a Document object for each chunk
    for chunk in chunks:
      chunked_document = Document(page_content=chunk)
      chunked_documents.append(chunked_document)
  return chunked_documents


In [12]:
# Create chunked documents from extracted text (correct call)
chunks = create_chunked_documents(cleaned_texts) 
print(chunks)
# You can choose how to print the chunks here (e.g., information about each or specific content)

[Document(page_content='Rakuten CCS 001  Certified Case Study May 2014  Internal Consulting Group 2015 CCS 001  Certified Case Study  Rakuten COMMERCIAL IN CONFIDENCE Confidentiality Our clients industries are extremely competitive. The confidentiality of companies plans and data is obviously critical. ICG will protect the confidentiality of all such client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to our clients to protect ICGs interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be shared with any third party without the explicit written permission of ICG. Disclaimer ICG has made good faith efforts to ensure that this material is a highquality publication. However, ICG does not warrant completeness or accuracy, and does not warrant that use of the material ICGs provisioning service will be uninterrupted or erro

In [13]:
print(type(chunks))

<class 'list'>


In [14]:
len(documents)

2

In [15]:
len(chunks)

45

In [16]:
chunks[:1]

[Document(page_content='Rakuten CCS 001  Certified Case Study May 2014  Internal Consulting Group 2015 CCS 001  Certified Case Study  Rakuten COMMERCIAL IN CONFIDENCE Confidentiality Our clients industries are extremely competitive. The confidentiality of companies plans and data is obviously critical. ICG will protect the confidentiality of all such client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to our clients to protect ICGs interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be shared with any third party without the explicit written permission of ICG. Disclaimer ICG has made good faith efforts to ensure that this material is a highquality publication. However, ICG does not warrant completeness or accuracy, and does not warrant that use of the material ICGs provisioning service will be uninterrupted or erro

In [17]:
cleaned_texts = clean_document(extracted_texts)
chunks = create_chunked_documents(cleaned_texts)

# Extract the text content from each Document object
texts = [doc.page_content for doc in chunks]


In [18]:
texts

['Rakuten CCS 001  Certified Case Study May 2014  Internal Consulting Group 2015 CCS 001  Certified Case Study  Rakuten COMMERCIAL IN CONFIDENCE Confidentiality Our clients industries are extremely competitive. The confidentiality of companies plans and data is obviously critical. ICG will protect the confidentiality of all such client information. Similarly, management consulting is a competitive business. We view our approaches and insights as proprietary and therefore look to our clients to protect ICGs interests in our proposals, presentations, methodologies and analytical techniques. Under no circumstances should this material be shared with any third party without the explicit written permission of ICG. Disclaimer ICG has made good faith efforts to ensure that this material is a highquality publication. However, ICG does not warrant completeness or accuracy, and does not warrant that use of the material ICGs provisioning service will be uninterrupted or errorfree, or that the',
 

In [19]:
import warnings
warnings.filterwarnings("ignore", message=".*`resume_download` is deprecated.*")

In [20]:
EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"
EMBEDDING_MODEL = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

embeddings = EMBEDDING_MODEL.embed_documents(texts)

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
embeddings

[[0.053644489496946335,
  0.025741826742887497,
  0.05278472602367401,
  -0.08012218773365021,
  -0.022235145792365074,
  -0.08002122491598129,
  -0.03257262706756592,
  0.013822251930832863,
  -0.028918053954839706,
  -0.023147469386458397,
  0.04796256870031357,
  0.01564909890294075,
  -0.02304920181632042,
  -0.14663787186145782,
  -0.061040788888931274,
  0.02333192341029644,
  -0.005925419274717569,
  -0.029670320451259613,
  -0.015170636586844921,
  0.0033962957095354795,
  -0.01805134490132332,
  -0.015271499752998352,
  -0.0009137341403402388,
  0.037174101918935776,
  -0.03654230758547783,
  -0.027438923716545105,
  -0.012164749205112457,
  0.02633656933903694,
  -0.01629648730158806,
  0.012232023291289806,
  0.038798488676548004,
  -0.01712741144001484,
  0.02211720682680607,
  -0.015321051701903343,
  0.013348630629479885,
  0.015349614433944225,
  -0.07313469797372818,
  -0.020620951429009438,
  0.0263949166983366,
  0.016948513686656952,
  -0.03305668383836746,
  0.07002

In [22]:
len(embeddings)

45

In [23]:
import json

def save_chunks_and_embeddings_to_json(chunks, embeddings, output_file):
    """
    Save the document chunks and their embeddings to a JSON file.

    Args:
        chunks (List[Document]): A list of Document objects containing the text chunks.
        embeddings (List[List[float]]): A list of embeddings corresponding to each text chunk.
        output_file (str): The path to the output JSON file.
    """
    data = []
    for i, doc in enumerate(chunks):
        data.append({
            'chunk': doc.page_content,
            'embedding': embeddings[i]
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

In [24]:
import json

def save_chunks_and_embeddings_to_json(chunks, embeddings, output_file):
    """
    Save the document chunks and their embeddings to a JSON file.

    Args:
        chunks (List[Document]): A list of Document objects containing the text chunks.
        embeddings (List[List[float]]): A list of embeddings corresponding to each text chunk.
        output_file (str): The path to the output JSON file.
    """
    data = []
    for i, doc in enumerate(chunks):
        data.append({
            'chunk': doc.page_content,
            'embedding': embeddings[i]
        })

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

In [25]:
save_chunks_and_embeddings_to_json(chunks, embeddings, 'chunks_and_embeddings.json')

In [26]:
import json
from typing import List
import faiss
import numpy as np

def load_chunks_and_embeddings_from_json(json_file):
    """
    Load document chunks and embeddings from a JSON file.

    Args:
        json_file (str): The path to the JSON file containing chunks and embeddings.

    Returns:
        Tuple[List[str], List[List[float]]]: A tuple containing lists of chunks and embeddings.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    chunks = [item['chunk'] for item in data]
    embeddings = [item['embedding'] for item in data]

    return chunks, embeddings

In [27]:
def save_to_faiss_index(chunks, embeddings, index_file):
    """
    Save document chunks and embeddings to a FAISS index.

    Args:
        chunks (List[str]): A list of document chunks.
        embeddings (List[List[float]]): A list of embeddings corresponding to each chunk.
        index_file (str): The path to the FAISS index file.
    """
    # Convert embeddings to numpy array
    embeddings_np = np.array(embeddings, dtype=np.float32)

    # Create FAISS index
    index = faiss.IndexFlatL2(embeddings_np.shape[1])

    # Add embeddings to the index
    index.add(embeddings_np)

    # Save the index to disk
    faiss.write_index(index, index_file)

    print(f"FAISS index saved to: {index_file}")

In [29]:
json_file = "chunks_and_embeddings.json"
index_file = "/Users/ar-kushal.bs/Downloads/untitled folder/Vector_store/Faiss_store3"

chunks, embeddings = load_chunks_and_embeddings_from_json(json_file)
save_to_faiss_index(chunks, embeddings, index_file)

FAISS index saved to: /Users/ar-kushal.bs/Downloads/untitled folder/Vector_store/Faiss_store3


In [30]:
db=save_to_faiss_index(chunks, embeddings, index_file)

FAISS index saved to: /Users/ar-kushal.bs/Downloads/untitled folder/Vector_store/Faiss_store3


In [31]:
from huggingface_hub import hf_hub_download
from transformers import (
    GenerationConfig,
    pipeline,
)
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate


In [32]:
EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"

In [33]:
# Context Window and Max New Tokens
CONTEXT_WINDOW_SIZE = 8096
MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE 

In [35]:
# Define the folder for storing database
SOURCE_DIRECTORY = "/Users/ar-kushal.bs/Downloads/untitled folder/"
json_file = "chunks_and_embeddings.json"
index_file = "//Users/ar-kushal.bs/Downloads/untitled folder/Vector_store/Faiss_store3"  ## PERSIST_DIRECTORY



In [36]:
# !pip install --upgrade huggingface_hub

In [37]:
# from huggingface_hub import login
# login(token ="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")

In [39]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Set the desired directory path to store the model
model_path = "/Users/ar-kushal.bs/Downloads/untitled folder/ms"

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", cache_dir=model_path)
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", cache_dir=model_path)

# Use a pipeline as a high-level helper
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

Downloading shards: 100%|██████████| 3/3 [16:24<00:00, 328.00s/it]
Loading checkpoint shards: 100%|██████████| 3/3 [01:05<00:00, 21.68s/it]


In [40]:
generation_config = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")


In [41]:
pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=MAX_NEW_TOKENS,
        temperature=0.1,
        repetition_penalty=1.15,
        generation_config=generation_config,
    )

In [42]:
prompt_template = """
### [INST] 
Instruction: You are a helpful assistant, you will use the provided context to answer user questions.
Read the given context before answering questions and think step by step. If you can not answer a user question based on 
the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question.

{context}

### QUESTION:
{question} 

[/INST]
 """

In [43]:
local_llm = HuggingFacePipeline(pipeline=pipe)

In [44]:
local_llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x1354a18d0>)

In [49]:
prompt=PromptTemplate(input_variables=["context","question"], template=prompt_template)
prompt

PromptTemplate(input_variables=['context', 'question'], template='\n### [INST] \nInstruction: You are a helpful assistant, you will use the provided context to answer user questions.\nRead the given context before answering questions and think step by step. If you can not answer a user question based on \nthe provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question.\n\n{context}\n\n### QUESTION:\n{question} \n\n[/INST]\n ')

In [50]:
# Create llm chain 
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

  warn_deprecated(


In [51]:
llm_chain

LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template='\n### [INST] \nInstruction: You are a helpful assistant, you will use the provided context to answer user questions.\nRead the given context before answering questions and think step by step. If you can not answer a user question based on \nthe provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question.\n\n{context}\n\n### QUESTION:\n{question} \n\n[/INST]\n '), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x1354a18d0>))

In [53]:
llm_chain.invoke({"context":"", 
                  "question": "What is Rakuten?"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


KeyboardInterrupt: 

In [72]:
# def load_model( model_id, model_basename=None, LOGGING=logging):

#     logging.info(f"Loading Model: {model}")
#     logging.info("This action can take a few minutes!")
    
#     # Load configuration from the model to avoid warnings
#     generation_config = GenerationConfig.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")

    
#     # Create a pipeline for text generation
#     pipe = pipeline(
#         "text-generation",
#         model=model,
#         tokenizer=tokenizer,
#         max_length=MAX_NEW_TOKENS,
#         temperature=0.1,
#         repetition_penalty=1.15,
#         generation_config=generation_config,
#     )

#     local_llm = HuggingFacePipeline(pipeline=pipe)
#     logging.info("Local LLM Loaded")

#     return local_llm

In [73]:
# local_llm

HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fe110411070>)

In [74]:
# model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )
    )
    (norm): MistralRMSNorm(

In [75]:
# tokenizer

LlamaTokenizerFast(name_or_path='mistralai/Mistral-7B-Instruct-v0.2', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [76]:
# def retrieval_qa_pipline(device_type, use_history, promptTemplate_type="mistral"):

#     embeddings = get_embeddings(device_type)

#     logging.info(f"Loaded embeddings from {EMBEDDING_MODEL_NAME}")

#     # load the vectorstore
#     db = Chroma(persist_directory=PERSIST_DIRECTORY, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
#     retriever = db.as_retriever()

#     # get the prompt template and memory if set by the user.
#     prompt, memory = get_prompt_template(promptTemplate_type=promptTemplate_type, history=use_history)

#     # load the llm pipeline
#     llm = load_model(device_type, model_id=MODEL_ID, model_basename=MODEL_BASENAME, LOGGING=logging)

#     if use_history:
#         qa = RetrievalQA.from_chain_type(
#             llm=llm,
#             chain_type="stuff",  # try other chains types as well. refine, map_reduce, map_rerank
#             retriever=retriever,
#             return_source_documents=True,  # verbose=True,
#             callbacks=callback_manager,
#             chain_type_kwargs={"prompt": prompt, "memory": memory},
#         )
#     else:
#         qa = RetrievalQA.from_chain_type(
#             llm=llm,
#             chain_type="stuff",  # try other chains types as well. refine, map_reduce, map_rerank
#             retriever=retriever,
#             return_source_documents=True,  # verbose=True,
#             callbacks=callback_manager,
#             chain_type_kwargs={
#                 "prompt": prompt,
#             },
#         )

#     return qa



In [77]:
import warnings
warnings.filterwarnings("ignore", message=".*`resume_download` is deprecated.*")

In [78]:
from langchain.embeddings import HuggingFaceEmbeddings

EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"
EMBEDDING_MODEL = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

def create_question_embedding(question):
    """
    Creates an embedding for the given question using the specified HuggingFaceEmbeddings model.

    Args:
        question (str): The question for which to create the embedding.

    Returns:
        numpy.ndarray: The embedding vector for the given question.
    """
    question_embedding = EMBEDDING_MODEL.embed_query(question)
    return question_embedding


In [79]:
question = "WHO IS THE AUTHORS OF ATTENTION ALL YOU NEED"
question_embedding = create_question_embedding(question)
print(question_embedding)

[-0.028944356366991997, 0.030917001888155937, -0.005974740721285343, -0.040754806250333786, -0.009460925124585629, -0.03773093968629837, -0.10731622576713562, 0.02450764924287796, -0.07763005793094635, -0.002296104794368148, 0.005397134460508823, 0.02367265522480011, -0.059531133621931076, -0.08024653792381287, 0.008288819342851639, 0.0023345390800386667, 0.05294385552406311, -0.08859746158123016, 0.0030103137250989676, -0.023452406749129295, -0.017133334651589394, 0.0030524071771651506, -0.026054328307509422, 0.051366034895181656, -0.011609370820224285, 0.025328725576400757, 0.010272295214235783, 0.0033462403807789087, -0.024008115753531456, 0.02117733657360077, 0.08993073552846909, -0.007965140044689178, 0.05748137831687927, -0.07106544077396393, 0.059642717242240906, -0.006837508641183376, 0.01754496991634369, -0.021443719044327736, 0.026316527277231216, -0.029968708753585815, -0.017777955159544945, 0.039643868803977966, 0.016424911096692085, -0.027522176504135132, 0.066334143280982

In [80]:
len(question_embedding)

768

In [81]:
import faiss

def search_faiss_index(question, index_file, k=5):
    """
    Search the FAISS index for the most relevant document chunks based on the given question.

    Args:
        question (str): The question to search for.
        index_file (str): The path to the FAISS index file.
        k (int): The number of nearest neighbors to retrieve (default: 5).

    Returns:
        List[Tuple[str, float]]: A list of tuples containing the document chunks and their scores.
    """
    # Load the FAISS index
    index = faiss.read_index(index_file)

    # Create the question embedding
    question_embedding = create_question_embedding(question)

    # Search for the nearest neighbors
    distances, indices = index.search(np.array([question_embedding]), k)

    # Load the document chunks and embeddings from the JSON file
    chunks, _ = load_chunks_and_embeddings_from_json("chunks_and_embeddings.json")

    # Retrieve the relevant document chunks and their scores
    results = [(chunks[idx], 1 - dist) for idx, dist in zip(indices[0], distances[0])]

    return results

In [82]:
question = "WHO IS THE AUTHORS OF ATTENTION ALL YOU NEED"
results = search_faiss_index(question, "/opt/llm/Vector_Store/Faiss_store")
for chunk, score in results:
    print(f"Score: {score:.2f}, Chunk: {chunk}")

Score: 0.44, Chunk: 3 2 0 2 g u A 2  L C . s c  7 v 2 6 7 3 0 . 6 0 7 1  v i X r a Provided proper attribution is provided, Google hereby grants permission to reproduce the tables and figures in this paper solely for use in journalistic or scholarly works. Attention Is All You Need Ashish Vaswani Google Brain avaswanigoogle.com Noam Shazeer Google Brain noamgoogle.com Niki Parmar Google Research nikipgoogle.com Jakob Uszkoreit Google Research uszgoogle.com Llion Jones Google Research lliongoogle.com Aidan N. Gomez  University of Toronto aidancs.toronto.edu ukasz Kaiser Google Brain lukaszkaisergoogle.com Illia Polosukhin  illia.polosukhingmail.com Abstract The dominant sequence transduction models are based on complex recurrent or convolutional neural networks that include an encoder and a decoder. The best performing models also connect the encoder and decoder through an attention mechanism. We propose a new simple network architecture, the Transformer, based solely on attention mecha

In [83]:
# # Greeting LLM chain
# greetings_llm = LLMChain(llm=llm, prompt=PromptTemplate.from_template(GREETINGS_PROMPT))


In [84]:
def get_similar_question(question):
  question_embedding = create_question_embedding(question)
  max_similarity = -float('inf')
  most_similar_question = None
  for stored_question, stored_embedding in question_embeddings.items():
    similarity = np.dot(question_embedding, stored_embedding)  # Dot product for similarity
    if similarity > max_similarity:
      max_similarity = similarity
      most_similar_question = stored_question
  return most_similar_question

In [85]:
def llm_query(context, question):
  similar_question = get_similar_question(question)
  prompt = prompt_template.format(context=context, question=question, similar_question=similar_question)
  return llm_chain.invoke({"context": context, "question": prompt})

In [86]:
llm_chain

LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template='<s>[INST] You are a helpful assistant, you will use the provided context to answer user questions.\nRead the given context before answering questions and think step by step. If you can not answer a user question based on \nthe provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question.\n            \n            Context: {context}\n            User: {question} [/INST]'), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x7fe110411070>))

In [87]:
context:" "
question = "What are the advantages of using LSTMs?"

response = llm_query(context, question)
print(response)

SyntaxError: invalid syntax (1471955554.py, line 1)

In [60]:
llm_chain.invoke({"context":"", 
                  "question": "What is LSTM?"})

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


KeyboardInterrupt: 

In [None]:
def generate_response(question, index_file, k=5):
    """
    Generate a response to the given question using the FAISS index and LLM chain.

    Args:
        question (str): The question to answer.
        index_file (str): The path to the FAISS index file.
        k (int): The number of nearest neighbors to retrieve (default: 5).

    Returns:
        str: The generated response.
    """
    # Search for relevant document chunks
    results = search_faiss_index(question, index_file, k=k)

    # Combine the relevant document chunks into a context string
    context = ' '.join([chunk for chunk, _ in results])

    # Generate the response using the LLMChain
    #response = chain.run(question=question, context=context)
    llm_chain = LLMChain(llm=local_llm, prompt=prompt)
    llm_chain.invoke({"context":"", 
                  "question": "What is LSTM?"})

    return llm_chain

In [None]:
question = "hi"
response = generate_response(question, "/opt/llm/Vector_Store/Faiss_store",k=5)
print(response)

In [None]:
# Make sure to provide the correct file path for the FAISS index
index_file_path = "/opt/llm/Vector_Store/Faiss_store"


In [None]:
def generate_response(question, index_file, k=5):
    """
    Generate a response to the given question using the FAISS index and LLM chain.

    Args:
        question (str): The question to answer.
        index_file (str): The path to the FAISS index file.
        k (int): The number of nearest neighbors to retrieve (default: 5).

    Returns:
        str: The generated response.
    """
    # Search for relevant document chunks
    results = search_faiss_index(question, index_file, k=k)

    # Combine the relevant document chunks into a context string
    context = ' '.join([chunk for chunk, _ in results])

    # Generate the response using the LLMChain
    #response = chain.run(question=question, context=context)
    llm_chain = LLMChain(llm=local_llm, prompt=prompt)
    
    return llm_chain

In [None]:
llm_chain.invoke({"context":"", 
                  "question": "What is LSTM?"})

In [None]:
question = "What is LSTM"

# Generate the response
answer = generate_response(question)
print(answer)

In [None]:
rag_chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
import faiss

In [None]:
# Set up embeddings
EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl"
EMBEDDING_MODEL = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

def create_question_embedding(question):
    question_embedding = EMBEDDING_MODEL.embed_query(question)
    return question_embedding

# Set up FAISS index
index = faiss.IndexFlatIP(EMBEDDING_MODEL.embedding_dim)
faiss_index = index

In [None]:
question = "What is LSTM?"
question_embedding = create_question_embedding(question)
faiss_index.add(question_embedding.reshape(1, -1))
result = llm_chain.invoke({"context": "", "question": question})
print(result)