In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into the process
# environment; override=True lets values from the file replace variables that
# are already set. GROQ_API_KEY is later read from the environment via os.getenv.
load_dotenv(find_dotenv(), override=True)

True

In [2]:
# loading PDF, DOCX and TXT files as LangChain Documents
def load_document(file):
    """Load a PDF, DOCX or TXT file through the matching LangChain loader.

    The loader is selected from the file extension, compared
    case-insensitively so that ``report.PDF`` is treated like ``report.pdf``.
    Loader classes are imported lazily inside each branch, so only the
    loader for the requested format is imported.

    Args:
        file: Path of the document to load.

    Returns:
        The value returned by the selected loader's ``load()``, or ``None``
        (after printing a message) when the extension is not ``.pdf``,
        ``.docx`` or ``.txt``.
    """
    import os

    # Only the extension is needed; lower-case it so '.PDF' / '.Docx' match.
    extension = os.path.splitext(file)[1].lower()

    if extension == '.pdf':
        from langchain_community.document_loaders import PyPDFLoader
        print(f'Loading {file}')
        loader = PyPDFLoader(file)
    elif extension == '.docx':
        from langchain_community.document_loaders import Docx2txtLoader
        print(f'Loading {file}')
        loader = Docx2txtLoader(file)
    elif extension == '.txt':
        from langchain_community.document_loaders import TextLoader
        # Report progress for text files too, like the other formats.
        print(f'Loading {file}')
        loader = TextLoader(file)
    else:
        print('Document format is not supported!')
        return None

    return loader.load()


# wikipedia
def load_from_wikipedia(query, lang='en', load_max_docs=2):
    """Load Wikipedia content for ``query`` through LangChain's WikipediaLoader.

    Args:
        query: Search text passed to the loader.
        lang: Language code passed to the loader (default ``'en'``).
        load_max_docs: Value passed as the loader's ``load_max_docs``
            (default 2).

    Returns:
        The value returned by the loader's ``load()``.
    """
    # Imported from langchain_community, the package the rest of this file
    # already uses, instead of the deprecated langchain.document_loaders path.
    from langchain_community.document_loaders import WikipediaLoader

    loader = WikipediaLoader(query=query, lang=lang, load_max_docs=load_max_docs)
    return loader.load()
  

In [10]:
def chunk_data(data, chunk_size=256, chunk_overlap=0):
    """Split loaded documents into chunks with RecursiveCharacterTextSplitter.

    Args:
        data: Documents to split, as returned by the loader functions.
        chunk_size: Passed to the splitter as ``chunk_size`` (default 256).
        chunk_overlap: Passed to the splitter as ``chunk_overlap``. Defaults
            to 0, which reproduces the previous hard-coded behaviour.

    Returns:
        The value returned by the splitter's ``split_documents(data)``.
    """
    from langchain.text_splitter import RecursiveCharacterTextSplitter

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)
    

In [14]:
def insert_or_fetch_embeddings(chunks):
    """Embed ``chunks`` and return a FAISS vector store built from them.

    Despite the name, nothing is fetched: every call constructs a
    ``HuggingFaceEmbeddings`` object with its default arguments and builds a
    new store via ``FAISS.from_documents``.

    Args:
        chunks: Documents to index, passed unchanged to
            ``FAISS.from_documents``.

    Returns:
        The store returned by ``FAISS.from_documents``.
    """
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings

    embedding_model = HuggingFaceEmbeddings()
    return FAISS.from_documents(chunks, embedding_model)
    
        
    

In [8]:
def ask_and_get_answer(vector_store, q, k=3, model='llama-3.1-70b-versatile', temperature=1):
    """Answer ``q`` with a RetrievalQA chain over ``vector_store``.

    Args:
        vector_store: Object exposing ``as_retriever``; used as the
            retrieval backend.
        q: The question passed to the chain.
        k: Number of results requested from the similarity retriever.
        model: Groq model name passed to ``ChatGroq``. The default keeps the
            previously hard-coded model.
        temperature: Sampling temperature passed to ``ChatGroq``. The
            default keeps the previously hard-coded value.

    Returns:
        The value returned by ``chain.invoke(q)``. In the notebook's
        recorded run this is a dict with ``'query'`` and ``'result'`` keys.
    """
    # Local import (as load_document does) so this function does not depend
    # on another cell having imported os first.
    import os
    from langchain.chains import RetrievalQA
    from langchain_groq import ChatGroq

    llm = ChatGroq(
        groq_api_key=os.getenv('GROQ_API_KEY'),
        model=model,
        temperature=temperature,
    )
    retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': k})
    chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
    return chain.invoke(q)
    

In [11]:
# Load the source PDF (path relative to the working directory); x holds
# whatever load_document returns for it.
x = load_document("Applied DAX with Power BI From zero to hero with 15-minute lessons (Lachev, Teo) (Z-Library).pdf")

Loading Applied DAX with Power BI From zero to hero with 15-minute lessons (Lachev, Teo) (Z-Library).pdf


  from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4


In [12]:
# Split the loaded document into chunks using chunk_data's default chunk_size (256).
y = chunk_data(x)

In [15]:
# Embed the chunks and build the FAISS vector store that the QA cells query.
z = insert_or_fetch_embeddings(y)

  from tqdm.autonotebook import tqdm, trange


In [18]:
# Single-turn question answered by the RetrievalQA chain over vector store z.
q = 'What is ALLSELECTED() ?'
answer = ask_and_get_answer(z, q)
# Prints the full response returned by the chain, not just the answer text.
print(answer)

{'query': 'What is ALLSELECTED() ?', 'result': 'ALLSELECTED() is a function that removes context filters from columns and rows in the current query, while retaining all other context filters or explicit filters.'}


In [19]:
from langchain_groq import ChatGroq
from langchain.chains import ConversationalRetrievalChain  # Import class for building conversational AI chains 
from langchain.memory import ConversationBufferMemory  # Import memory for storing conversation history

# Build a conversational retrieval chain: a Groq-hosted chat model answers
# questions using chunks retrieved from vector store z, with the chat history
# kept in memory between calls.

# Configure vector store to act as a retriever (similarity search, returning the top 5 matches)
retriever = z.as_retriever(search_type='similarity', search_kwargs={'k': 5})  

# Instantiate the Groq chat model (temperature controls randomness)
groq_api_key = os.getenv('GROQ_API_KEY')
lm = ChatGroq( groq_api_key= groq_api_key, model='llama-3.1-70b-versatile', temperature=1)
# Create a memory buffer to track the conversation
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

crc = ConversationalRetrievalChain.from_llm(
    llm=lm,  # Link the Groq chat model
    retriever=retriever,  # Link the vector store based retriever
    memory=memory,  # Link the conversation memory
    chain_type='stuff',  # Specify the chain type
    verbose=False  # Set to True to enable verbose logging for debugging
)


In [20]:
# create a function to ask questions
def ask_question(q, chain):
    """Send question ``q`` to ``chain`` and return the chain's full response.

    Args:
        q: The question text, passed under the ``'question'`` key.
        chain: Object exposing ``invoke``.

    Returns:
        The value returned by ``chain.invoke``.
    """
    payload = {'question': q}
    return chain.invoke(payload)

In [21]:
# Ask one question through the conversational chain; the printed result
# includes the question, the accumulated chat_history and the answer.
q = 'What is ALL() in DAX ?'
result = ask_question(q, crc)
print(result)

{'question': 'What is ALL() in DAX ?', 'chat_history': [HumanMessage(content='What is ALL() in DAX ?'), AIMessage(content="According to the provided context, the ALL() function in DAX is used to remove existing filters. It transitions the filter context, and you don't have to ignore it with the ALL function. It can be used to remove filters from one or more columns of a table.")], 'answer': "According to the provided context, the ALL() function in DAX is used to remove existing filters. It transitions the filter context, and you don't have to ignore it with the ALL function. It can be used to remove filters from one or more columns of a table."}
