# Import required libraries

In [1]:
import os
from getpass import getpass

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.document_loaders import (PyPDFLoader,TextLoader,UnstructuredHTMLLoader,UnstructuredWordDocumentLoader,
                        UnstructuredPowerPointLoader,UnstructuredExcelLoader,CSVLoader,JSONLoader,PythonLoader,NotebookLoader,
                        YoutubeLoader,UnstructuredFileLoader)

In [2]:
import warnings
warnings.filterwarnings("ignore")

# Creating Embedding model

In [3]:
# Never store the Hugging Face token in the notebook itself: take it from the
# environment and, if it is not set there, ask for it interactively so the
# secret is not saved with the file.
HF_TOKEN = (
    os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    or os.environ.get("HF_TOKEN")
    or getpass("Hugging Face API token: ")
)

# Sentence-embedding model used by every loader to index documents in FAISS.
embedding = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

# Creating prompt template 

In [4]:
# Prompt shared by all loaders. The {context} and {question} placeholders are
# picked up from the template text itself.
prompt = PromptTemplate.from_template(
    """
    You are a helpful assistant
    Answer only from the provided transcript context.
    If the context is insufficient, just say you don't know.
    {context}
    Question :{question}
    """
)

# Creating LLM using Hugging Face API

In [5]:
# Text-generation endpoint for the Gemma instruction-tuned model,
# authenticated with the notebook-level HF_TOKEN.
llm = HuggingFaceEndpoint(
    task="text-generation",
    repo_id="google/gemma-2-2b-it",
    huggingfacehub_api_token=HF_TOKEN,
)

# Chat wrapper around the endpoint; this is what the chain calls.
model = ChatHuggingFace(temperature=0.7, llm=llm)

# Creating String Parser

In [6]:
# Output parser placed after the model in the chain.
parser = StrOutputParser()

# Creating chain

In [7]:
# Pipeline: the chat model's output is piped into the string parser.
chain = model | parser

#### PDF Loader

In [8]:
def pdf_loader(url, query):
    """Build the final prompt for a question about a PDF document.

    The file is loaded with ``PyPDFLoader``, indexed in a FAISS store using
    the notebook-level ``embedding`` model, and the 3 documents most similar
    to ``query`` are used as context. The index is rebuilt on every call.

    Args:
        url: Location of the PDF, passed straight to ``PyPDFLoader``.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = PyPDFLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Document Loader

In [9]:
def doc_loader(url, query):
    """Build the final prompt for a question about a Word document.

    The file is loaded with ``UnstructuredWordDocumentLoader``, indexed in a
    FAISS store using the notebook-level ``embedding`` model, and the 3
    documents most similar to ``query`` are used as context. The index is
    rebuilt on every call.

    Args:
        url: Location of the document, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = UnstructuredWordDocumentLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Text Loader

In [10]:
def text_loader(url, query):
    """Build the final prompt for a question about a plain-text file.

    The file is loaded with ``TextLoader`` (``autodetect_encoding=True``),
    indexed in a FAISS store using the notebook-level ``embedding`` model,
    and the 3 documents most similar to ``query`` are used as context. The
    index is rebuilt on every call.

    Args:
        url: Location of the text file, passed straight to ``TextLoader``.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = TextLoader(url, autodetect_encoding=True)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### HTML Loader

In [11]:
#UnstructuredHTMLLoader
def html_loader(url, query):
    """Build the final prompt for a question about an HTML file.

    The file is loaded with ``UnstructuredHTMLLoader`` (``mode="elements"``,
    ``strategy="fast"``), indexed in a FAISS store using the notebook-level
    ``embedding`` model, and the 3 documents most similar to ``query`` are
    used as context. The index is rebuilt on every call.

    Args:
        url: Location of the HTML file, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = UnstructuredHTMLLoader(url, mode="elements", strategy="fast")
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Powerpoint Loader

In [12]:
#UnstructuredPowerPointLoader
def powerpoint_loader(url, query):
    """Build the final prompt for a question about a PowerPoint file.

    The file is loaded with ``UnstructuredPowerPointLoader``, indexed in a
    FAISS store using the notebook-level ``embedding`` model, and the 3
    documents most similar to ``query`` are used as context. The index is
    rebuilt on every call.

    Args:
        url: Location of the presentation, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    # Setup note: pip install langchain-community unstructured python-pptx
    loader = UnstructuredPowerPointLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Excel Loader

In [13]:
#UnstructuredExcelLoader
def excel_loader(url, query):
    """Build the final prompt for a question about an Excel workbook.

    The file is loaded with ``UnstructuredExcelLoader`` (``mode="elements"``),
    indexed in a FAISS store using the notebook-level ``embedding`` model,
    and the 3 documents most similar to ``query`` are used as context. The
    index is rebuilt on every call.

    Args:
        url: Location of the workbook, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    # Setup note: pip install "unstructured[xlsx]"
    loader = UnstructuredExcelLoader(url, mode="elements")
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### CSV Loader

In [14]:
#CSVLoader
def csv_loader(url, query):
    """Build the final prompt for a question about a CSV file.

    The file is loaded with ``CSVLoader`` (comma delimiter, double-quote
    quote character), indexed in a FAISS store using the notebook-level
    ``embedding`` model, and the 3 documents most similar to ``query`` are
    used as context. The index is rebuilt on every call.

    Args:
        url: Location of the CSV file, passed straight to ``CSVLoader``.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = CSVLoader(
        url,
        csv_args={
            'delimiter': ',',
            'quotechar': '"',
        },
    )
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### JSON Loader

In [15]:
#JSONLoader(*)
def json_loader(url, query):
    """Build the final prompt for a question about a JSON file.

    The file is loaded with ``JSONLoader`` (``jq_schema="."``,
    ``text_content=False``), indexed in a FAISS store using the
    notebook-level ``embedding`` model, and the 3 documents most similar to
    ``query`` are used as context. The index is rebuilt on every call.

    Args:
        url: Location of the JSON file, passed straight to ``JSONLoader``.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    # Setup note: pip install -U jq
    loader = JSONLoader(url, jq_schema=".", text_content=False)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Python Loader

In [16]:
#PythonLoader
def python_loader(url, query):
    """Build the final prompt for a question about a Python source file.

    The file is loaded with ``PythonLoader``, indexed in a FAISS store using
    the notebook-level ``embedding`` model, and the 3 documents most similar
    to ``query`` are used as context. The index is rebuilt on every call.

    Args:
        url: Location of the ``.py`` file, passed straight to ``PythonLoader``.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = PythonLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Notebook Loader

In [17]:
#NotebookLoader(*)
def notebook_loader(url, query):
    """Build the final prompt for a question about a Jupyter notebook.

    The file is loaded with ``NotebookLoader``, indexed in a FAISS store
    using the notebook-level ``embedding`` model, and the 3 documents most
    similar to ``query`` are used as context. The index is rebuilt on every
    call.

    Args:
        url: Location of the ``.ipynb`` file, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = NotebookLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

#### Unstructured Loader

In [31]:
#UnstructuredFileLoader
def UnstructuredLoader_loader(url, query):
    """Build the final prompt for a question about a file of any other type.

    The file is loaded with the generic ``UnstructuredFileLoader``, indexed
    in a FAISS store using the notebook-level ``embedding`` model, and the 3
    documents most similar to ``query`` are used as context. The index is
    rebuilt on every call.

    Args:
        url: Location of the file, passed straight to the loader.
        query: The user's question.

    Returns:
        The result of ``prompt.invoke`` with ``context`` set to the retrieved
        text and ``question`` set to ``query``.
    """
    loader = UnstructuredFileLoader(url)
    data = loader.load()
    vector_store = FAISS.from_documents(data, embedding)
    retrieval = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 3})
    # Hand the model only the text of each hit. Interpolating the raw list
    # would put the Document reprs (metadata, escaped newlines) in the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieval.invoke(query))
    final_prompt = prompt.invoke({"context": context, "question": query})
    return final_prompt

###### Create a unified loader by combining all individual loaders

In [35]:
# Interactive question-answering session.
#   Outer loop: ask for a document path ("exit" ends the session).
#   Inner loop: ask questions about that document ("exit" returns to the
#               path prompt).
try:
    while True:
        # input() already returns backslashes verbatim, so no raw-string
        # wrapping is needed; only trim whitespace and surrounding quotes
        # (e.g. from a pasted, quoted path).
        path = input("Enter your document path or exit: ").strip().strip("\"'")
        if path.lower() == "exit":
            break
        if not os.path.isfile(path):
            # Report a bad path here, once, and ask again instead of ending
            # the session or blaming the path for unrelated failures later.
            print("Please enter valid path")
            continue

        # Choose the loader from the file extension, ignoring case. Anything
        # other than notebooks and JSON goes through the generic loader.
        extension = os.path.splitext(path)[1].lower()
        if extension == ".ipynb":
            build_prompt = notebook_loader
        elif extension == ".json":
            build_prompt = json_loader
        else:
            build_prompt = UnstructuredLoader_loader

        while True:
            query = input("Ask any think or exit: ").strip()
            if query.lower() == "exit":
                break
            if not query:
                # Nothing to ask; prompt again rather than query the model.
                continue
            try:
                answer = chain.invoke(build_prompt(path, query))
            except Exception as error:
                # Keep the session alive and show the cause. Unlike a bare
                # ``except``, this lets KeyboardInterrupt/SystemExit through.
                print("I’m not able to read the uploaded document.I don’t have sufficient expertise..")
                print(f"Reason: {type(error).__name__}: {error}")
                continue
            print(f"Your Query: {query}\nAI Message: {answer}")
except (KeyboardInterrupt, EOFError):
    # The kernel was interrupted or stdin was closed while waiting for input.
    print("Session interrupted.")
finally:
    print("Thankyou!")
    

Enter your document path or exit:  C:\Users\sanjeev2.prajapati\Downloads\Untitled.ipynb
Ask any think or exit:  summarize in one line


Your Query: summarize in one line
AI Message: This code defines a program that uses a YouTube transcript API, vector stores, and a language model to answer questions about videos. 



Ask any think or exit:  exit
Enter your document path or exit:  exit


Thankyou!
