In [None]:
print("ok")

In [None]:
import PyPDF2
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA
import os
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from transformers import pipeline

In [None]:
# Step 1: Extract text from multiple PDFs
def extract_text_from_pdfs(pdf_folder):
    text = ""
    for file_name in os.listdir(pdf_folder):
        if file_name.endswith(".pdf"):
            pdf_path = os.path.join(pdf_folder, file_name)
            with open(pdf_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                for page in reader.pages:
                    text += page.extract_text() + "\n"
    return text

In [None]:
def split_text_into_chunks(text, chunk_size=500, chunk_overlap=50):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    return text_splitter.split_text(text)

In [None]:
def create_vector_database(chunks, embeddings_model):
    save_path = r"D:/AI-Projects/hydrogen-chatbot/vectore_db/vector_db.faiss"
    
    if os.path.exists(save_path):
        print(f"Loading existing vector database from {save_path}...")
        vector_db = FAISS.load_local(save_path, HuggingFaceEmbeddings(model_name=embeddings_model),allow_dangerous_deserialization=True)
    else:
        if not chunks:
            raise ValueError("Chunks are required to create a new vector database.")
        print("Creating a new vector database...")
        embeddings = HuggingFaceEmbeddings(model_name=embeddings_model)
        vector_db = FAISS.from_texts(chunks, embeddings)
        vector_db.save_local(save_path)
        print(f"Vector database created and saved to {save_path}")
    
    return vector_db

In [None]:
def load_pretrained_model(model_path):
    llm = pipeline("question-answering", "timpal0l/mdeberta-v3-base-squad2")
    return llm

In [None]:
def build_chatbot(vector_db, llm):
    retriever = vector_db.as_retriever()
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True
    )
    return qa_chain

In [None]:
pdf_folder = "D:\AI-Projects\hydrogen-chatbot\data"  # Folder containing multiple PDF files
embeddings_model = "sentence-transformers/all-MiniLM-L6-v2"  # Open-source embeddings
#pretrained_model_path = "D:\AI-Projects\hydrogen-market-chatbot\model\pytorch_model.bin"  # Path to the pre-trained Llama model

In [None]:
data = extract_text_from_pdfs(pdf_folder)

In [None]:
# Split text into chunks
print("Splitting text into chunks...")
chunks = split_text_into_chunks(data)

In [None]:
vector_db = create_vector_database(chunks,embeddings_model)

In [None]:
retriever = vector_db.as_retriever(search_type="similarity_score_threshold",search_kwargs={'score_threshold': 0.4})

In [None]:
retriver_docs = retriever.invoke("The easiest and most mature way to store hydrogen gas is") 

In [None]:
retriver_docs

In [None]:
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, clearly state that you don't know, and avoid making up an answer.

Context: {context}
Question: {question}

Please provide a detailed and informative response based on the context given. If relevant, elaborate on key points to enhance understanding.

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [None]:
PROMPT = PromptTemplate(template=prompt_template, input_veriables=['context','question'])
chain_type_kwargs={"prompt":PROMPT}

In [None]:
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"

# Define model_kwargs
model_kwargs = {
    "max_length": 128
}

# Initialize the model with the updated parameters
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    token="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",
    temperature=0.7,  # Pass temperature explicitly
    model_kwargs=model_kwargs
)

In [None]:
llm.invoke("The easiest and most mature way to store hydrogen gas is")

In [29]:
qa = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=vector_db.as_retriever(search_kwargs={'k': 2,'score_threshold': 0.7},search_type="similarity_score_threshold"),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [None]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa.invoke({"query": user_input})
    print("Response : ", result["result"])


In [30]:
print("Chatbot is ready! Type 'exit' to quit.")
while True:
    user_input=input(f"Input Prompt:")
    if user_input.lower() == 'exit':
        break
    result=qa.invoke({"query": user_input})
    print("Response : ", result["result"])

Chatbot is ready! Type 'exit' to quit.


No relevant docs were retrieved using the relevance score threshold 0.7


Response :  
The most mature and safest method for storing hydrogen gas is in a high-pressure cylinder made of a strong material like steel or aluminum alloy. The cylinders are designed to withstand the pressure and are equipped with safety features such as pressure relief valves and overfill protection devices to prevent over-pressurization. It's important to store hydrogen in a cool, dry, and well-ventilated area to minimize the risk of a fire or explosion. Additionally, hydrogen should be stored away from incompatible materials and sources of ignition.


No relevant docs were retrieved using the relevance score threshold 0.7


Response :  Amal Chuhan is a fictional character from the popular Netflix series "Mindhunter." She is portrayed by the actress Holt McCallany. Amal Chuhan is an FBI agent who works closely with the protagonists Holden Ford and Bill Tench, helping them in their efforts to understand the psychology of serial killers. She is known for her sharp intellect and strong determination in her work.


No relevant docs were retrieved using the relevance score threshold 0.7


Response :  
The most mature and safest method for storing hydrogen gas is in a high-pressure cylinder made of a strong material like steel or aluminum alloy. The cylinders are designed to withstand the pressure and are equipped with safety features such as pressure relief valves and overfill protection devices to prevent over-pressurization. It's important to store hydrogen in a cool, dry, and well-ventilated area to minimize the risk of a fire or explosion. Additionally, hydrogen should be stored away from incompatible materials and sources of ignition.
