In [None]:
pip install pdfplumber

In [None]:
pip install faiss-gpu

In [None]:
pip install gradio

In [None]:
pip install langchain-community

In [None]:
# pip install pypdf

In [None]:
import gradio as gr
import os
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import pdfplumber

#load documents
def load_documents(data_dir):
    """Load raw text from the PDF and TXT files directly inside ``data_dir``.

    PDF files are read page by page with pdfplumber; TXT files are read
    through ``TextLoader``. Files with any other extension are ignored, and
    sub-directories are not descended into.

    Args:
        data_dir: Path of the directory to scan.

    Returns:
        A list of strings: one entry per PDF page that produced text, and one
        entry per document returned by ``TextLoader`` for each TXT file.
    """
    documents = []
    # Sorted so the document order does not depend on the OS listing order.
    for file_name in sorted(os.listdir(data_dir)):
        file_path = os.path.join(data_dir, file_name)
        lowered_name = file_name.lower()  # accept ".PDF" / ".TXT" as well
        if lowered_name.endswith(".pdf"):
            with pdfplumber.open(file_path) as pdf:
                for page in pdf.pages:
                    page_text = page.extract_text()
                    # Pages without a text layer (e.g. scanned images) may give
                    # None or an empty string; skip them so downstream code
                    # only ever sees real text.
                    if page_text and page_text.strip():
                        documents.append(page_text)
        elif lowered_name.endswith(".txt"):
            loaded = TextLoader(file_path).load()
            documents.extend(doc.page_content for doc in loaded)
    return documents


#process docs with hugging face
def process_documents(docs):
    """Split documents into overlapping chunks and create the embedding model.

    Args:
        docs: Iterable of inputs to chunk. Items may be plain strings or
            document objects accepted by the splitter's ``split_documents``.
            ``None`` items and blank strings are skipped.

    Returns:
        A ``(chunks, embeddings)`` tuple: the list of chunked documents
        (chunks from string inputs first, then chunks from document objects)
        and the ``HuggingFaceEmbeddings`` instance to embed them with.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)

    # split_documents() reads .page_content/.metadata from every item, so raw
    # strings must go through create_documents() instead.
    raw_texts = []
    document_objects = []
    for doc in docs:
        if doc is None:
            continue
        if isinstance(doc, str):
            if doc.strip():
                raw_texts.append(doc)
        else:
            document_objects.append(doc)

    chunks = []
    if raw_texts:
        chunks.extend(text_splitter.create_documents(raw_texts))
    if document_objects:
        chunks.extend(text_splitter.split_documents(document_objects))

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return chunks, embeddings

#store the vector
def build_vector_store(chunks, embeddings, index_path="retriever_index"):
    """Create a FAISS vector store from ``chunks`` and save it to disk.

    Args:
        chunks: Non-empty list of chunked documents to index.
        embeddings: Embedding model used to vectorize the chunks.
        index_path: Directory the index is saved to. Defaults to
            ``"retriever_index"``.

    Returns:
        The in-memory FAISS vector store that was just built and saved.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index.
    """
    # Fail early with a clear message instead of an opaque error from inside
    # the FAISS construction when no text was loaded.
    if not chunks:
        raise ValueError(
            "Cannot build a vector store from zero chunks; "
            "check that the dataset directory contains readable PDF/TXT files."
        )
    vector_store = FAISS.from_documents(chunks, embeddings)
    vector_store.save_local(index_path)
    return vector_store

#load model
def load_llm():
    """Create the GPT-2 text-generation pipeline and wrap it for LangChain.

    Returns:
        A ``HuggingFacePipeline`` around a ``text-generation`` pipeline for
        the ``gpt2`` model, limited to 50 newly generated tokens.
    """
    generator = pipeline(
        "text-generation",
        model="gpt2",
        max_new_tokens=50,
    )
    llm = HuggingFacePipeline(pipeline=generator)
    return llm

# Building the QA Chain
def build_chain(llm, vector_store):
    """Assemble a RetrievalQA chain over the given vector store.

    Args:
        llm: Language model the chain uses to produce answers.
        vector_store: Vector store exposing ``as_retriever()``.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"`` that retrieves through the
        vector store's retriever.
    """
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        # The vector store is exposed to the chain through its retriever view.
        retriever=vector_store.as_retriever(),
    )

# Load pre-built index or create a new one
# Holds the chain after the first successful get_chain() call, so the
# embedding model, FAISS index and LLM are loaded once rather than per query.
_CHAIN_CACHE = None


def get_chain():
    """Return the QA chain, building or loading the FAISS index on first use.

    On the first call, the index is built from the documents in the dataset
    directory if ``retriever_index`` does not exist on disk; otherwise the
    saved index is loaded. The resulting chain is cached and returned
    unchanged on every later call.

    Returns:
        The RetrievalQA chain produced by ``build_chain``.
    """
    global _CHAIN_CACHE
    if _CHAIN_CACHE is not None:
        return _CHAIN_CACHE

    if not os.path.exists("retriever_index"):
        print("Building the index from documents...")
        docs = load_documents(data_dir="path_to_dataset")  # Change path to your dataset directory
        chunks, embeddings = process_documents(docs)
        vector_store = build_vector_store(chunks, embeddings)
    else:
        print("Loading the pre-built index...")
        embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        # NOTE(security): load_local unpickles data from disk. This is only
        # acceptable because the index is written locally by this program;
        # never point it at an index obtained from an untrusted source.
        vector_store = FAISS.load_local("retriever_index", embeddings, allow_dangerous_deserialization=True)

    # Both branches finish the same way: wrap the store in a QA chain.
    _CHAIN_CACHE = build_chain(load_llm(), vector_store)
    return _CHAIN_CACHE

#response to query
def respond_to_query(user_input):
    """Answer a user question with the QA chain and tidy the generated text.

    Args:
        user_input: The question typed by the user.

    Returns:
        The text following the first ``"Helpful Answer:"`` marker in the
        chain's ``result`` (or the whole result when the marker is absent),
        with newlines replaced by spaces and surrounding whitespace removed.
        For a missing or blank question, a short prompt asking the user to
        enter one is returned and the chain is not invoked.
    """
    # Do not spend a retrieval + generation round-trip on an empty question.
    if user_input is None or not user_input.strip():
        return "Please enter a question."

    chain = get_chain()
    response = chain.invoke(user_input)
    # Treat a missing or None result the same as an empty answer.
    raw_response = response.get('result', '') or ''

    # The generated text echoes the prompt; keep only what follows the marker.
    _, marker, answer_tail = raw_response.partition("Helpful Answer:")
    helpful_answer = answer_tail if marker else raw_response

    return helpful_answer.replace('\n', ' ').strip()


def main():
    """Build the Gradio chat interface and start serving it in debug mode."""
    question_box = gr.Textbox(
        label="Ask me anything:",
        placeholder="Type your question here...",
    )
    answer_box = gr.Textbox(label="Bot's Response")

    # Single-input, single-output UI backed by respond_to_query.
    app = gr.Interface(
        fn=respond_to_query,
        inputs=question_box,
        outputs=answer_box,
        title="Mental Wellness AI Chatbot",
    )
    app.launch(debug=True)


# Start the UI only when executed directly, not when imported.
if __name__ == "__main__":
    main()