<a href="https://colab.research.google.com/github/McInotsh-On-Mac/JaneaAI/blob/main/JaneaAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain_groq



In [None]:
!pip install pypdf



In [None]:
!pip install chromadb



In [None]:
!pip install -U langchain-community



In [None]:
!pip install unstructured[local-inference]



In [None]:
!pip install gradio



In [122]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain_groq import ChatGroq
import gradio as gr  # Import Gradio


def initialize_llm():
    """Initialize the ChatGroq language model.

    The Groq API key is read from the ``GROQ_API_KEY`` environment variable
    instead of being embedded in the source, so the credential is never
    stored alongside the notebook.

    Returns:
        ChatGroq: The initialized ChatGroq language model, or None if the
        API key is missing or the client could not be constructed.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        # Fail early with an actionable message rather than letting the
        # client be built with an unusable credential.
        print("Error initializing ChatGroq: GROQ_API_KEY environment variable is not set")
        return None

    try:
        return ChatGroq(
            temperature=0,
            groq_api_key=api_key,
            model_name="llama-3.3-70b-versatile",
        )
    except Exception as e:
        print(f"Error initializing ChatGroq: {e}")
        return None


def create_vector_db(persist_directory="./chroma_db", pdf_folder_path="/content/data/"):
    """
    Creates a Chroma vector database from PDF files in a directory.

    Every ``*.pdf`` file directly inside ``pdf_folder_path`` is loaded, split
    into overlapping chunks, embedded, and stored in a Chroma collection that
    is persisted to ``persist_directory``.

    Args:
        persist_directory (str, optional): The directory to persist the Chroma database.
            Defaults to "./chroma_db".
        pdf_folder_path (str, optional): The path to the directory containing the PDF files.
            Defaults to "/content/data/".

    Returns:
        Chroma: The Chroma vector database, or None on error (including the
        case where no PDF documents could be loaded).
    """
    try:
        loader = DirectoryLoader(pdf_folder_path, glob='*.pdf', loader_cls=PyPDFLoader)
        documents = loader.load()
        if not documents:
            # Nothing to index: bail out instead of building and persisting
            # a store with no content for the retriever to search.
            print(f"Error creating vector database: no PDF documents found in {pdf_folder_path}")
            return None

        # 500-character chunks with a 50-character overlap between neighbours.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        texts = text_splitter.split_documents(documents)
        embeddings = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
        vector_db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
        # NOTE(review): recent Chroma releases are documented to persist
        # automatically when persist_directory is set; confirm whether this
        # explicit call is still required for the installed version.
        vector_db.persist()
        print("ChromaDB created and data saved")
        return vector_db
    except Exception as e:
        print(f"Error creating vector database: {e}")
        return None


def setup_qa_chain(vector_db, llm):
    """
    Build the retrieval-backed question-answering chain.

    The vector store is exposed as a retriever and combined with a fixed
    chatbot prompt in a "stuff"-type RetrievalQA chain.

    Args:
        vector_db (Chroma): Vector store used to fetch context passages.
        llm: Language model that generates the answer.

    Returns:
        RetrievalQA: The assembled chain, or None if any step raised.
    """
    template_text = """You are a compassionate mental health chatbot. Respond thoughtfully to the following question:
        {context}
        User: {question}
        Chatbot: """
    try:
        doc_retriever = vector_db.as_retriever()
        qa_prompt = PromptTemplate(
            input_variables=['context', 'question'],
            template=template_text,
        )
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=doc_retriever,
            chain_type_kwargs={"prompt": qa_prompt},
        )
    except Exception as err:
        print(f"Error setting up QA chain: {err}")
        return None
    return chain


def predict(message, history):
    """Answer a chat message using the module-level QA chain.

    Args:
        message: The user's latest message; passed to the chain as the query.
        history: Previous conversation turns. Unused here; presumably supplied
            by the Gradio chat interface that this function is registered with.

    Returns:
        str: The chain's response, a placeholder message when the chain has
        not been set up yet, or an error description if the query raised.
    """
    # Look the chain up dynamically: ``qa_chain`` is only bound when the file
    # runs as a script, so a plain global read would raise NameError when the
    # module is imported and this function is called before main().
    chain = globals().get("qa_chain")
    if chain is None:
        return "Chatbot is initializing. Please wait..."
    try:
        return chain.run(message)
    except Exception as e:
        return f"Error during query: {e}"


def main():
    """Wire up the model, vector store and QA chain, then start the Gradio chat UI.

    Each stage prints a message and returns early if it fails. On success the
    assembled chain is stored in the module-level ``qa_chain`` so that
    ``predict`` can use it.
    """
    global qa_chain
    print("Initializing Chatbot.........")

    llm = initialize_llm()
    if llm is None:
        print("Failed to initialize language model. Exiting.")
        return

    db_path = "./chroma_db"
    vector_db = None

    # Reuse an existing on-disk store when there is one.
    if os.path.exists(db_path):
        try:
            embedder = HuggingFaceBgeEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
            vector_db = Chroma(persist_directory=db_path, embedding_function=embedder)
        except Exception as exc:
            print(f"Error loading existing database: {exc}")

    # No store on disk, or loading it failed: build a fresh one.
    if vector_db is None:
        vector_db = create_vector_db()
        if vector_db is None:
            print("Failed to create vector database. Exiting.")
            return

    qa_chain = setup_qa_chain(vector_db, llm)
    if qa_chain is None:
        print("Failed to set up QA chain. Exiting.")
        return

    # Build the chat front end around predict() and start serving it.
    chat_ui = gr.ChatInterface(
        fn=predict,
        title="JaneaAI",
        description="Ask me anything about mental health!",
    )
    chat_ui.launch()


# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Bind the module-level name before main() runs; predict() reads this
    # global and answers with an "initializing" message while it is None.
    qa_chain = None  # Initialize qa_chain globally
    main()

Initializing Chatbot.........


  self.chatbot = Chatbot(


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://87229c85e7b980995c.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
