In [28]:
import os
from git import Repo
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import Language
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [3]:
# Show the kernel's current working directory; the relative paths used in
# later cells ("rest_repo/", "./db") resolve against it.
%pwd

'd:\\GenAI\\Bappy\\LiveProject\\GenAI-SourceCodeAnalysis\\research'

In [4]:
# Create the clone target directory in pure Python so the cell is
# idempotent: re-running it no longer emits a shell error when the
# directory already exists.
os.makedirs("rest_repo", exist_ok=True)

In [5]:
# Clone the repository to analyse. On a re-run the checkout already exists
# and `git clone` refuses to write into a non-empty directory, so reuse the
# existing clone instead of cloning again.
repo_path = "rest_repo/"
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(
        "https://github.com/Aminsharif/GenAI-MedicalChatbot.git",
        to_path=repo_path,
    )

In [6]:
# Loader over the cloned repository: matches every path, keeps only files
# with a .py suffix, and parses them with the Python language parser.
python_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)
loader = GenericLoader.from_filesystem(
    repo_path, glob="**/*", suffixes=['.py'], parser=python_parser
)

In [13]:
# Run the loader and keep the parsed documents in `data`.
data = loader.load()

In [14]:
# Python-aware splitter: chunks of 500 with an overlap of 20 between
# neighbouring chunks.
doc_spliter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    chunk_size=500,
    chunk_overlap=20,
)

In [15]:
# Split the loaded source files into chunks for embedding.
documents = doc_spliter.split_documents(data)

In [16]:
# Preview only the first two chunks; echoing the whole list floods the cell
# output and bloats the saved notebook.
documents[:2]

[Document(metadata={'source': 'rest_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='import os\nfrom langchain_groq import ChatGroq\nfrom langchain.chains.combine_documents import create_stuff_documents_chain\nfrom langchain.chains import create_retrieval_chain\nfrom flask import Flask, render_template, jsonify, request\nfrom src.helper import load_huggingface_embeddings\nfrom langchain_pinecone import PineconeVectorStore\nfrom src.prompt import *\nfrom dotenv import load_dotenv\n\napp = Flask(__name__)\n\nload_dotenv()'),
 Document(metadata={'source': 'rest_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='load_dotenv()\n\nPINECONE_API_KEY=os.environ.get(\'PINECONE_API_KEY\')\ngroq_api_key = os.getenv(\'GROQ_API_KEY\')\n\n\nembeddings = load_huggingface_embeddings()\nllm = ChatGroq(model_name = "llama3-8b-8192",temperature=0.5,max_tokens=500, groq_api_key = os.getenv(\'GROQ_API_KEY\'))\n\nindex_name = \'medicalbot\'\n\ndocsearch = PineconeVec

In [29]:
# Embedding model used to vectorise the chunks.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [30]:
# Embed the chunks and build a Chroma vector store with "./db" as its
# persist directory.
vectordb = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory="./db",
)

In [31]:
# Explicitly ask the vector store to persist itself.
# NOTE(review): newer Chroma releases reportedly persist automatically and
# deprecate this call — confirm against the installed version.
vectordb.persist()

In [32]:
# Read the Groq API key from the environment (None when the variable is unset).
groq_api_key = os.environ.get('GROQ_API_KEY')

In [33]:
# Groq-hosted chat model used for both answering and memory summarisation.
llm = ChatGroq(
    model_name='llama3-8b-8192',
    temperature=0.5,
    max_tokens=500,
    groq_api_key=groq_api_key,
)

In [34]:
# Conversation memory stored under "chat_history"; it is given the same LLM
# and configured to return message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

In [35]:
# Retriever over the vector store (search_type "mmr", k=8), wired into a
# conversational retrieval chain together with the LLM and the memory.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 8})
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [36]:
# Question sent to the retrieval chain.
# NOTE(review): the indexed repo's app.py imports `load_huggingface_embeddings`;
# the name in this question is spelled differently — confirm whether intentional.
question = "What is load_hugging_face_embeddings function"

In [37]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa(question)`) is deprecated in current LangChain releases. The memory
# object supplies "chat_history", so only "question" is passed in.
result = qa.invoke({"question": question})

In [38]:
# Show only the answer text from the chain's result mapping.
answer_text = result['answer']
print(answer_text)


The `load_huggingface_embeddings` function is used to load pre-trained language model embeddings from the Hugging Face Transformers library. The function returns an instance of the `HuggingFaceEmbeddings` class, which is a wrapper around the pre-trained model.

The function takes the model name as an argument, which specifies the pre-trained model to load. In this case, the model name is `"sentence-transformers/all-MiniLM-L6-v2"`, which is a pre-trained sentence transformer model.

The `load_huggingface_embeddings` function is used to load the pre-trained embeddings for the purpose of generating vector representations of text documents. These embeddings are then used to index and search the documents in the Pinecone vector store.

Here is the code snippet for the `load_huggingface_embeddings` function:
```
def load_huggingface_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings
```
This function is used to load