In [1]:
from git import Repo
import os
from dotenv import load_dotenv
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_mistralai import MistralAIEmbeddings
from langchain_chroma import Chroma
from uuid import uuid4
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_core.runnables import RunnableMap, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage


In [16]:
%pwd

'c:\\Users\\manav\\OneDrive\\Desktop\\Source Code Analysis\\research'

In [17]:
!mkdir test_repo

A subdirectory or file test_repo already exists.


In [2]:
repo_path = "test_repo/"

# repo = Repo.clone_from("https://github.com/entbappy/End-to-end-Medical-Chatbot-Generative-AI", to_path=repo_path)

In [3]:
loader = GenericLoader.from_filesystem(repo_path,
                                        glob = "**/*",
                                       suffixes=[".py"],
                                       parser = LanguageParser(language="python", parser_threshold=500)
)

In [4]:
documents = loader.load()

In [5]:
len(documents)

7

In [5]:
documents_splitter = RecursiveCharacterTextSplitter.from_language(language = "python",
                                                             chunk_size = 500,
                                                             chunk_overlap = 20)

In [6]:
texts = documents_splitter.split_documents(documents)

In [19]:
load_dotenv()

True

In [8]:
embeddings = MistralAIEmbeddings(
    model="mistral-embed",
)

In [9]:
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

In [10]:
uuids = [str(uuid4()) for _ in range(len(texts))]

vector_store.add_documents(documents=texts, ids=uuids)

['ec53288e-9a39-4413-b566-ad77923a1565',
 'bc551c82-a6e2-4b9b-a950-5ec2e961957f',
 '901d663d-1ef1-42fd-bef2-a54890e4967e',
 'c9eb8e47-75b9-4172-9f07-d4238e330e9e',
 '4614aa8a-dc9b-444e-88ec-14801cb73871',
 'ebcd199d-e6fe-4948-9247-9e1b6c5f3f50',
 'b00c102f-4c1f-4063-a008-62a66cdffaea',
 '3ea990d8-1723-476d-8bd0-10ab41feab8f',
 'bc5155f7-42cd-45b3-81cd-a816608db4cc',
 '05c6d2d6-4828-43d4-b773-d4722ae4df8d',
 '19c9aeb7-27a1-4b55-bf54-c58faa3318f0',
 'cb440bb5-0905-4d5d-ae13-93e541e5458f',
 'd76ae44c-537c-4e12-8f30-a94f8892de96']

In [11]:
retriever = vector_store.as_retriever()

In [25]:
llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0.7,
    max_retries=2,
    # other params...
)

In [26]:
memory_store = []  # single global chat history list

def load_history():
    return memory_store

def save_message(user_input, ai_output):
    global memory_store
    history = memory_store
    history.extend([
        HumanMessage(content=user_input),
        AIMessage(content=ai_output)
    ])
    
    # If too long, summarize older parts
    if len(history) > 8:
        summarized = summarize_history(history[:-4])
        history = [AIMessage(content=f"Summary of previous conversation: {summarized}")] + history[-4:]
    
    memory_store = history


def summarize_history(messages):
    """Summarize older parts of the chat to reduce token load."""
    summary_text = "\n".join(
        [f"{m.type.upper()}: {m.content}" for m in messages]
    )
    
    summary_prompt = f"""
    Summarize the following conversation in 3-4 concise bullet points
    that capture all key facts and context needed to continue it later.

    Conversation:
    {summary_text}
    """

    summary = llm.invoke(summary_prompt)
    return summary.content.strip()

# 4️⃣ Prompt with message placeholder
prompt = ChatPromptTemplate.from_messages([
        ("system", "You are a helpful assistant that answers using both chat history and provided documents."),
        MessagesPlaceholder("history"),
        ("human", "Context:\n{context}\n\nQuestion: {question}")
    ])

In [27]:
# 5️⃣ Runnable chain
chain = (
    RunnableMap({
        "history": lambda _: load_history(),
        "context": lambda x: retriever.invoke(x["question"]),
        "question": RunnablePassthrough(),
    })
    | prompt
    | llm
    | StrOutputParser()
)

In [28]:
def ask(question):
    response = chain.invoke({"question": question})
    save_message(question, response)
    return response

In [29]:
print(ask("what is download_hugging_face_embeddings funtion?"))

The `download_hugging_face_embeddings` function is defined in the `helper.py` file. Here's what it does:

- **Purpose**: It downloads and initializes a Hugging Face sentence transformer model for generating embeddings.
- **Implementation**:
  ```python
  def download_hugging_face_embeddings():
      embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
      return embeddings
  ```
- **Details**:
  - It uses the `HuggingFaceEmbeddings` class to load the `sentence-transformers/all-MiniLM-L6-v2` model.
  - This model generates embeddings with **384 dimensions**.
  - The function returns the initialized embeddings object, which can be used to convert text into vector embeddings.

This function is imported and used in the `app.py` file, indicating its role in the application's embedding generation process.


In [51]:
%cd ..

c:\Users\manav\OneDrive\Desktop\Source Code Analysis


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [52]:
%pwd

'c:\\Users\\manav\\OneDrive\\Desktop\\Source Code Analysis'

In [53]:
!chainlit init

2025-10-28 19:53:45 - Loaded .env file
2025-10-28 19:53:47 - Created default config file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\config.toml
2025-10-28 19:53:47 - Created default translation directory at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\translations
2025-10-28 19:53:47 - Created default translation file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\translations\bn.json
2025-10-28 19:53:47 - Created default translation file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\translations\de-DE.json
2025-10-28 19:53:47 - Created default translation file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\translations\el-GR.json
2025-10-28 19:53:47 - Created default translation file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chainlit\translations\en-US.json
2025-10-28 19:53:47 - Created default translation file at c:\Users\manav\OneDrive\Desktop\Source Code Analysis\.chain