In [1]:
from git import Repo
from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain

In [2]:
# Create the checkout directory in a portable, idempotent way: unlike the
# `mkdir` shell command this does not fail when the directory already exists,
# so the cell can be re-run safely.
os.makedirs("test_repo", exist_ok=True)

In [3]:
repo_path = "test_repo/"

# Clone only on the first run. Repo.clone_from raises when the target already
# holds a checkout, which would break re-running this cell (or Restart & Run
# All). On later runs the existing working copy is opened instead.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from("https://github.com/SayanDas07/edunet-foundation", to_path=repo_path)

In [39]:
# Walk the cloned repository recursively and pick up Python sources only
# (every path is matched by the glob, then filtered down to the ".py" suffix).
python_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=python_parser,
)

In [40]:
# Run the loader configured above to get the parsed Python sources as documents.
documents = loader.load()

In [41]:
# Python-aware splitter: chunks of at most 300 characters, with 20 characters
# of overlap between neighbouring chunks.
splitter_options = {"chunk_size": 300, "chunk_overlap": 20}
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.PYTHON,
    **splitter_options,
)

In [42]:
# Split the loaded documents into chunks; these are what gets embedded and indexed below.
texts = documents_splitter.split_documents(documents)

In [43]:
# Sanity check: number of chunks produced by the splitter.
len(texts)

44

In [8]:
# Load environment variables via python-dotenv before any API client is created.
# NOTE(review): presumably this is where the Google API key used by the
# embedding/chat clients below comes from — confirm against the .env file.
from dotenv import load_dotenv
load_dotenv()


True

In [9]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

# Embedding model used to vectorise the code chunks for the Chroma index below.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Build the vector index once and reuse it on later runs. Chroma.from_documents
# adds the given chunks to the collection persisted in ./db, so calling it again
# on every re-run would store (and later retrieve) each chunk multiple times.
# Delete the ./db directory to force a rebuild after the chunks change.
persist_directory = './db'
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # A persisted index already exists: open it instead of re-embedding everything.
    vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    vectordb = Chroma.from_documents(texts, embedding=embeddings, persist_directory=persist_directory)

In [46]:
# Chat model shared by the conversation memory and the retrieval chain below.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.4, max_tokens=700)

In [47]:
# Conversation memory backed by the same LLM; the history is exposed to the
# chain under the "chat_history" key and returned as message objects.
memory = ConversationSummaryMemory(llm=llm, memory_key = "chat_history", return_messages=True)

In [48]:
# Retriever over the vector index: MMR search with k=8.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 8})

# Conversational question-answering chain combining the LLM, the retriever and the memory.
qa = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory)

In [49]:
# Ask the chain a question about the indexed repository; the answer is read
# from result['answer'] in the next cell.
# NOTE(review): the question text contains a typo ("whhat"); left unchanged here.
result = qa.invoke({
    "question": "whhat is def load_model()?"
})


In [45]:
# Show only the answer text from the chain's result.
print(result['answer'])

The definition of `load_model()` is:

```python
def load_model():
    model = joblib.load("diabetes_trained_model.pkl")  
    scaler = joblib.load("scaler.pkl")        
    return model, scaler
```

It loads a pre-trained diabetes prediction model and a scaler from files named "diabetes_trained_model.pkl" and "scaler.pkl" respectively, using the `joblib` library.  It then returns both the model and the scaler.

