In [2]:
import os
from git import Repo
from langchain.text_splitter import Language
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import conversational_retrieval

In [3]:
# Create the checkout directory in pure Python so the cell is idempotent:
# unlike the `mkdir` shell command, exist_ok=True does not fail on a re-run
# when the directory is already there.
os.makedirs("test_repo", exist_ok=True)

In [4]:
repo_path = "test_repo/"
REPO_URL = "https://github.com/LokeshDangare/Signature-Recognition-System"

# Clone only when no checkout exists yet. Cloning into a directory that
# already holds the repository raises an error, which would break a
# Restart & Run All; in that case just open the existing checkout instead.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(REPO_URL, to_path=repo_path)

In [5]:
# Build a loader over every ".py" file found (recursively) under repo_path.
# Files are handed to a Python-aware LanguageParser.
# NOTE(review): parser_threshold=500 is passed through unchanged; confirm
# its exact meaning against the LanguageParser documentation.
python_parser = LanguageParser(language=Language.PYTHON, parser_threshold=500)

loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],
    parser=python_parser,
)

In [6]:
# Run the loader and keep the resulting list of Document objects.
documents = loader.load()

In [7]:
# Echo the loaded documents to inspect their metadata and page_content.
# NOTE(review): this dumps the whole list; a slice such as documents[:2]
# would keep the saved output small.
documents

[Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='from fastapi import FastAPI, File\nfrom uvicorn import run as app_run\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.responses import Response, JSONResponse\nfrom src.constants import APP_HOST, APP_PORT\nfrom src.pipeline.training import TrainingPipeline\nfrom src.pipeline.prediction import PredictionPipeline\n\napp = FastAPI()\n\norigins = [\'#\']\napp.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=[\'#\'],\n                   allow_headers=[\'#\'])\n\n\n@app.get("/train")\nasync def training():\n    try:\n        train_pipeline = TrainingPipeline()\n        train_pipeline.run_pipeline()\n        return Response("Training Successful !!!")\n    except Exception as e:\n        return Response(f"Error Occurred!!! {e}")\n\n\n@app.post("/predict")\nasync def prediction(image_file: bytes = File(description="A file read as 

In [8]:
# Number of Document objects returned by the loader.
len(documents)

23

In [9]:
# Inspect the first loaded document (metadata plus raw page_content).
documents[0]

Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='from fastapi import FastAPI, File\nfrom uvicorn import run as app_run\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.responses import Response, JSONResponse\nfrom src.constants import APP_HOST, APP_PORT\nfrom src.pipeline.training import TrainingPipeline\nfrom src.pipeline.prediction import PredictionPipeline\n\napp = FastAPI()\n\norigins = [\'#\']\napp.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=[\'#\'],\n                   allow_headers=[\'#\'])\n\n\n@app.get("/train")\nasync def training():\n    try:\n        train_pipeline = TrainingPipeline()\n        train_pipeline.run_pipeline()\n        return Response("Training Successful !!!")\n    except Exception as e:\n        return Response(f"Error Occurred!!! {e}")\n\n\n@app.post("/predict")\nasync def prediction(image_file: bytes = File(description="A file read as b

In [10]:
# Python-aware recursive splitter: chunk_size=500 with chunk_overlap=20
# between neighbouring chunks.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON, chunk_size=500, chunk_overlap=20
)

In [11]:
# Split every loaded document into smaller chunks with documents_splitter;
# each chunk is itself a Document that keeps the source metadata.
texts = documents_splitter.split_documents(documents)

In [12]:
# Echo the chunked documents to check where the splitter cut the code.
# NOTE(review): this dumps every chunk; a slice such as texts[:2] would keep
# the saved output small.
texts

[Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content="from fastapi import FastAPI, File\nfrom uvicorn import run as app_run\nfrom fastapi.middleware.cors import CORSMiddleware\nfrom fastapi.responses import Response, JSONResponse\nfrom src.constants import APP_HOST, APP_PORT\nfrom src.pipeline.training import TrainingPipeline\nfrom src.pipeline.prediction import PredictionPipeline\n\napp = FastAPI()\n\norigins = ['#']\napp.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=['#'],\n                   allow_headers=['#'])"),
 Document(metadata={'source': 'test_repo\\app.py', 'language': <Language.PYTHON: 'python'>}, page_content='@app.get("/train")\nasync def training():\n    try:\n        train_pipeline = TrainingPipeline()\n        train_pipeline.run_pipeline()\n        return Response("Training Successful !!!")\n    except Exception as e:\n        return Response(f"Error Occurred!!! {e}")'),


In [14]:
# Number of chunks produced by the splitter.
len(texts)

132

In [15]:
from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file into the process environment.
# The return value is deliberately not bound to `_`: in IPython `_` holds the
# last cell output, and assigning to it shadows that behaviour.
# (`os` is already imported in the first cell, so it is not re-imported here.)
load_dotenv(find_dotenv())

# Fail fast with an actionable message instead of a bare KeyError (or a later
# authentication error) when the key is missing or empty.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise KeyError(
        "GROQ_API_KEY is not set; add it to a .env file or export it "
        "before running this notebook."
    )

In [16]:
# Sentence-transformers embedding model used to vectorise the code chunks;
# model_kwargs pins it to the CPU.
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
)

  embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
  from tqdm.autonotebook import tqdm, trange


In [17]:
PERSIST_DIR = '.db'

# Chroma.from_documents *adds* to whatever is already stored in
# persist_directory, so re-running the cell would index every chunk again and
# retrieval would start returning duplicates. Reuse the persisted index when
# one exists and only embed the chunks on the first run.
# NOTE: delete PERSIST_DIR to force a rebuild after the repository or the
# splitter settings change.
if os.path.isdir(PERSIST_DIR) and os.listdir(PERSIST_DIR):
    vectordb = Chroma(persist_directory=PERSIST_DIR, embedding_function=embeddings_model)
else:
    vectordb = Chroma.from_documents(texts, embedding=embeddings_model, persist_directory=PERSIST_DIR)

In [18]:
# Flush the vector store to its persist_directory.
# NOTE(review): the cell output echoes this line the way a warning would;
# persist() may be deprecated/unnecessary in the installed Chroma version
# - confirm before removing.
vectordb.persist()

  vectordb.persist()


In [19]:
# Chat model served by Groq, used both for answering and for the memory.
# NOTE(review): presumably picks up GROQ_API_KEY from the environment, since
# no key is passed explicitly - confirm against the ChatGroq documentation.
llm = ChatGroq(model="llama3-70b-8192")

In [20]:
# Summary-based conversation memory driven by the same LLM; the history is
# exposed to the chain under the "chat_history" key as message objects.
memory = ConversationSummaryMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
)

In [33]:
# Retriever over the vector store: "mmr" search with k=8 results per query.
code_retriever = vectordb.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 8},
)

# Conversational QA chain wiring together the LLM, the retriever and memory.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=code_retriever,
    memory=memory,
)

In [39]:
# Question to ask the chain about the indexed repository.
question = "what is evaluate function?"

In [40]:
# Query the chain through invoke(); calling the chain object directly
# (qa(question)) is the deprecated entry point. The chat history is supplied
# by the attached memory, so only the question is passed in.
result = qa.invoke({"question": question})
print(result['answer'])

The purpose of the `evaluate` function is to evaluate a model using a given loss function and data loader. It takes a model, a criterion (loss function), and a test data loader as input, and likely calculates the loss of the model on the test data.


In [41]:
# Follow-up question; reuses the same chain and its conversation memory.
question = "what is get_data_from_gcloud function?"

In [43]:
# Query the chain through invoke(); calling the chain object directly
# (qa(question)) is the deprecated entry point. The chat history is supplied
# by the attached memory, so only the question is passed in.
result = qa.invoke({"question": question})
print(result['answer'])

The purpose of the `get_data_from_gcloud` function is to fetch data from a GCloud Storage Bucket. It returns data ingestion artifacts.
