## Setup 

In [None]:
from vector_database.src.text_splitter import chunk_documents, save_chunks_to_disk
from vector_database.src.utils import load_config

from dotenv import load_dotenv
from pathlib import Path

# 1. Load environment variables via python-dotenv (presumably from a local
#    .env file -- confirm which variables the later steps rely on).
load_dotenv()

# 2. Load the pipeline configuration. The path is relative, so it is expected
#    to resolve against the notebook's working directory. `config` is passed
#    to the download, chunking and embedding steps in the cells below.
config_path = Path("config.yaml")
config = load_config(config_path)


## Download documents only if needed

In [None]:
from vector_database.src.documentation_loader import clone_repo, cleanup_old_outputs

# 3. Optional: run this cell only if you want to download the docs from
#    GitHub again; skip it when the documents are already available locally.
# NOTE(review): cleanup_old_outputs() runs before the clone and presumably
# removes previously generated outputs -- confirm before running, and be
# aware that "Run All" executes this cell unconditionally.
cleanup_old_outputs()
clone_repo(config)



## Load the Documents

In [None]:
from vector_database.src.documentation_loader import load_documents

# 4. Load documents from the path configured under
#    data_source -> github -> target_path (presumably the directory the
#    clone step writes to -- confirm against config.yaml).
docs_path = config['data_source']['github']['target_path']
all_docs = load_documents(docs_path)

## Split documents

In [None]:

# 5. Chunk and save. Requires `all_docs` from the load step and `config` from
#    the setup step; both helpers are imported in the Setup cell. `chunks` is
#    reused by the embedding step below.
chunks = chunk_documents(all_docs, config)
save_chunks_to_disk(chunks)

## Populate Qdrant Vector Database

In [None]:
from vector_database.src.vector_store import store_embeddings

# 6. Store embeddings to Qdrant. Requires `chunks` from the split step and
#    `config` from the setup step.
store_embeddings(chunks, config)

## LangChain vector database to use in RAG

In [None]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

from vector_database.src.vector_store import COLLECTION_NAME,client,embeddings


# Reference snippet (disabled): manual creation of the collection. It is the
# only user of the Distance / VectorParams imports above.
# NOTE(review): size=1024 presumably has to match the dimensionality of
# `embeddings` -- confirm before re-enabling.
# client.create_collection(
#     collection_name=COLLECTION_NAME,
#     vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
# )

# LangChain wrapper built from the client, collection name and embedding
# model that vector_database.src.vector_store exposes; `vector_store` is the
# object to use from the RAG code.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
)