In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

from nlp_chat_bot.rag.classic_rag import ClassicRAG
from nlp_chat_bot.model.embedding.late_chunking_embedding import LateChunkingEmbedding
from langchain_google_genai import ChatGoogleGenerativeAI

from nlp_chat_bot.vector_store.late_chunking_chroma_vector_store_builder import LateChunkingChromaVectorStoreBuilder

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load variables from a local .env file into the process environment before
# any client that needs credentials is constructed. The call's return value is
# the last expression of the cell, so it is echoed as the cell output.
# NOTE(review): presumably this supplies the API key read by
# ChatGoogleGenerativeAI in a later cell — confirm which variable it expects.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Locations used by this notebook, all relative to the notebook's directory.
dataset_path = "../data"
model_download_path = "../models"
vector_store_path = "../chromadb"

# Embedding model; its constructor receives the model download directory.
embedding_function = LateChunkingEmbedding(model_download_path)

# Text splitter configuration:
#   chunk_size      -> chunk size, in characters
#   chunk_overlap   -> overlap between consecutive chunks, in characters
#   add_start_index -> track each chunk's index in the original document
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
    add_start_index=True,
)

# Chat model used later for answer generation.
llm_gemini = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# Configure the builder first, then build the vector store from it.
vector_store_builder = LateChunkingChromaVectorStoreBuilder(
    dataset_path,
    embedding_function,
    vector_store_path,
    splitter=splitter,
)
vector_store = vector_store_builder.build()

# Wire the vector store and the chat model into the RAG pipeline and report
# how many documents it holds.
rag = ClassicRAG(vector_store, llm_gemini)
print("LENGTH", rag.get_num_docs())

# Run only the retrieval step, starting from an empty context, so the
# retrieved chunks can be inspected before any answer is generated.
retrieval_state = {
    "question": "What is my conclusion in my project report on image inpainting?",
    "context": [],
}
docs_retrieved = rag.retrieve(state=retrieval_state)
retrieved_context = docs_retrieved["context"]

print("Num docs:", len(retrieved_context))

# Print every retrieved chunk with its position and the score stored in its
# metadata, separated by a banner line.
for i, doc in enumerate(retrieved_context):
    print("\n\n", "#"*30,"\n")
    print(f"doc {i}: (score: {doc.metadata['score']})")
    print(doc.page_content)

100%|██████████| 1/1 [00:00<00:00,  5.68it/s]
0it [00:00, ?it/s]
100%|██████████| 1/1 [00:01<00:00,  1.99s/it]
0it [00:00, ?it/s]
Filtering existing documents: 100%|██████████| 6/6 [00:00<?, ?it/s]
Storing 6 documents embeddings (batch size is 10): 10it [00:12,  1.23s/it]              

Storing 35 total chunks len, 35 chunks with 35 embeddings
Documents are now loaded
LENGTH 35
Num docs: 3


 ############################## 

doc 0: (score: 35.03460781059225)
Image Inpainting with Basic Convolutional Networks
Robin Meneust, Ethan Pinto
December 2024
1 Introduction
In the context of our ”AI-Based Image Processing”
course, we worked on this project, in which we repro-
duced and tested a specific image inpainting approach,
defined by the paper ”Context Encoders: Feature Learn-
ing by Inpainting”(Pathak et al., 2016)[1].
Image inpainting consists of filling hole(s) in an im-
age. There exist different methods to do so (e.g. they
compared their results with Photoshop). In this paper,
they used a context encoder trained in an adversarial
way. Basically there is a generator, this is our context
encoder (here an encoder and a decoder) that given an
image of size 128x128 with a dropout region (a ”hole”,
with values set to 0) tries to predict what should be inside
the hole. We f




In [4]:
# Run the full pipeline for the question; the bare last expression makes the
# notebook display only the "answer" entry of the result.
rag_response = rag.invoke(
    query={"question":"What is my conclusion in my project report on image inpainting?"}
)
rag_response["answer"]

"The project's results were not as good as the original paper's, possibly due to dataset differences.  Improvements could involve using simpler datasets, adjusting parameters like the learning rate, adding noise, and using pre-trained models.  A PyTorch Lightning implementation of a context encoder was created to facilitate understanding and experimentation.\n"