In [1]:
import os
import gc
import torch
import chromadb
from chromadb.utils import embedding_functions
from sentence_transformers import CrossEncoder

from utils.llmclass import RAGGenerator
from utils.search import Search
# # Cleanup
# gc.collect()
# torch.cuda.empty_cache()
# Configuration
# Location of the persistent vector store: a "seamanuals" folder resolved against
# the current working directory, so the path depends on where the kernel was started.
DB_DIRNAME = "seamanuals"
DB_PATH = os.path.join(os.getcwd(), DB_DIRNAME)
# MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

In [2]:
# Open the on-disk Chroma store rooted at DB_PATH.
client = chromadb.PersistentClient(path=DB_PATH)

# Embedding function attached to the collection so that queries are embedded
# with the sentence-transformers model named here.
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2",
)

# NOTE(review): get_or_create_collection returns the existing collection or creates
# a new, empty one. If DB_PATH points at the wrong folder this cell still succeeds
# and later searches simply return nothing — confirm that is the intended behavior.
collection = client.get_or_create_collection(
    name="Sea-Database",
    embedding_function=embedding_func,
)

In [3]:
# Obtain the retrieval callable for this collection. Later cells invoke it as
# search(query) and read the 'content' and 'source' keys of each returned item.
# NOTE(review): Search.get lives in utils.search, outside this notebook, so how it
# ranks or filters results is not visible here.
search = Search.get(collection)

In [8]:
# Retrieval-only check: run one question through the search callable and show
# the raw text of every hit, without involving the answer generator.
query = "What are the specific carpet area requirements for classrooms in a Maritime Training Institute based on student intake capacity?"
docs = search(query)

print("--- Retrieved Context ---")
# Format lazily so each hit is rendered right before it is printed.
rendered_hits = (f"Content: \n{doc['content']}" for doc in docs)
for rendered in rendered_hits:
    print(rendered)

--- Retrieved Context ---
Content: 
2.9. Faculty room
2.9.1. A separate room not less than 8 m2 shall be provided for the Principal/head of
Institute. A carpet area of not less than 4 m2for each full-time faculty member shall be
provided. Modular separation between each faculty space is recommended.
2.9.2. The faculty shall also be provided with separate chair, table and cupboard.
Additional space, table and chairs shall be provided for visiting faculty. Ventilation and
lighting arrangement shall be same as for classrooms.
2.10. Classroom requirements
2.10.1. Class-room: The carpet area requirement of the class rooms and tutorial rooms
depends upon the number of students and type of seating arrangement. The size (carpet
area) of the classroom shall be 30 m2, 36 m2, and 50 m2 for intake capacity of 20, 24 and
40 candidates respectively. Institutes approved prior to 1st November, 2016 may continue
with the prevalent classroom size. However, if they apply for increase in capacity for
Cont

In [9]:
# Create the answer generator; a later cell calls rag.generate_answer(query, context).
# NOTE(review): 'gemini-2.5-flash' presumably selects a hosted Gemini model, which
# would need API credentials configured outside this notebook — confirm.
rag = RAGGenerator(model_name='gemini-2.5-flash')

In [12]:
# 1. RETRIEVAL — fetch passages for the question through the search callable
query = "What are the five criteria for standards of physical and medical fitness for seafarers?"
docs = search(query)

# Prefix every passage with its source, giving "<source>-> <content>" strings
# for the generator to use as context.
context = []
for doc in docs:
    context.append("-> ".join((doc['source'], doc['content'])))

# 2. GENERATION — echo the question first, then request and print the answer
print(f"Query: {query}\n")
answer = rag.generate_answer(query, context)
print(f"Response:\n{answer}")

Query: What are the five criteria for standards of physical and medical fitness for seafarers?

Response:
The five criteria for standards of physical and medical fitness for seafarers are:

1.  Have the physical capability to fulfil all the requirements of basic training.
2.  Demonstrate adequate hearing and speech to communicate effectively and detect any audible alarms.
3.  Have no medical condition, disorder or impairment that will prevent the effective and safe conduct of their routine and emergency duties onboard.
4.  Are not suffering from any medical condition likely to be aggravated by service at sea or likely to render the seafarer unfit for such service or to endanger the health and safety of other persons on-board.
5.  Are not taking any medication that has side effects that will impair judgement, balance or any other requirements for effective and safe performance of routine and emergency duties on-board.
