In [72]:
import os
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader

# -------------- EMBEDDING MODEL ----------------
# Sentence-transformers model used to embed both the indexed chunks and the
# queries. The same `embeddings` object is passed to every FAISS call below.
modelPath = "sentence-transformers/all-MiniLM-l6-v2"
# Run the embedding model on CPU.
model_kwargs = {'device':'cpu'}
# Forwarded to the encoder; with this flag off the vectors are presumably
# left un-normalized -- confirm against the HuggingFaceEmbeddings docs.
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs, 
    encode_kwargs=encode_kwargs 
)
# -----------------------------------------------

# -------------------- PATHS --------------------
# All paths are resolved against the working directory at the moment this
# cell runs, so changing directory afterwards does not move them.
current_dir = os.getcwd()
# Folder that holds the saved FAISS index files.
db_dir = os.path.join(current_dir, "db")
# Base name of the index; the functions below look for "<idx_name>.faiss".
idx_name = "FAISS_metadata"
# -----------------------------------------------

def ensure_faiss_db_exists():
    """Create an empty FAISS index on disk if one does not exist yet.

    The index is looked up as ``<db_dir>/<idx_name>.faiss``. When that file is
    already present the function does nothing. Otherwise a new index is built
    with the module-level ``embeddings`` and saved under ``db_dir``.

    Returns:
        None.
    """
    index_path = os.path.join(db_dir, f"{idx_name}.faiss")
    if os.path.exists(index_path):
        return

    # FAISS.from_documents needs at least one document to build the index,
    # so bootstrap it with a placeholder.
    placeholder = Document(page_content="", metadata={"source": "init"})
    faissdb = FAISS.from_documents([placeholder], embeddings)

    # Remove the placeholder again before saving: otherwise the empty
    # "init" document stays in the store and can be returned by similarity
    # search whenever fewer than k real chunks are indexed.
    faissdb.delete(list(faissdb.index_to_docstore_id.values()))

    faissdb.save_local(folder_path=db_dir, index_name=idx_name)
        
def text_file_extract(file_name):
    """Split a UTF-8 text file from ``uploads/`` into chunks and index them.

    The file is read from ``<current_dir>/uploads/<file_name>``, split into
    chunks of up to 1000 characters with a 100-character overlap, and the
    chunks are appended to the FAISS index stored in ``db_dir``. Each chunk's
    metadata is replaced with ``{'source': file_name}``.

    Args:
        file_name: Name of the file inside the ``uploads`` directory.

    Returns:
        None. The updated index is written back to disk.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    file_path = os.path.join(current_dir, "uploads", file_name)

    # Validate before loading: the loader reads the file immediately, so a
    # check placed after load() can never report the missing file.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist. Please check the path.")

    ensure_faiss_db_exists()

    data = TextLoader(file_path, encoding="utf-8").load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
    )
    chunks = text_splitter.split_documents(data)

    # An empty file yields no chunks; skip the index update instead of
    # handing FAISS an empty batch.
    if not chunks:
        return

    # Keep only the originating file name as metadata.
    for chunk in chunks:
        chunk.metadata = {'source': file_name}

    # NOTE(review): allow_dangerous_deserialization is only appropriate while
    # the db directory is written exclusively by trusted code.
    faissdb = FAISS.load_local(
        folder_path=db_dir,
        embeddings=embeddings,
        index_name=idx_name,
        allow_dangerous_deserialization=True,
    )
    faissdb.add_documents(chunks)
    faissdb.save_local(folder_path=db_dir, index_name=idx_name)

def pdf_file_extract(file_name):
    """Split a PDF from ``uploads/`` into chunks and index them.

    The file is read from ``<current_dir>/uploads/<file_name>`` with
    PyMuPDFLoader, split into chunks of up to 1000 characters with a
    10-character overlap, and the chunks are appended to the FAISS index
    stored in ``db_dir``. Each chunk's metadata is replaced with
    ``{'source': file_name}``.

    Args:
        file_name: Name of the PDF inside the ``uploads`` directory.

    Returns:
        None. The updated index is written back to disk.

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    file_path = os.path.join(current_dir, "uploads", file_name)

    # Fail early with the same error text_file_extract uses, before any
    # index is created or loaded for a file that is not there.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist. Please check the path.")

    ensure_faiss_db_exists()

    data = PyMuPDFLoader(file_path).load()

    # NOTE(review): text_file_extract uses chunk_overlap=100; confirm that
    # the smaller overlap here is intentional.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=10,
        length_function=len,
    )
    chunks = text_splitter.split_documents(data)

    # A PDF without extractable text yields no chunks; skip the index update
    # instead of handing FAISS an empty batch.
    if not chunks:
        return

    # Keep only the originating file name as metadata.
    for chunk in chunks:
        chunk.metadata = {'source': file_name}

    # NOTE(review): allow_dangerous_deserialization is only appropriate while
    # the db directory is written exclusively by trusted code.
    faissdb = FAISS.load_local(
        folder_path=db_dir,
        embeddings=embeddings,
        index_name=idx_name,
        allow_dangerous_deserialization=True,
    )
    faissdb.add_documents(chunks)
    faissdb.save_local(folder_path=db_dir, index_name=idx_name)

def retrieve_relevant_data(query):
    """Return the five chunks most similar to ``query`` as one text block.

    Args:
        query: The user's question, used for the similarity search.

    Returns:
        A string with one "Relevant Doc" / "Source" section per retrieved
        chunk, or the message "No Documents Are Added." when no index file
        exists on disk.
    """
    index_file = os.path.join(db_dir, f"{idx_name}.faiss")
    if not os.path.exists(index_file):
        return """No Documents Are Added."""

    store = FAISS.load_local(
        folder_path=db_dir,
        embeddings=embeddings,
        index_name=idx_name,
        allow_dangerous_deserialization=True,
    )
    retriever = store.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 5},
    )
    hits = retriever.invoke(query)

    # One section per hit: the chunk text followed by its metadata dict.
    return "".join(
        f'\nRelevant Doc: \n {hit.page_content}'
        f'\nSource: {hit.metadata}'
        for hit in hits
    )

In [73]:
# Name of the file (inside the uploads folder) to ingest; this cell selects
# the plain-text sample.
file_name = "romeo_and_juliet.txt"

In [75]:
# This cell rebinds file_name to the PDF sample; whichever of the two cells
# ran last decides which file the ingestion cell below processes.
file_name = "NVResume.pdf"

In [76]:
# Pick the extractor from the file extension. The comparison is
# case-insensitive and includes the dot, so "REPORT.PDF" is accepted while a
# name that merely ends in the letters "pdf" is not.
if file_name.lower().endswith('.txt'):
    text_file_extract(file_name)
elif file_name.lower().endswith('.pdf'):
    pdf_file_extract(file_name)
else:
    # Report unsupported files instead of silently doing nothing.
    print(f"Unsupported file type: {file_name}")

In [78]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

# Read environment variables from a local .env file, then fetch the Gemini
# API key. os.getenv returns None when GOOGLE_API_KEY is not set.
load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")

# Chat model used to answer the question.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash', google_api_key=api_key)

query = "Who is Nithin Vikas and which role he suits for in job?"

# retrieve_relevant_data(query) is evaluated while this f-string is built, so
# the retrieved chunks (or the "No Documents Are Added." message) are embedded
# in the prompt before the model is called.
prompt = f"""
        ** Use the Given Context to answer the question. Also provide the Source name you picked from **
        If the Answer is not in the context then notify the user about no information regarding the question in the context.
        If the Provided Question is not relevant to the context then answer through the data which you trained on.
        
        {retrieve_relevant_data(query)}
        
        You are a Helpful AI Assitant who patiently answers every user's query.

        User Query: {query}

        """

# Send the assembled prompt to the model and print the text of its reply.
response = model.invoke(prompt)

print(response.content)

Nithin Vikas AV is an individual whose resume details are provided in the context. He is currently pursuing an M.Sc. in Cyber Security (Integrated) at PSG College of Technology and is also in Grade 12 at Nava Bharath National School. He possesses technical skills in languages like C/C++, Java, Python, and SQL, along with knowledge in Data Structures and Algorithms, Operating Systems, Computer Networks, DBMS, and OOPs. He has also worked on projects such as "HoneyPot-Lite" (cybersecurity-related) and a "2D Adventure Game" (software development).

Based on his M.Sc. in Cyber Security, his "HoneyPot-Lite" project, and his technical skills in Python, Computer Networks, and Operating Systems, Nithin Vikas AV appears to be well-suited for roles in **cybersecurity**, **network security**, or **junior software development** positions, especially those involving Python or Java.

Source: NVResume.pdf
