In [1]:
import os
import torch
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
import pymupdf

In [3]:
import fitz

def load_pdf(path):
    pages = []
    doc = fitz.open(path)
    for i in range(len(doc)):
        page = doc[i]
        text = page.get_text("text")  # "text" preserves natural reading order
        if text.strip():  # skip empty pages
            pages.append({"page": i + 1, "text": text})
    doc.close()
    return pages
    

In [5]:
# Define the path to your knowledge base file (relative to the notebook)
knowledge_base_path = '../data/path.pdf'

# Load the document content from PDF (returns list of dicts with page + text)
pages = load_pdf(knowledge_base_path)

# Initialize the text splitter
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)

# Split each page into chunks and keep page metadata
documents = []
for page in pages:
    chunks = text_splitter.create_documents([page["text"]])
    # add metadata (page number) to each chunk
    for chunk in chunks:
        chunk.metadata = {"page": page["page"]}
    documents.extend(chunks)

print(f"Split PDF into {len(documents)} chunks.")
# print(documents[0].page_content)  # Optional: inspect first chunk
# print(documents[0].metadata)      # Optional: check page number


Split PDF into 7 chunks.


In [6]:
# Initialize HuggingFace Embeddings model
# This model converts text into dense vector representations
embeddings_model_name = "sentence-transformers/all-MiniLM-L6-v2" # A good balance of size and performance
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)

# Create a FAISS vector store from the document chunks and embeddings
# This step creates an index that allows for efficient similarity search
vector_store = FAISS.from_documents(documents, embeddings)
print("FAISS vector store created.")

  embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)


FAISS vector store created.


In [7]:
# Define a sample query
query_to_test_vector_store = "What are the ways to fix the assistant?"

# Perform a similarity search
# This will return the most semantically similar document chunks to your query
retrieved_docs = vector_store.similarity_search(query_to_test_vector_store, k=3) # k=3 to get top 3 results

print("Query: {}\n".format({query_to_test_vector_store}))
print("--- Top 3 Retrieved Documents (Text Content) ---")
for i, doc in enumerate(retrieved_docs):
    print(f"Document {i+1}:")
    print(doc.page_content) # This shows the original text chunk
    print("-" * 50)

Query: {'What are the ways to fix the assistant?'}

--- Top 3 Retrieved Documents (Text Content) ---
Document 1:
SKILLS 
• Technical Skills: Python, TensorFlow, Keras, PyTorch, Hugging Face, API, React, Java, C/C++, C#, PostgreSQL, Model 
Evaluation, Enhancing Model Performance, Model Integration, Automatisation, Deep Learning Frameworks, LOps, Linux Systems, 
Version Control 
• AI/ML Skills: AI/machine Learning Concepts, AI Frameworks, LLM, NLP, Statistics, Linear Algebra, Multimodal Models, Rein- 
forcement Learning, Computer Vision, Research Publication 
• Soft Skills: Analytical Skills, Curiosity, Communication 
 
CERTIFICATIONS 
•Machine Learning Concept: Google 
•Generative AI Explained: Nvidia 
•Crash Course on Python: Google 
•C for EveryOne: University of California, Santa Cruz 
 
VOLUNTEER 
P1 Games 
Oct 2024 - Mar 2025 
AI Audio Systems Engineer (Video Game) 
• Developed AI-driven audio triggers that generated real-time contextual sound responses based on game states, player

In [8]:
from transformers import AutoModelForSeq2SeqLM
# Define the LLM model to use from Hugging Face
llm_model_name = "google/flan-t5-small" # A relatively small LLM for quick local setup

# Load the tokenizer for the LLM
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
# Load the LLM model itself
model = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)

# Create a Hugging Face pipeline for text generation
# This simplifies using the LLM for generation tasks
llm_pipeline = pipeline(
    "text2text-generation", # Use 'text2text-generation' for Flan-T5
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200, # Max length of generated response
    temperature=0.7,    # Controls randomness (lower = more deterministic)
    do_sample=True,     # Enable sampling
    device=0 if torch.cuda.is_available() else -1 # Use GPU if available, else CPU
)
print(f"LLM pipeline loaded: {llm_model_name}")

Device set to use cpu


LLM pipeline loaded: google/flan-t5-small


In [9]:
# Prompt
prompt_template = """
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Helpful Answer:
"""
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"]
)

from langchain.llms import HuggingFacePipeline

hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=hf_llm,
    retriever=vector_store.as_retriever(),
    chain_type="stuff",
    return_source_documents=True,
    combine_docs_chain_kwargs={"prompt": PROMPT}
)
print("RAG QA Chain created.")


RAG QA Chain created.


  hf_llm = HuggingFacePipeline(pipeline=llm_pipeline)


In [10]:
# Chat history will be used for multi-turn conversations (though simple here)
chat_history = []

def chat_with_llm(query):
    # Pass the query and chat_history to the QA chain
    result = qa_chain({"question": query, "chat_history": chat_history})

    # Update chat history (simple memory for now)
    chat_history.append((query, result["answer"]))

    print(f"\nUser: {query}")
    print(f"Assistant: {result['answer']}")
    # Optionally, print source documents to see what the LLM used
    # print("\n--- Source Documents ---")
    # for doc in result["source_documents"]:
    #     print(doc.page_content)
    #     print("-" * 20)
    return result['answer']

# --- Test the Chatbot ---
print("Chatbot ready! Type 'exit' to quit.")

while True:
    user_input = input("You: ")
    if user_input.lower() == 'exit':
        break
    chat_with_llm(user_input)

Chatbot ready! Type 'exit' to quit.


You:  What is the professional summary of Mohamed Elkattoufi


  result = qa_chain({"question": query, "chat_history": chat_history})
Token indices sequence length is longer than the specified maximum sequence length for this model (795 > 512). Running this sequence through the model will result in indexing errors



User: What is the professional summary of Mohamed Elkattoufi
Assistant: EDUCATION Faculty of Sciences Monastir, Tunisie Sep 2024 - Present Master, Automated Reasoning Systems and Artificial Intelligence •GPA: 2.8 •Achievements: Specializing in developing scalable system architectures, applying machine learning to real-world problems, designing multi-agent models, and advancing expertise in computer vision, probabilistic modeling, and decision-making systems. WORK EXPERIENCE Mercor AI Aug 2025 - Present Multimodal Model Trainer San Francisco, California (Remote) Contributed to a multimodal AI research project by enriching and validating datasets with essential image-audio pairings to support robust model training. • Produced clear and detailed audio descriptions for extensive image sets, enhancing dataset quality for multimodal deep learning experiments. • Collaborated with research teams to assess and optimize data quality using statistical methods, which improved model training outco

You:  exit
