In [45]:
import os
import sys
from dotenv import load_dotenv,find_dotenv
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader,WebBaseLoader,UnstructuredURLLoader
from langchain_core.runnables import RunnablePassthrough

In [46]:
# Load variables from the nearest .env file (if one is found) into the environment.
_ = load_dotenv(find_dotenv())

# os.getenv returns None when the variable is unset, and assigning None into
# os.environ raises an unhelpful "str expected, not NoneType" TypeError.
# Fail early with an actionable message instead.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to a .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [47]:
# Chat model used to generate the final answers; temperature is pinned to 0.
llm = ChatGroq(
    temperature=0,
    model="llama-3.3-70b-versatile",
)

In [48]:
# Load the profile page and parse it into LangChain Document objects.
loader = UnstructuredURLLoader(urls=["https://aziz-ashfak.github.io/profile/"])
data = loader.load()

# With its default settings the loader logs and skips URLs it cannot fetch
# rather than raising, so a network failure would otherwise only surface
# later as an obscure error when building the vector index from no text.
if not data:
    raise RuntimeError(
        "No documents were loaded from the profile URL; "
        "check network access and that the page is reachable."
    )

In [49]:
# Split the loaded page into chunks (chunk_size=1000, no overlap between chunks).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
docs = text_splitter.split_documents(data)


In [50]:
# Keep only the raw text of each chunk; this is what gets embedded.
texts = [chunk.page_content for chunk in docs]

In [51]:
# Sentence-transformers model used to embed both the chunks and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [52]:
# Build the FAISS index from the chunk texts. Only the text is passed in,
# so no per-chunk metadata is stored alongside the vectors.
vectorstore = FAISS.from_texts(texts=texts, embedding=embeddings)

In [53]:
# Expose the vector store as a retriever. `retriever` is the correctly
# spelled name; `retrive` is kept as an alias because other cells use it.
retriever = vectorstore.as_retriever()
retrive = retriever

In [54]:
# Sanity check: display the chunks retrieved for a sample question.
retrive.invoke(input="Who is Aziz Ashfak?")

[Document(id='ce88e2c3-2a56-4d18-ad17-143b2de56efa', metadata={}, page_content='ML / AI Engineer · Researcher\n\nBuilding reliable AI systems for real‑world impact.\n\nI specialize in LLMs, RAG systems, and computer vision, grounded in a strong statistics foundation. I build clean, scalable, production-ready AI systems and research tools that deliver real impact\n\nView projects ↓ Hire / collaborate Download resume\n\nNoakhali, Bangladesh · B.Sc. in Statistics (NSTU, 2022–2026)\n\nPhone: 01730644634\n\nEmail: azizashfak@gmail.com\n\nGitHub ↗ LinkedIn ↗ Kaggle ↗ LeetCode ↗\n\nPortrait of Aziz Ashfak\n\nAziz Ashfak\n\nML/AI Engineer · LLMs · RAG · CV\n\nOpen to roles & collaborations\n\n100+\n\nStudents mentored\n\n10+\n\nDeployed apps\n\nRAG systems\n\nCV pipelines\n\nExperience\n\nAI Engineer · ROYALX LLC\n\nRemote · Dhaka, Bangladesh · Feb 2025 – Sep 2025 · Full-time\n\nLLM / RAG\n\nBuilt LLM apps (OpenAI, Groq, HF) with LangChain/LlamaIndex for generation, summarization, and chatbots

In [55]:
# Prompt for the RAG chain. Expects two input variables: `context` (the
# retrieved text) and `question` (the user's query). The template text is
# kept flush-left so no stray quote characters or source indentation end up
# in the prompt sent to the model.
prompt = ChatPromptTemplate.from_template(
    """You are a helpful assistant that helps people find information about Aziz Ashfak from his personal website.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

context:
{context}

Question:
{question}"""
)

In [56]:
# Documents -> single context string
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        One string with the contents in input order, joined by two newlines;
        an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    return separator.join(item.page_content for item in docs)

In [57]:
# RAG chain: the incoming question is sent to the retriever (whose results are
# flattened to text by format_docs) and also passed through unchanged; both
# values fill the prompt, which is then handed to the LLM.
rag_inputs = {
    "context": retrive | format_docs,
    "question": RunnablePassthrough(),
}
chain = rag_inputs | prompt | llm



In [58]:
# Run the full chain on a sample question; the cell displays the model's reply message.
chain.invoke(input="Who is Aziz Ashfak?")

AIMessage(content='Aziz Ashfak is an ML/AI Engineer and Researcher who specializes in LLMs, RAG systems, and computer vision, with a strong foundation in statistics. He builds reliable AI systems and research tools for real-world impact. He is open to roles and collaborations, and has experience working as an AI Engineer at ROYALX LLC. He is also a mentor and has guided over 100 students in project-based curricula.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 90, 'prompt_tokens': 829, 'total_tokens': 919, 'completion_time': 0.284085779, 'completion_tokens_details': None, 'prompt_time': 0.084366406, 'prompt_tokens_details': None, 'queue_time': 0.160744543, 'total_time': 0.368452185}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_3272ea2d91', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--aa1b8ecf-1d6c-4d68-859e-aceaebe349b5-0', usage_metadata={'input_tokens': 829, 'out