# Career Agent Use Case

### Parsing Functions

In [36]:
import requests
from bs4 import BeautifulSoup
from pypdf import PdfReader
from PIL import Image
import pytesseract

def parse_web_page(profile_url, timeout=10):
    """Download a web page and return its visible text.

    Args:
        profile_url: URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot stall the notebook indefinitely.

    Returns:
        The page text with markup removed; each text node is stripped and the
        nodes are joined with single spaces.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    response = requests.get(profile_url, timeout=timeout)
    # Fail loudly rather than indexing an error page as if it were profile content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return soup.get_text(strip=True, separator=' ')

def parse_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Pages whose extraction yields nothing are skipped; no separator is
    inserted between the text of consecutive pages.
    """
    page_texts = (page.extract_text() for page in PdfReader(pdf_path).pages)
    return "".join(extracted for extracted in page_texts if extracted)

def parse_text(text_path):
    """Read a UTF-8 text file and return its entire contents as one string."""
    with open(text_path, encoding="utf-8") as handle:
        return handle.read()

def parse_image(image_path):
    """Run OCR on an image file and return the recognised text.

    Args:
        image_path: Path of the image to read.

    Returns:
        Whatever ``pytesseract.image_to_string`` extracts from the image.
    """
    # The context manager releases the image's file handle once OCR is done,
    # instead of leaving it open until garbage collection.
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)


### Preprocessing Functions

In [37]:
import re

def split_content(doc, separators):
    """Split ``doc`` on any of ``separators``, keeping the separators as parts.

    Args:
        doc: Text to split.
        separators: Literal strings to split on (they are regex-escaped).

    Returns:
        The non-empty, whitespace-stripped pieces in document order, with each
        matched separator appearing as its own piece. With no usable
        separators the whole stripped document is returned as a single piece.
    """
    # Longest first: regex alternation takes the first alternative that matches,
    # so a separator that is a prefix of another would otherwise always win and
    # the longer one could never be recognised.
    ordered = sorted({sep for sep in separators if sep}, key=lambda sep: (-len(sep), sep))
    if not ordered:
        # An empty alternation would match the empty string and split between
        # every character.
        whole = doc.strip()
        return [whole] if whole else []

    pattern = f"({'|'.join(re.escape(sep) for sep in ordered)})"
    return [piece.strip() for piece in re.split(pattern, doc) if piece.strip()]

def combine_parts(parts, separators):
    """Join each separator found in ``parts`` with the content part after it.

    Args:
        parts: Pieces of a document in order, separators included as their own
            entries.
        separators: The strings that count as headings.

    Returns:
        A list of ``"<separator>: <content>"`` strings, one for every separator
        that is immediately followed by a non-separator part. A separator that
        is the last part, or that is directly followed by another separator,
        has no content and is skipped.
    """
    headings = set(separators)
    combined_docs = []
    # Pairing each part with its successor means the final part is never
    # looked up past the end of the list.
    for heading, body in zip(parts, parts[1:]):
        if heading in headings and body not in headings:
            combined_docs.append(heading + ": " + body)
    return combined_docs

def drop_parts(parts, separators):
    """Return the entries of ``parts`` that are not separators, in original order."""
    kept = []
    for candidate in parts:
        if candidate in separators:
            continue
        kept.append(candidate)
    return kept

def split_fixed_size(text, size):
    """Cut ``text`` into consecutive chunks of ``size`` characters.

    The final chunk holds whatever is left and may be shorter.
    """
    chunks = []
    for start in range(0, len(text), size):
        chunks.append(text[start:start + size])
    return chunks

def beatiful_print(subject, content):
    """Print ``subject`` as a banner, then ``content`` one '.'-separated fragment per line."""
    rule = "==============================="
    # Banner: rule, subject, rule.
    print(rule, subject, rule, sep="\n")
    # str.split always yields at least one fragment, so this prints one line per fragment.
    print(*content.split('.'), sep="\n")
    print(rule + "\n\n")



### Reading Files

In [38]:
# Each source location is paired with the parser call that reads it.
profile_url = "https://mohamedmurad.github.io/"
profile_content = parse_web_page(profile_url)

linkedin_path = "Mohamed Murad Data/linkedin.pdf"
linkedin_content = parse_pdf(linkedin_path)

summary_path = "Mohamed Murad Data/summary.txt"
summary_content = parse_text(summary_path)

resume_path = "Mohamed Murad Data/resume.pdf"
resume_content = parse_pdf(resume_path)

In [39]:
# beatiful_print("Profile Content: ", profile_content)
# beatiful_print("Linkedin Content: ", linkedin_content)
# beatiful_print("Summary Content: ", summary_content)
# beatiful_print("Resume Content: ", resume_content)
# print(len(profile_content))
# print(len(linkedin_content))
# print(len(summary_content))
# print(len(resume_content))

In [40]:
# Headings used to cut the LinkedIn export and the resume into labelled chunks.
sections = ['Summary', 'Contact', 'Education', 'Experience', 'Skills', 'Projects', 'Certifications', 'Certificates', 'Publications', 'Awards', 'Patents', 'Other']
subsections = ['Eventum Solutions', 'Alinma Bank', 'Communications, Space & Technology Commission (CST)', 'Arab National Bank', 'EJADA', 'Ejada Systems', 'Neoleap:', 'Ministry of Human Resources and Social Development', 'Ministry of Human Resources and Social Development - KSA', 'Zakat, Tax and Customs Authority', 'Eventum IT Solutions', 'Alexandria University']
special_chars = ['.']
fixed_size_split = 1000


def _sentence_chunks(text):
    """Split ``text`` on ``special_chars`` and discard the separator tokens themselves."""
    return drop_parts(split_content(text, special_chars), special_chars)


def _structured_chunks(text):
    """Chunk ``text`` four ways: by section, by subsection, by fixed size and by sentence."""
    return [
        *combine_parts(split_content(text, sections), sections),
        *combine_parts(split_content(text, subsections), subsections),
        *split_fixed_size(text, fixed_size_split),
        *_sentence_chunks(text),
    ]


# Chunk order: 1. profile, 2. LinkedIn, 3. resume, 4. summary.
docs = [
    *_sentence_chunks(profile_content),
    *_structured_chunks(linkedin_content),
    *_structured_chunks(resume_content),
    *_sentence_chunks(summary_content),
]


In [41]:
# for doc in docs:
#     print(doc, "\n==========")

### LLM models Set-Up

In [42]:
from dotenv import load_dotenv
from openai import OpenAI

# Load settings from a .env file before any client is configured.
load_dotenv(override=True)

# OpenAI-compatible client pointed at the server listening on localhost:11434.
ai_client = OpenAI(
    base_url='http://localhost:11434/v1',
    api_key='ollama',
)

# Model tags referenced by the cells below.
embedding_model_name = "all-minilm:22m"
qwen_model_name = "qwen3:0.6b"
deepseek_model_name = "deepseek-r1:1.5b"
llama_model_name = "tinyllama:latest"

### Storing Context

##### Embedding

In [43]:
def get_embedding(docs):
    """Request one embedding per entry of ``docs`` from the embedding model.

    Returns the raw API responses as a list, in the same order as ``docs``.
    """
    return [
        ai_client.embeddings.create(model=embedding_model_name, input=text)
        for text in docs
    ]

def extract_embedding_list(embeddings):
    """Return the vector held in the first ``data`` item of each embedding response."""
    vectors = []
    for response in embeddings:
        vectors.append(response.data[0].embedding)
    return vectors

##### Vector Database

In [44]:
import chromadb

def create_chroma_collection(db_name):
    """Create an empty Chroma collection named ``db_name``, replacing any existing one.

    Args:
        db_name: Name of the collection to (re)create.

    Returns:
        The freshly created collection.
    """
    client = chromadb.Client()
    # list_collections() yields collection objects in some chromadb releases
    # and bare name strings in others; accept both.
    existing_names = {getattr(entry, "name", entry) for entry in client.list_collections()}
    if db_name in existing_names:
        client.delete_collection(db_name)
    return client.create_collection(name=db_name)

In [45]:
db_name = "career_agent_db"

# Start from an empty collection; any existing one with this name is dropped first.
collection = create_chroma_collection(db_name)

# Embed every chunk, keeping both the raw responses and the plain vectors.
embeddings = get_embedding(docs)
embeddings_data = extract_embedding_list(embeddings)

# Store chunk text and vector side by side; ids follow each chunk's position in `docs`.
collection.add(
    ids=[f"doc_{position}" for position, _ in enumerate(docs)],
    documents=docs,
    embeddings=embeddings_data,
)

##### Test Vector Store

In [46]:
# Probe queries for the vector store: one about work experience, one about private life.
queries = [
    "tell me about your experience at Alinma Bank",
    "Can we talk about your private life?",
]
query1, query2 = queries

def print_context(query, results):
    """Print ``query`` and then every document of the first result set, each followed by a rule."""
    print(f"Query: {query}\nTop matches:")
    top_documents = results["documents"][0]
    for document in top_documents:
        print(document + "\n======")

def find_context(query, top_matches=2):
    """Embed ``query`` and return the collection's ``top_matches`` nearest results."""
    query_vectors = extract_embedding_list(get_embedding([query]))
    return collection.query(query_embeddings=query_vectors, n_results=top_matches)

# Show the three closest chunks for each probe query.
for query in queries:
    results = find_context(query, top_matches=3)
    print_context(query, results)
    print("=================\n\n")


Query: tell me about your experience at Alinma Bank
Top matches:
Alinma Bank
Artificial Intelligence Engineer
December 2021 - December 2022 (1 year 1 month)
Riyadh, Saudi Arabia
Enhancing and monitoring projects like churn predictions, affluent predictions,
for potential revolving card customers using IBM Cloud Pak for Data, Oracle
SQL, Jupyter, and IBM AutoAI, providing stakeholders with insights and
recommendations
Ministry of Human Resources and Social Development - KSA
Data Migration Specialist
December 2020 - December 2021 (1 year 1 month)
Riyadh, Saudi Arabia
Participated in analyzing the old database, designing the new one, and
preparing mapping sheets for data migration
Alinma Bank: Artificial Intelligence Engineer
December 2021 - December 2022 (1 year 1 month)
Riyadh, Saudi Arabia
Enhancing and monitoring projects like churn predictions, affluent predictions,
for potential revolving card customers using IBM Cloud Pak for Data, Oracle
SQL, Jupyter, and IBM AutoAI, providing sta

### Tools

In [47]:
import json
import os

# Pushover endpoint; the credentials are read from the environment and are None when unset.
pushover_url = "https://api.pushover.net/1/messages.json"
pushover_user = os.environ.get("PUSHOVER_USER")
pushover_token = os.environ.get("PUSHOVER_TOKEN")

def push(message, timeout=10):
    """Send ``message`` as a Pushover notification and echo it to stdout.

    Args:
        message: Text of the notification.
        timeout: Seconds to wait for the Pushover API, so a stalled request
            cannot hang the chat that triggered it.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    print(f"Push: {message}")
    payload = {"user": pushover_user, "token": pushover_token, "message": message}
    response = requests.post(pushover_url, data=payload, timeout=timeout)
    # Delivery stays best-effort (no exception on a rejected request), but a
    # rejection is at least made visible instead of passing unnoticed.
    if not response.ok:
        print(f"Push failed: HTTP {response.status_code}")

In [48]:
# user details
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Push a notification with a visitor's contact details and acknowledge the recording."""
    notification = f"Recording interest from {name} with email {email} and notes {notes}"
    push(notification)
    return {"recorded": "ok"}


# Function schema advertised to the model for `record_user_details`; only `email` is required.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
    "parameters": {
        "type": "object",
        "properties": {
            "email": {"type": "string", "description": "The email address of this user"},
            "name": {"type": "string", "description": "The user's name, if they provided it"},
            "notes": {
                "type": "string",
                "description": "Any additional information about the conversation that's worth recording to give context",
            },
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}

In [49]:
# unknown question
def record_unknown_question(question):
    """Push a notification about a question that could not be answered and acknowledge it."""
    notification = f"Recording {question} asked that I couldn't answer"
    push(notification)
    return {"recorded": "ok"}

# Function schema advertised to the model for `record_unknown_question`.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {"type": "string", "description": "The question that couldn't be answered"},
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}

In [50]:
# Tool list passed to the chat-completions calls: each schema wrapped as a "function" tool.
tools = [
    {"type": "function", "function": tool_schema}
    for tool_schema in (record_user_details_json, record_unknown_question_json)
]

In [51]:
def handle_tool_calls(tool_calls):
    """Run the tool calls requested by the model and return their results as chat messages.

    Only functions whose names are advertised in the module-level ``tools``
    list are invoked. Anything else, and any call whose arguments cannot be
    parsed or do not fit the function, produces an ``{"error": ...}`` result
    for the model instead of raising.

    Args:
        tool_calls: Tool-call objects exposing ``id``, ``function.name`` and
            ``function.arguments`` (a JSON string).

    Returns:
        One ``{"role": "tool", "content": <json>, "tool_call_id": <id>}``
        message per tool call, in the same order.
    """
    # The tool name comes from model output, so it must not be resolved against
    # every global: restrict dispatch to the functions advertised in `tools`.
    advertised = {spec["function"]["name"] for spec in globals().get("tools", [])}

    results = []
    for tool_call in tool_calls:
        tool_name = tool_call.function.name
        print(f"Tool called: {tool_name}", flush=True)
        tool = globals().get(tool_name) if tool_name in advertised else None
        if not callable(tool):
            result = {"error": f"Unknown tool: {tool_name}"}
        else:
            try:
                # An empty argument string is treated as "no arguments".
                arguments = json.loads(tool_call.function.arguments or "{}")
                result = tool(**arguments)
            except (json.JSONDecodeError, TypeError) as exc:
                # Malformed JSON, a non-object payload, or wrong/missing parameters.
                result = {"error": f"Invalid arguments for {tool_name}: {exc}"}
        results.append({"role": "tool","content": json.dumps(result),"tool_call_id": tool_call.id})
        print("\n\n\n============",result,"\n===============\n\n")
    return results

In [52]:
# Manual check of the tool; note that this sends a real push notification.
record_unknown_question("this is a really hard question")

Push: Recording this is a really hard question asked that I couldn't answer


{'recorded': 'ok'}

### Chat

### 1. Ollama models

In [53]:
def build_system_prompt(name, context_list):
    """Build the system prompt for ``name`` from retrieved context.

    Every document in ``context_list["documents"][0]`` becomes a bullet of the
    form ``"- <document>."`` followed by a blank line, and the bullets are
    placed in the prompt's Context section.
    """
    context = "".join(
        "- " + document + ".\n\n" for document in context_list["documents"][0]
    )

    system_prompt = f"""You are representing {name} and responding to questions submitted via {name}'s website.
    Your role is to accurately reflect {name}’s professional background, expertise, and experience, based on the provided context.
    Respond in a clear, concise, and professional tone, suitable for potential clients, collaborators, or employers.

    You are provided with verified background information to support your answers. Use it exclusively to generate accurate and helpful responses.
    If a question falls outside the provided context or you are uncertain about the answer, log it using the 'record_unknown_question' tool.

    If the user seems interested in further communication, kindly request their email and record it using the 'record_user_details' tool.

    Always stay in character as {name}.

    Context:
    {context}

    Now, using the above context, respond appropriately to the user's latest message.
    """
    return system_prompt

In [54]:
# Persona the agent speaks as, and a sample question used to preview the prompt.
name, query_test = "Mohamed Murad Ismail", "Please tell me about your work in CST"

In [55]:
# Preview the system prompt built from the three best matches for the sample question.
context = find_context(query_test, top_matches=3)
system_prompt = build_system_prompt(name, context)
print(system_prompt)

You are representing Mohamed Murad Ismail and responding to questions submitted via Mohamed Murad Ismail's website.
    Your role is to accurately reflect Mohamed Murad Ismail’s professional background, expertise, and experience, based on the provided context.
    Respond in a clear, concise, and professional tone, suitable for potential clients, collaborators, or employers.

    You are provided with verified background information to support your answers. Use it exclusively to generate accurate and helpful responses.
    If a question falls outside the provided context or you are uncertain about the answer, log it using the 'record_unknown_question' tool.

    If the user seems interested in further communication, kindly request their email and record it using the 'record_user_details' tool.

    Always stay in character as Mohamed Murad Ismail.

    Context:
    - I have also worked as a Data
Scientist at CST and an Artificial Intelligence Engineer at Alinma
Bank, where I developed 

In [56]:
def chat(message, history):
    """Answer ``message`` with the local Qwen model, using retrieved context and tools.

    NOTE: a later cell defines another ``chat`` (the Gemini variant); once that
    cell has run, this definition is replaced in the notebook namespace.

    Args:
        message: The user's latest message.
        history: Earlier conversation as a list of dicts with at least
            ``"role"`` and ``"content"`` keys.

    Returns:
        The model's reply with any ``<think>...</think>`` preamble removed, or
        a short apology if the model is still requesting tools after
        ``max_tool_rounds`` rounds.
    """
    # Safety valve: a model that keeps asking for tools must not loop forever.
    max_tool_rounds = 5

    context = find_context(message)
    system_prompt = build_system_prompt(name, context)

    # Keep the last 3 history messages and forward only the keys the chat API
    # expects; extra UI-supplied keys do not belong in the request.
    recent = [{"role": item["role"], "content": item["content"]} for item in history[-3:]]

    messages = [{"role": "system", "content": system_prompt}] + recent + [{"role": "user", "content": message}]

    reply = "Sorry, I couldn't complete that request. Please try rephrasing your question."
    for _ in range(max_tool_rounds):
        response = ai_client.chat.completions.create(
            model=qwen_model_name,
            messages=messages,
            tools=tools,
        )
        choice = response.choices[0]
        tool_calls = choice.message.tool_calls

        # Decide on the tool calls themselves, so the handler is never given
        # None when finish_reason and the payload disagree.
        if not tool_calls:
            reply = choice.message.content
            break

        tool_results = handle_tool_calls(tool_calls)
        messages.append(choice.message)
        messages.extend(tool_results)

    # Drop the reasoning preamble: keep only what follows the closing tag,
    # whatever whitespace comes after it.
    if reply and "</think>" in reply:
        reply = reply.split("</think>", 1)[1].strip()

    return reply

### 2. Google (Gemini) API

In [57]:
# Gemini is called through the OpenAI client, so it needs an API key and a base URL;
# both come from the environment and are None when unset.
load_dotenv(override=True)
google_api_key = os.environ.get('GOOGLE_API_KEY')
google_base_url = os.environ.get('GOOGLE_BASE_URL')

gemini_model_name = "gemini-2.0-flash"

In [58]:
def chat(message, history):
    """Answer ``message`` with the Gemini model, using retrieved context and tools.

    This definition replaces the earlier Ollama-based ``chat`` in the notebook
    namespace.

    Args:
        message: The user's latest message.
        history: Earlier conversation as a list of dicts with at least
            ``"role"`` and ``"content"`` keys.

    Returns:
        The model's reply text, or a short apology if the model is still
        requesting tools after ``max_tool_rounds`` rounds.
    """
    # Safety valve: a model that keeps asking for tools must not loop forever.
    max_tool_rounds = 5

    context = find_context(message)
    system_prompt = build_system_prompt(name, context)

    # Keep the last 3 history messages and forward only the keys the chat API
    # expects; extra UI-supplied keys do not belong in the request.
    recent = [{"role": item["role"], "content": item["content"]} for item in history[-3:]]

    messages = [{"role": "system", "content": system_prompt}] + recent + [{"role": "user", "content": message}]

    # One client for the whole exchange rather than a new one per tool round.
    gemini = OpenAI(api_key=google_api_key, base_url=google_base_url)

    for _ in range(max_tool_rounds):
        response = gemini.chat.completions.create(model=gemini_model_name, messages=messages, tools=tools)
        choice = response.choices[0]
        tool_calls = choice.message.tool_calls

        # Decide on the tool calls themselves, so the handler is never given
        # None when finish_reason and the payload disagree.
        if not tool_calls:
            return choice.message.content

        tool_results = handle_tool_calls(tool_calls)
        messages.append(choice.message)
        messages.extend(tool_results)

    return "Sorry, I couldn't complete that request. Please try rephrasing your question."

### Run Interface

In [59]:
import gradio as gr

# Build the chat UI around `chat` and start the local server.
gr.ChatInterface(
    fn=chat,
    type="messages",
    title="💼 Career Agent",
    description="Mohamed Murad AI Career Avatar.",
    submit_btn="Send",
    theme="soft",
    examples=[
        ["What is your current Job title?"],
        ["How many years of experience do you have?"],
        ["Do you have any technical certificates?"],
        ["What is your experience in MLOps?"],
        ["Talk about your recent client project?"],
        ["How many clients you have worked for?"],
        ["Tell me about your lifestyle?"],
        ["I need to contact with you"],
    ],
).launch()

* Running on local URL:  http://127.0.0.1:7872
* To create a public link, set `share=True` in `launch()`.




Tool called: record_user_details
Push: Recording interest from Name not provided with email test@gmail.com and notes not provided





