In [69]:
import getpass
import os
import bs4
from typing import Optional
from datetime import date
from langchain_community.document_loaders import WebBaseLoader
from pydantic import BaseModel, Field
from langchain.chat_models import init_chat_model
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.documents import Document

In [70]:
# Ask for the Google AI key interactively only when the environment does not
# already provide one; getpass keeps the typed value out of the cell output.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI API key: ")

In [71]:
class Profil(BaseModel):
    """Structured information extracted from the text of a single job offer.

    Every field is optional and must be left null when the text does not state
    it; do not guess or infer missing values.
    """

    # NOTE: the docstring above is not only for human readers. Pydantic
    # publishes a model's docstring as the schema-level "description", and
    # LangChain's structured-output guidance states that this description is
    # passed to the model together with the per-field descriptions below.
    # Keep it short and phrased as guidance for the extractor.

    # --- Identification of the offer ---
    title: Optional[str] = Field(default=None, description="Title of the job offer")
    company: Optional[str] = Field(default=None, description="Company offering the job")
    location: Optional[str] = Field(default=None, description="Location of the job")
    contract_type: Optional[str] = Field(default=None, description="Type of contract (CDI, CDD, alternance...)")
    # Typed as `date`, so pydantic validates/parses the value the model returns.
    publication_date: Optional[date] = Field(default=None, description="Date when the job was posted format YYYY-MM-DD")

    # --- Candidate requirements ---
    experience: Optional[str] = Field(default=None, description="Required experience level")
    skills: Optional[list[str]] = Field(default=None, description="Required technical skills (languages, tools...)")
    soft_skills: Optional[list[str]] = Field(default=None, description="Soft skills required for the job")

    # --- Remaining details ---
    salary: Optional[str] = Field(default=None, description="Salary range if mentioned")
    description: Optional[str] = Field(default=None, description="Detailed job description")

In [72]:
def load_pages(urls: list[str]) -> list[Document]:
    """Load the given web pages and return them as a list of documents.

    Args:
        urls: Addresses of the pages to load; passed to ``WebBaseLoader`` as
            its ``web_path`` argument.

    Returns:
        Every document yielded by the loader's ``lazy_load()``, in the order
        they were yielded.
    """
    page_loader = WebBaseLoader(web_path=urls)
    # Drain the lazy iterator in one go instead of appending item by item.
    return list(page_loader.lazy_load())

In [73]:
# Smoke-test the loader on a single page: show the metadata that came back
# with the first document, then a trimmed preview of its text.
page_url = "https://www.langchain.com/"
docs = load_pages(urls=[page_url])
doc = docs[0]
# Metadata followed by one blank line.
print(doc.metadata, end="\n\n")
# Only the first 2000 characters of the page text, with surrounding whitespace removed.
print(doc.page_content[:2000].strip())

{'source': 'https://www.langchain.com/', 'title': 'LangChain', 'description': 'LangChain’s suite of products supports developers along each step of their development journey.', 'language': 'en'}

LangChain




















Products

FrameworksLangGraphLangChainPlatformsLangSmithLangGraph PlatformResources

GuidesBlogCustomer StoriesLangChain AcademyCommunityEventsChangelogDocs

PythonLangGraphLangSmithLangChainJavaScriptLangGraphLangSmithLangChainCompany

AboutCareersPricingGet a demoSign up












Products

FrameworksLangGraphLangChainPlatformsLangSmithLangGraph PlatformResources

GuidesBlogCustomer StoriesLangChain AcademyCommunityEventsChangelogDocs

PythonLangGraphLangSmithLangChainJavaScriptLangGraphLangSmithLangChainCompany

AboutCareersPricingGet a demoSign upThe platform for reliable agents. Tools for every step of the agent development lifecycle -- built to unlock powerful AI in production.Request a demoSee the docs

LangChain products power top engineering teams, from

In [74]:
# Chat model, plus a wrapper around it that is bound to the Profil schema.
model = init_chat_model(model="gemini-2.0-flash", model_provider="google-genai")
structured_llm = model.with_structured_output(Profil)

# Two-message prompt: fixed extraction instructions in the system message,
# and the text to analyse injected through the {text} placeholder.
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        ("human", "{text}"),
    ]
)

In [75]:
# Run the structured extraction on every loaded document and print each
# resulting field on its own "name: value" line.
for doc in docs:
    print(doc.metadata)
    text = doc.page_content
    # Render the prompt for this document, then send it to the schema-bound model.
    prompt = prompt_template.invoke(dict(text=text))
    res = structured_llm.invoke(prompt)
    res_dict = res.model_dump()
    for key in res_dict:
        value = res_dict[key]
        print(key, value, sep=": ")

{'source': 'https://www.langchain.com/', 'title': 'LangChain', 'description': 'LangChain’s suite of products supports developers along each step of their development journey.', 'language': 'en'}
title: AI Agent Platform
company: LangChain
location: None
contract_type: None
publication_date: None
experience: None
skills: ['orchestration', 'integrations', 'evals', 'observability', 'deployment']
soft_skills: ['AI']
salary: None
description: The platform for reliable agents. Tools for every step of the agent development lifecycle -- built to unlock powerful AI in production.
