In [1]:
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env file

True

In [13]:
from langchain.chat_models import init_chat_model

llm_validate = init_chat_model("gemini-2.0-flash-lite", model_provider="google_genai")
llm = init_chat_model("gemini-2.0-flash", model_provider="google_genai")

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

responsive_prompt = ChatPromptTemplate.from_template(
    """
You are my personal representative. Recruiters might ask you about my skillset and project portfolio and other related things.
Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

class Classification(BaseModel):
    relativity: str = Field(
        description="Validate whether user input is related to my skillset, project portfolio or me.",
        enum=["True", "False"]
    )


# Structured LLM
structured_llm = llm.with_structured_output(Classification)

In [18]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

classification_prompt = ChatPromptTemplate.from_template(
    """
You are my personal representative. Recruiters might ask you about my skillset and project portfolio and other related things.
Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)

class ClassificationTag(BaseModel):

    category: str = Field(
        description= "Decide for which category user input is related the most.",
        strict=True,
        examples=["about Me", "gallup", "experience", "programming","projects","projects","development","other" ]
    )
class Classification(BaseModel):

    aboutMe: str = Field(
        description= "Validate whether user input is related to me.",
        enum=["True", "False"]
    )
    gallup: str = Field(
        description= "Validate whether user input is related to my gallup strenghts",
        enum=["True", "False"]
    )
    experience: str = Field(
        description= "Validate whether user input is related to my experience.",
        enum=["True", "False"]
    )
    programming: str = Field(
        description= "Validate whether user input is related to my programming skills.",
        enum=["True", "False"]
    )
    projects: str = Field(
        description= "Validate whether user input is related to my project portfolio.",
        enum=["True", "False"]
    )
    development: str = Field(
        description= "Validate whether user input is related to my personal development.",
        enum=["True", "False"]
    )

# Structured LLM
structured_llm = llm_validate.with_structured_output(ClassificationTag)

inp = "How did you implement change data capture in your portfolio work?"
# inp = "Estoy muy enojado con vos! Te voy a dar tu merecido!"
prompt = classification_prompt.invoke({"input": inp})
response = structured_llm.invoke(prompt)

response.model_dump()

In [20]:
with open("../../docs/faq.txt", "r") as f:
    text = f.read()
    lines = text.split("\n")
print(lines)

['What was the objective and architecture of your e-commerce workflow project?', 'What was the objective and architecture of your CDC data streaming project?', 'How did you implement change data capture in your portfolio work?', 'How do your projects demonstrate your understanding of end-to-end data pipelines?', 'How have you used Docker and Docker Compose in your data engineering workflows?', 'How did you use Google Cloud tools like BigQuery or GCS in your projects?', 'Can you describe your experience orchestrating workflows?', 'How does your portfolio demonstrate readiness for a data/ai related role?', 'How do your Gallup strengths influence the way you approach data engineering tasks?', 'What areas are you currently focused on improving or learning next?']


In [19]:
for inp in lines:
    prompt = classification_prompt.invoke({"input": inp})
    response = structured_llm.invoke(prompt)
    print(inp[:-1])
    print(response.model_dump())

What was the objective and architecture of your e-commerce workflow project?
{'category': 'project portfolio'}
What was the objective and architecture of your CDC data streaming project?
{'category': 'project objective and architecture'}
How did you implement change data capture in your portfolio work?
{'category': 'change data capture'}
How do your projects demonstrate your understanding of end-to-end data pipelines?
{'category': 'Data pipelines'}
How have you used Docker and Docker Compose in your data engineering workflows?
{'category': 'data engineering'}
How did you use Google Cloud tools like BigQuery or GCS in your projects?
{'category': 'project'}
Can you describe your experience orchestrating workflows?
{'category': 'workflow orchestration'}
How does your portfolio demonstrate readiness for a data/ai related role?
{'category': 'portfolio'}
How do your Gallup strengths influence the way you approach data engineering tasks?
{'category': 'data engineering'}
What areas are you cur

In [13]:
"True" in response.model_dump().values()

True