**Classify Text into Labels**

Tagging means labeling a document with classes such as:

Sentiment, Language, Style (formal, informal etc.), Covered topics, Political tendency


In [16]:
import getpass
import os

if not os.environ.get("GOOGLE_API_KEY"):
  os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")


from langchain.chat_models import init_chat_model

llm = init_chat_model("gemini-1.5-pro-latest", model_provider="google_genai")

Let's specify a Pydantic model with a few properties and their expected type in our schema.

In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from pydantic import BaseModel, Field

tagging_prompt = ChatPromptTemplate.from_template(
    """
Extract the desired information from the following passage.

Only extract the properties mentioned in the 'Classification' function.

Passage:
{input}
"""
)


class Classification(BaseModel):
    sentiment: str = Field(description="The sentiment of the text")
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10"
    )
    language: str = Field(description="The language the text is written in")


# LLM
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.0-flash").with_structured_output(
    Classification
)

In [18]:
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

response

Classification(sentiment='Positive', aggressiveness=1, language='Spanish')

In [19]:
inp = "Estoy muy enojado con vos! Te voy a dar tu merecido!"
prompt = tagging_prompt.invoke({"input": inp})
response = llm.invoke(prompt)

response.model_dump()

{'sentiment': 'Negative', 'aggressiveness': 10, 'language': 'Spanish'}

*Finer control*

Careful schema definition gives us more control over the model's output.

Specifically, we can define:

Possible values for each property,
Description to make sure that the model understands the property,
Required properties to be returned,

Let's redeclare our Pydantic model to control for each of the previously mentioned aspects using enums:

In [49]:
from pydantic import BaseModel, Field

class Classifications(BaseModel):
    sentimentsss: str = Field(..., description="Describes the sentiment of the statement (e.g., happy, neutral, sad).")
    aggressivenesss: int = Field(
        ...,
        description="Describes how aggressive the statement is. The higher the number, the more aggressive.",
    )
    languagesss: str = Field(
        ..., description="Specifies the language of the statement (e.g., Spanish, English, French, etc.)."
    )



# class Classifications(BaseModel):
#     sentiment: str = Field(description="The sentiment of the text", enum=["happy", "neutral", "sad"])
#     aggressiveness: int = Field(
#         description="describes how aggressive the statement is, the higher the number the more aggressive", enum=[1, 2, 3, 4, 5])
#     language: str = Field(description="The language the text is written in", enum=["spanish", "english", "french", "german", "italian"])


***enum is not working with gemini***

In [50]:
tagging_prompt = ChatPromptTemplate.from_template(
    """
    Extract the desired information from the following passage.
    Only extract the properties mentioned in the 'Classification' function.
    Passage:
    {input}
    """
)

llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.0-flash").with_structured_output(Classifications)

In [51]:
assert isinstance(Classifications, type)
print(isinstance(Classifications, type))
print(Classifications.schema())

True
{'properties': {'sentimentsss': {'description': 'Describes the sentiment of the statement (e.g., happy, neutral, sad).', 'title': 'Sentimentsss', 'type': 'string'}, 'aggressivenesss': {'description': 'Describes how aggressive the statement is. The higher the number, the more aggressive.', 'title': 'Aggressivenesss', 'type': 'integer'}, 'languagesss': {'description': 'Specifies the language of the statement (e.g., Spanish, English, French, etc.).', 'title': 'Languagesss', 'type': 'string'}}, 'required': ['sentimentsss', 'aggressivenesss', 'languagesss'], 'title': 'Classifications', 'type': 'object'}


C:\Users\New\AppData\Local\Temp\ipykernel_19508\632727839.py:3: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/
  print(Classifications.schema())


Now the answers will be restricted in a way we expect!

In [52]:
inp = "Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!"
prompt = tagging_prompt.invoke({"input": inp})
llm.invoke(prompt)

Classifications(sentimentsss='happy', aggressivenesss=0, languagesss='Spanish')

In [53]:
inp = "Estoy muy enojado con vos! Te voy a dar tu merecido!"
prompt = tagging_prompt.invoke({"input": inp})
llm.invoke(prompt)

Classifications(sentimentsss='angry', aggressivenesss=9, languagesss='Spanish')

In [54]:
inp = "Weather is ok here, I can go outside without much more than a coat"
prompt = tagging_prompt.invoke({"input": inp})
llm.invoke(prompt)

Classifications(sentimentsss='neutral', aggressivenesss=0, languagesss='English')