# Text classification

In [1]:
import json

In [2]:
from openai import OpenAI

class LLM():
    """Thin chat-completion wrapper that pins one system message for every call.

    Calling the instance is shorthand for ``invoke``.
    """

    def __init__(self, system_message):
        """Create the OpenAI client and store ``system_message`` as the system turn."""
        self.client = OpenAI()
        self.system_prompt = {"role": "system", "content": system_message}

    def invoke(self, text):
        """Send ``text`` as the user turn and return the first choice's message content."""
        user_turn = {"role": "user", "content": text}
        conversation = [self.system_prompt, user_turn]
        reply = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation,
        )
        first_choice = reply.choices[0]
        return first_choice.message.content

    def __call__(self, *args, **kwds):
        """Forward all arguments to ``invoke``."""
        return self.invoke(*args, **kwds)

In [6]:
# Build the prompt-only classifier: the system message asks for the language,
# sentiment and aggressiveness of the text, returned as a JSON object.
llm = LLM("Given the user-submitted text, identify the language, sentiment, and aggressiveness. Output the results in json format with the following fields ['language', 'sentiment', 'aggressiveness'].")

In [7]:
# Classify a friendly Spanish sentence; `response` holds the text content of the model's reply.
response = llm("Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!")

In [8]:
# Display the reply as returned: a JSON document held in a plain Python string.
response

'{\n  "language": "Spanish",\n  "sentiment": "positive",\n  "aggressiveness": "non-aggressive"\n}'

In [9]:
# Decode the JSON text into a dict. The request above only asks for JSON in the
# prompt, so json.loads raises json.JSONDecodeError if the reply is not valid JSON.
json.loads(response)

{'language': 'Spanish',
 'sentiment': 'positive',
 'aggressiveness': 'non-aggressive'}

In [11]:
from pydantic import BaseModel, Field

class Classification(BaseModel):
    """Text classification response"""
    # NOTE: the docstring above and the Field descriptions below are not only
    # documentation: they appear under "description" keys in
    # Classification.model_json_schema(), which this notebook uses to build
    # prompts, so rewording them changes what the model is asked.

    # Sentiment label as free text (declared as a plain str).
    sentiment: str = Field(description="The sentiment of the text")
    # Integer score; the 1-10 scale is stated only in the description text.
    aggressiveness: int = Field(
        description="How aggressive the text is on a scale from 1 to 10"
    )
    # Language name as free text (declared as a plain str).
    language: str = Field(description="The language the text is written in")

In [4]:
# Render the FieldInfo for `sentiment` as a string. The outer quotes are double
# so the single-quoted key inside the braces also parses on Python < 3.12
# (reusing the same quote type inside an f-string needs PEP 701).
f"{Classification.model_fields['sentiment']}"

"annotation=str required=True description='The sentiment of the text'"

In [5]:
# Draft one "\n<name>: <annotation>  # <description>" line per Classification
# field and concatenate them into a single prompt fragment.
system_prompt = "".join(
    f"\n{name}: {info.annotation}  # {info.description}"
    for name, info in Classification.model_fields.items()
)

In [6]:
# Show the field listing; every fragment starts with "\n", hence the blank first line.
print(system_prompt)


sentiment: <class 'str'>  # The sentiment of the text
aggressiveness: <class 'int'>  # How aggressive the text is on a scale from 1 to 10
language: <class 'str'>  # The language the text is written in


In [12]:
from openai import OpenAI
from typing import Type
from pydantic import TypeAdapter

class LLM():
    """Chat wrapper that asks the model for JSON matching a pydantic model.

    The system prompt is generated from ``output_model.model_json_schema()``:
    a short preamble followed by one ``name: type  # description`` line per
    field, wrapped in braces. Replies are validated back into ``output_model``.
    """

    def __init__(self, output_model:Type[BaseModel]):
        """Create the OpenAI client and build the schema-derived system prompt.

        Args:
            output_model: pydantic model class describing the expected JSON reply.
        """
        self.client = OpenAI()
        self.output_model = output_model

        schema = self.output_model.model_json_schema()
        # "description" comes from the model's docstring; fall back to the
        # schema title so a model without a docstring does not raise KeyError.
        summary = schema.get('description') or schema.get('title', 'the requested fields')

        lines = [
            "Given the user-submitted text, identify the following information.",
            f"The output json will contain {summary}. Extract the information into JSON format:",
            "",
            "{",
        ]
        for name, spec in schema.get('properties', {}).items():
            # Fix: the first slot used to repeat the description; it is meant to
            # carry the JSON type. Fields whose schema has no plain "type" entry
            # (e.g. unions or nested models) are labelled "any".
            line = f"{name}: {spec.get('type', 'any')}"
            description = spec.get('description')
            if description:
                line += f"  # {description}"
            lines.append(line)
        lines.append("}")

        self.system_prompt = {"role": "system", "content": "\n".join(lines)}

    def __call__(self, *args, **kwds):
        """Forward all arguments to ``invoke``."""
        return self.invoke(*args, **kwds)

    def process(self, response:str) -> BaseModel|str:
        """Validate ``response`` as JSON for ``output_model``.

        Returns:
            The validated model instance, or the unchanged ``response`` when
            validation fails (the error is printed, not raised).
        """
        try:
            return TypeAdapter(self.output_model).validate_json(response)
        except Exception as e:
            # Deliberate best-effort: surface the problem but hand back the raw
            # reply so the caller can still inspect it.
            print(e)
            return response

    def invoke(self, text):
        """Send ``text`` as the user turn and return the processed reply."""
        completion = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                self.system_prompt,
                {"role": "user", "content": text}
            ],
            temperature=0.0,
        )
        response = completion.choices[0].message.content
        return self.process(response)

In [13]:
# Inspect the JSON schema pydantic derives from Classification; the class
# docstring and each Field description show up under "description" keys.
Classification.model_json_schema()

{'description': 'Text classification response',
 'properties': {'sentiment': {'description': 'The sentiment of the text',
   'title': 'Sentiment',
   'type': 'string'},
  'aggressiveness': {'description': 'How aggressive the text is on a scale from 1 to 10',
   'title': 'Aggressiveness',
   'type': 'integer'},
  'language': {'description': 'The language the text is written in',
   'title': 'Language',
   'type': 'string'}},
 'required': ['sentiment', 'aggressiveness', 'language'],
 'title': 'Classification',
 'type': 'object'}

In [14]:
# Instantiate the wrapper with Classification as the expected output model.
llm = LLM(Classification)

In [47]:
# Print the system prompt stored on the wrapper.
# NOTE(review): the saved output below does not match the LLM class defined in
# this notebook (different opening sentence, and Python class reprs such as
# <class 'str'> in the field lines), so it appears to come from an earlier
# revision. Re-run the notebook top to bottom to refresh it.
print(llm.system_prompt['content'])


        Given the user-submitted text, identify the following information, and extract it into JSON format:
{
        
sentiment: <class 'str'>  # The sentiment of the text
aggressiveness: <class 'int'>  # How aggressive the text is on a scale from 1 to 10
language: <class 'str'>  # The language the text is written in
}


In [15]:
# Run the schema-prompted classifier; a reply that validates comes back as a
# Classification instance instead of a raw string.
llm.invoke("Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!")

Classification(sentiment='positive', aggressiveness=1, language='Spanish')

In [20]:
from openai import OpenAI
from typing import Type
from pydantic import TypeAdapter

class LLM():
    """Chat wrapper that uses OpenAI structured outputs to return a pydantic model.

    ``output_model`` is passed as ``response_format`` to the SDK's ``parse``
    helper, so the schema no longer has to be spelled out in the system prompt.
    """

    def __init__(self, output_model:Type[BaseModel]):
        """Create the OpenAI client and store the expected output model.

        Args:
            output_model: pydantic model class describing the expected reply.
        """
        self.client = OpenAI()
        self.output_model = output_model
        system_prompt = "Given the user-submitted text, identify the required information."
        self.system_prompt = {"role": "system", "content": system_prompt}

    def __call__(self, *args, **kwds):
        """Forward all arguments to ``invoke``."""
        return self.invoke(*args, **kwds)

    def process(self, response:str) -> BaseModel|str:
        """Validate ``response`` as JSON for ``output_model``.

        Returns:
            The validated model instance, or the unchanged ``response`` when
            validation fails (the error is printed, not raised).
        """
        try:
            return TypeAdapter(self.output_model).validate_json(response)
        except Exception as e:
            # Deliberate best-effort: report the problem and return the raw text.
            print(e)
            return response

    def invoke(self, text):
        """Send ``text`` as the user turn and return the structured reply.

        Returns:
            The ``output_model`` instance parsed by the SDK when available;
            the refusal text when the model declined to answer; otherwise the
            result of ``process`` on the raw message content.
        """
        completion = self.client.beta.chat.completions.parse(
            model="gpt-4o-mini",
            messages=[
                self.system_prompt,
                {"role": "user", "content": text}
            ],
            temperature=0.0,
            response_format=self.output_model
        )
        message = completion.choices[0].message
        # parse() has already validated the reply against output_model, so use
        # that object instead of re-validating the JSON text a second time.
        if message.parsed is not None:
            return message.parsed
        # On a refusal there is no JSON content; return the refusal text so the
        # caller still gets a str rather than None.
        if message.refusal:
            return message.refusal
        return self.process(message.content or "")

In [21]:
# Rebuild the classifier with the structured-output LLM class and repeat the
# friendly Spanish example; the result is a Classification instance.
llm = LLM(Classification)
llm.invoke("Estoy increiblemente contento de haberte conocido! Creo que seremos muy buenos amigos!")

Classification(sentiment='positive', aggressiveness=1, language='Spanish')

In [39]:
# Decode a sample reply and let pydantic validate and coerce it.
# model_validate replaces the hand-rolled `annotation(value)` coercion, which
# raised KeyError for keys that are not model fields and TypeError for
# annotations that are not callable (e.g. Optional[...]).
raw_json = json.loads('{\n    "sentiment": "positive",\n    "aggressiveness": 1,\n    "language": "Spanish"\n}')
model = Classification.model_validate(raw_json)
# Plain-dict view of the validated data, kept under the name this cell has always defined.
processed_json = model.model_dump()

In [40]:
# Display the Classification instance built from the decoded JSON.
model

Classification(sentiment='positive', aggressiveness=1, language='Spanish')

In [51]:
# Try an angry Spanish sentence.
# NOTE(review): in the saved output the sentiment and language labels come back
# in Spanish ('enojado', 'español') while the earlier example produced English
# labels; both fields are plain str, so nothing pins the label vocabulary.
llm.invoke("Estoy muy enojado con vos! Te voy a dar tu merecido!")

Classification(sentiment='enojado', aggressiveness=8, language='español')