# Tag and Store
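This workflow uses an LLM to generate structured tags for a document: a primary topic, a list of tags with descriptions, and a brief summary.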

## Requirements
```bash
pip install python-dotenv langchain langchain-openai
```

In [None]:
from pydantic import BaseModel, Field
from typing import List, Optional


# Data model for a single tag: the tag text plus a short description of it.
# Used as the element type of DocumentTags.tags.
# NOTE(review): the Field descriptions are presumably surfaced to the LLM via
# the output parser's format instructions - confirm before rewording them.
class Tag(BaseModel):
    tag: str = Field(description="The tag it self")
    description: str = Field(description="A brief description of the tag")


# Data model for the structured tagging result of one document: its primary
# topic, a list of Tag entries, and a short summary.
# generate_document_tags hands this class to PydanticOutputParser as the
# target type and is annotated to return an instance of it.
# NOTE(review): the Field descriptions are presumably surfaced to the LLM via
# the output parser's format instructions - confirm before rewording them.
class DocumentTags(BaseModel):
    primary_topic: str = Field(description="The main topic or subject of the document")
    tags: List[Tag] = Field(description="List of relevant tags for the document")
    summary: str = Field(description="A brief summary of the document content")

In [None]:
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser


def generate_document_tags(content: str, prompt_template: str, model) -> DocumentTags:
    """
    Generates structured tags for a given document content.

    Args:
        content: The document text content; supplied to the prompt as the
            ``content`` input variable.
        prompt_template: The prompt template text. It is rendered with
            ``content`` as its input variable and ``format_instructions``
            pre-filled from the output parser.
        model: The LLM model to use; it is piped between the prompt and the
            output parser.

    Returns:
        DocumentTags object containing generated tags and metadata
    """
    # Initialize the output parser that targets the DocumentTags model
    parser = PydanticOutputParser(pydantic_object=DocumentTags)

    # Assemble the prompt; the parser's format instructions are bound up front
    # so callers only have to provide the document content.
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["content"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )

    # Create and run the chain: prompt -> model -> parser
    chain = prompt | model | parser
    return chain.invoke({"content": content})

In [None]:
import json
import os

from dotenv import load_dotenv, find_dotenv
from langchain_openai import ChatOpenAI


# Load environment variables from a .env file located by find_dotenv()
# (presumably where the OpenAI credentials live - verify for your setup).
load_dotenv(find_dotenv())

# Read the prompt template text
with open("prompt.txt", "r", encoding="utf-8") as f:
    template = f.read()

# Load the example document that will be tagged
with open("example_document.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Initialize your model here.
# "gpt-4o-mini" is the model name, not the name of an environment variable:
# use it as the default and allow an override through OPENAI_MODEL.
model = ChatOpenAI(
    model=os.getenv("OPENAI_MODEL", "gpt-4o-mini"),
    temperature=0,
    seed=42,
    n=1,
)

In [None]:
# Process the document with the tagging function defined in this notebook.
# NOTE(review): `data` is the parsed JSON object, while generate_document_tags
# annotates `content` as str - confirm whether it should be serialized first.
result = generate_document_tags(data, template, model)
print(result)