## Import dependencies

In [34]:
from langchain.chat_models import init_chat_model
from models import nova_micro_config
from rich import print

from langchain.messages import AIMessage, HumanMessage, SystemMessage

### Create the model with `init_chat_model`

In [None]:
# Build the default chat model for this notebook. The provider string and the
# credentials profile are fixed here; the model id and sampling settings are
# read from `nova_micro_config`.
model = init_chat_model(
    model_provider="bedrock_converse",
    credentials_profile_name="sandbox",
    model=nova_micro_config.model_id,
    temperature=nova_micro_config.temperature,
    max_tokens=nova_micro_config.max_tokens,
)

### Model conversations and messages

In [37]:
# A conversation written as plain dictionaries: each entry carries a "role"
# and the "content" of that turn.
conversation_by_dict = [
    {"role": "system", "content": "You are a helpful assistant that translates English to French."},
    {"role": "user", "content": "Translate: I love programming."},
    {"role": "assistant", "content": "J'adore la programmation."},
    {"role": "user", "content": "Translate: I love building applications."},
]
# A conversation written with message objects. The last human turn asks the
# model to review the translation given in the preceding AI turn.
conversation = [
    SystemMessage("You are a helpful assistant that translates English to French."),
    HumanMessage("Translate: I love programming."),
    AIMessage("J'adore la programmation."),
    HumanMessage("Is this correct? please explain"),
]

# Send the whole history in a single call and show the text of the reply.
response = model.invoke(conversation)
print(response.text)

### Streaming Response

In [38]:
# Stream the answer and stitch the text of every chunk into one string.
# `str.join` replaces the manual `output = output + chunk.text` loop, so the
# growing string is not re-copied on each chunk; the printed result is the same.
output = "".join(
    chunk.text
    for chunk in model.stream("How to use emergency breaking, while cycling in downhill?")
)
print(output)

### Model batch processing:

Batching runs several requests in parallel, which can help reduce cost. The `max_concurrency` option in `config` restricts how many requests run at the same time.

In [40]:
# Independent prompts that are submitted together in one batch call.
batched = [
    "How to preapare for sleep",
    "Does cow folly any bed time routine",
    "Should I hire a navy seal for protecting my bicycle?",
]

# `max_concurrency` is passed through the config to limit parallel requests.
batch_config = {"max_concurrency": 5}
responses = model.batch(batched, config=batch_config)
for reply in responses:
    print(reply.text)

### Batch as completed



In [43]:
# Each yielded item is a pair: its first element is printed as-is and the
# `.text` of its second element is printed next to it (presumably the input
# index and the matching reply, delivered in completion order).
for position, reply in model.batch_as_completed(batched):
    print(position, reply.text)


## Tool Calling

- A tool is an external function or coroutine that the model can call
- A tool must be bound to the model with the `bind_tools` method so that the model can use it

#### Create a Tool

In [44]:
from langchain.tools import tool

@tool
def get_weather(location: str) -> str:
    """Get weather for a location
    """
    # NOTE(review): the docstring above is kept exactly as written because the
    # `tool` decorator may expose it to the model as the tool description.
    # Canned demo data: the listed UK cities are reported as rainy, every other
    # location as sunny.
    rainy_uk_cities = ["London", "Cambridge", "Bath"]
    is_rainy = location in rainy_uk_cities
    return (
        f"It is always raining in UK. So City {location} is not an exception"
        if is_rainy
        else f"It is sunny in {location}"
    )

#### Bind tool and invoke

In [45]:
# Make the weather tool available to the model.
model_with_tool = model.bind_tools([get_weather])

# Open the conversation with one question that mentions three cities.
weather_question = "What's the weather like in Honululu, Bath and London ?"
messages = [HumanMessage(weather_question)]

# First pass: the reply is stored in the history and its tool calls are shown.
response = model_with_tool.invoke(messages)
messages.append(response)
print(response.tool_calls)
# for tool_call in response.tool_calls:
#     # View tool calls made by the model
#     print(f"Tool: {tool_call['name']}")
#     print(f"Args: {tool_call['args']}")
   

In [46]:
# Run every tool call requested in `response` and add each result to the
# message history so the model can see it on the next turn.
for requested_call in response.tool_calls:
    print(requested_call)
    tool_message = get_weather.invoke(requested_call)
    messages.append(tool_message)

# Second pass: the model answers with the tool results now in the history.
final_response = model_with_tool.invoke(messages)
print(final_response.text)

### Structured Output

#### Using Pydantic models

In [52]:
from pydantic import BaseModel, Field

# Pydantic schema describing a single movie. No field has a default, so all
# five are required. Documentation is kept in `#` comments rather than a class
# docstring so that the generated schema is not altered.
class Movie(BaseModel):
    # Basic identification of the movie.
    title: str = Field(description="Movie title. E.g. Titanic, Intersteller")
    year: int = Field(description="The year movie is published")
    director: str = Field(description="Director of the movie. E.g. James Cameron")
    # Values the model is asked to produce itself.
    rating: float = Field(
        description="Rating of the movie, you will decide with your knowledge",
    )
    summary: str = Field(description="a sort summary of the movie")

#### Using TypedDict

In [57]:
from typing_extensions import TypedDict, Annotated


# TypedDict flavour of the movie schema, with the same five fields as `Movie`.
# Each field is declared as Annotated[<type>, ..., "<description>"]; the
# declarations are left exactly as written because whatever consumes this
# schema reads them through introspection.
class MovieDict(TypedDict):
    # Basic identification of the movie.
    title: Annotated[str, ..., "Movie title. E.g. Titanic, Intersteller"]
    year: Annotated[int, ..., "The year movie is published"]
    director: Annotated[str, ..., "Director of the movie. E.g. James Cameron"]
    # Values the model is asked to produce itself.
    rating: Annotated[
        float, ..., "Rating of the movie, you will decide with your knowledge"
    ]
    summary: Annotated[str, ..., "a sort summary of the movie"]

#### Json Schema

In [63]:

# JSON Schema flavour of the movie structure: an object with five typed and
# described properties, all of which are listed as required.
movie_schema = {
    "title": "Movie",
    "description": "A movie with details",
    "type": "object",
    "properties": {
        "title": {"type": "string", "description": "The title of the movie"},
        "year": {"type": "integer", "description": "The year the movie was released"},
        "director": {"type": "string", "description": "The director of the movie"},
        "rating": {"type": "number", "description": "The movie's rating out of 10"},
        "summary": {"type": "string", "description": "a sort summary of the movie"},
    },
    "required": ["title", "year", "director", "rating", "summary"],
}


In [61]:
# Ask the model to answer in the shape of the `Movie` schema.
# NOTE(review): `Movie` describes a single movie, while the prompt below names
# two titles and says the user may provide more than one — confirm the intent.
critic_messages = [
    SystemMessage("You are a movie critiq, your task is to provide movie details. User may provide more that one"),
    HumanMessage("Harry potter and the chember of secrets, Intersteller"),
]
model_with_structure = model.with_structured_output(Movie)
response = model_with_structure.invoke(critic_messages)
print(response)


In [None]:
# Same request, this time driven by the `movie_schema` dictionary with the
# "json_schema" method. `include_raw=True` is passed as well (presumably so the
# raw model message is returned alongside the parsed result — confirm in the
# `with_structured_output` documentation).
critic_messages = [
    SystemMessage("You are a movie critiq, your task is to provide movie details. User may provide more that one"),
    HumanMessage("Harry potter and the chember of secrets, Intersteller"),
]
model_with_structure = model.with_structured_output(
    movie_schema,
    method="json_schema",
    include_raw=True,
)
response = model_with_structure.invoke(critic_messages)
print(response)

In [68]:
# Show the `profile` attribute of the chat model object (presumably metadata
# about the model's capabilities — confirm against the LangChain docs).
print(model.profile)

### Rate Limiting

In [77]:
from langchain_core.rate_limiters import InMemoryRateLimiter
from models import nova_pro_config


# In-memory rate limiter that is attached to the Nova Pro model below.
in_memory_limiter = InMemoryRateLimiter(
    requests_per_second=0.1,  # one request every 10 seconds
    check_every_n_seconds=0.1,  # check every 100 ms whether a request is allowed
    max_bucket_size=10,  # controls the maximum burst size
)

# Second chat model, configured from `nova_pro_config` and throttled by the
# rate limiter defined above. Provider and credentials profile match the ones
# used for the default model.
model_nova_pro = init_chat_model(
    model_provider="bedrock_converse",
    credentials_profile_name="sandbox",
    model=nova_pro_config.model_id,
    temperature=nova_pro_config.temperature,
    max_tokens=nova_pro_config.max_tokens,
    rate_limiter=in_memory_limiter,
)

##### Token usage

In [80]:
import uuid

from langchain_core.callbacks import UsageMetadataCallbackHandler
from langchain_core.runnables import RunnableConfig

# Callback handler whose `usage_metadata` is printed after the call below.
usage_metadata_callback_handler = UsageMetadataCallbackHandler()

# Per-invocation settings. `RunnableConfig` declares `run_id` as a UUID, so the
# identifier is parsed into a real `uuid.UUID` instead of being passed as a raw
# string.
run_config = RunnableConfig(
    run_id=uuid.UUID("EF0FE3BD-B0F1-4E9F-A210-CC9BC358A17F"),
    run_name="Dragon emperor",
    max_concurrency=1,
    recursion_limit=1,
    metadata={"Dragon Name": "Tarin"},
    callbacks=[usage_metadata_callback_handler],
)
# The original, misspelled name is kept as an alias in case other cells use it.
confgis = run_config

response = model_nova_pro.invoke("How to train your dragon", config=run_config)
print(response.text)
print(usage_metadata_callback_handler.usage_metadata)



##### Token usage with callback handler
Useful when multiple models are called

In [79]:
from langchain_core.callbacks import get_usage_metadata_callback

# Both models are invoked with the same prompt inside one callback context;
# the usage collected on `cb` is printed before the context closes.
dragon_prompt = "How to train your dragon"
with get_usage_metadata_callback() as cb:
    for chat_model in (model, model_nova_pro):
        print(chat_model.invoke(dragon_prompt).text)

    print(cb.usage_metadata)
