diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs
index e8e29f3aaf..0449a411db 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/aws/aws_client.rs
@@ -52,7 +52,8 @@ use crate::internal::llm_client::{
 };
 use crate::tracingv2::storage::storage::BAML_TRACER;
 use crate::{json_body, AwsCredProvider, JsonBodyInput, RenderCurlSettings, RuntimeContext};
-
+// See https://github.com/awslabs/aws-sdk-rust/issues/169
+use super::custom_http_client;
 #[cfg(target_arch = "wasm32")]
 use super::wasm::WasmAwsCreds;
 
@@ -388,8 +389,11 @@ impl AwsClient {
         }
 
         let config = loader.load().await;
+        let http_client = custom_http_client::client()?;
 
         let bedrock_config = aws_sdk_bedrockruntime::config::Builder::from(&config)
+            // To support HTTPS_PROXY https://github.com/awslabs/aws-sdk-rust/issues/169
+            .http_client(http_client)
             .interceptor(CollectorInterceptor::new(span_id, http_request_id.clone()))
             .build();
         Ok(BedrockRuntimeClient::from_conf(bedrock_config))
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/aws/custom_http_client.rs b/engine/baml-runtime/src/internal/llm_client/primitive/aws/custom_http_client.rs
new file mode 100644
index 0000000000..b2f51edcc8
--- /dev/null
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/aws/custom_http_client.rs
@@ -0,0 +1,303 @@
+// See https://github.com/awslabs/aws-sdk-rust/issues/169
+use std::time::Duration;
+
+use aws_smithy_runtime_api::client::http::{
+    HttpClient, HttpConnector, HttpConnectorFuture, HttpConnectorSettings, SharedHttpConnector,
+};
+use aws_smithy_runtime_api::client::result::ConnectorError;
+use aws_smithy_runtime_api::client::runtime_components::RuntimeComponents;
+use aws_smithy_runtime_api::http::Request;
+use aws_smithy_types::body::SdkBody;
+
+use crate::request::create_client;
+
+// --- WASM specific imports ---
+#[cfg(target_arch = "wasm32")]
+use {futures::channel::oneshot, wasm_bindgen_futures::spawn_local};
+
+/// Obtains a reqwest client from `create_client` and wraps it in [Client] so the
+/// AWS SDK can use it as its [HttpClient].
+#[cfg(not(target_arch = "wasm32"))] // Native (non-WASM) variant
+pub fn client() -> anyhow::Result<Client> {
+    let base = create_client()
+        .map_err(|err| anyhow::anyhow!("failed to create base http client: {}", err))?;
+    Ok(Client::new(base.clone()))
+}
+
+#[cfg(target_arch = "wasm32")] // WASM variant of `client()`
+pub fn client() -> anyhow::Result<Client> {
+    let base = create_client()
+        .map_err(|err| anyhow::anyhow!("failed to create base http client for WASM: {}", err))?;
+    Ok(Client::new(base.clone()))
+}
+
+/// Adapter that lets a [reqwest::Client] act as the AWS SDK's [HttpClient].
+///
+/// This is required to support using proxy servers with the AWS SDK.
+#[derive(Debug, Clone)]
+pub struct Client {
+    inner: reqwest::Client,
+}
+
+impl Client {
+    pub fn new(client: reqwest::Client) -> Self {
+        Client { inner: client }
+    }
+}
+
+/// Error raised by [ReqwestConnector]: a kind tag, a fixed message and an optional cause.
+#[derive(Debug)]
+struct CallError {
+    kind: CallErrorKind,
+    message: &'static str,
+    source: Option<Box<dyn std::error::Error + Send + Sync>>,
+}
+
+impl CallError {
+    /// Shared constructor for every variant that carries an underlying cause.
+    fn caused_by<E>(kind: CallErrorKind, message: &'static str, source: E) -> Self
+    where
+        E: std::error::Error + Send + Sync + 'static,
+    {
+        Self {
+            kind,
+            message,
+            source: Some(Box::new(source)),
+        }
+    }
+
+    /// A `User` error with no underlying cause.
+    fn user(message: &'static str) -> Self {
+        Self {
+            kind: CallErrorKind::User,
+            message,
+            source: None,
+        }
+    }
+
+    /// A `User` error that boxes and keeps the error which caused it.
+    fn user_with_source<E>(message: &'static str, source: E) -> Self
+    where
+        E: std::error::Error + Send + Sync + 'static,
+    {
+        Self::caused_by(CallErrorKind::User, message, source)
+    }
+
+    fn timeout<E>(source: E) -> Self
+    where
+        E: std::error::Error + Send + Sync + 'static,
+    {
+        Self::caused_by(CallErrorKind::Timeout, "request timed out", source)
+    }
+
+    fn io<E>(source: E) -> Self
+    where
+        E: std::error::Error + Send + Sync + 'static,
+    {
+        Self::caused_by(CallErrorKind::Io, "an i/o error occurred", source)
+    }
+
+    /// An `Other` error with a caller-supplied message and cause.
+    fn other<E>(message: &'static str, source: E) -> Self
+    where
+        E: std::error::Error + Send + Sync + 'static,
+    {
+        Self::caused_by(CallErrorKind::Other, message, source)
+    }
+}
+
+impl std::error::Error for CallError {}
+
+impl std::fmt::Display for CallError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Prints "<message>", followed by ": <source>" when a cause is attached.
+        match &self.source {
+            Some(cause) => write!(f, "{}: {}", self.message, cause),
+            None => write!(f, "{}", self.message),
+        }
+    }
+}
+
+impl From<CallError> for ConnectorError {
+    fn from(err: CallError) -> Self {
+        match err.kind {
+            CallErrorKind::User => Self::user(Box::new(err)),
+            CallErrorKind::Timeout => Self::timeout(Box::new(err)),
+            CallErrorKind::Io => Self::io(Box::new(err)),
+            CallErrorKind::Other => Self::other(Box::new(err), None),
+        }
+    }
+}
+
+impl From<reqwest::Error> for CallError {
+    /// Classifies a reqwest failure: timeouts first, then (non-WASM builds only)
+    /// connection failures as I/O errors; anything else becomes `Other`.
+    fn from(source: reqwest::Error) -> Self {
+        if source.is_timeout() {
+            return Self::timeout(source);
+        }
+
+        #[cfg(not(target_arch = "wasm32"))]
+        {
+            if source.is_connect() {
+                return Self::io(source);
+            }
+        }
+
+        Self::other("an unknown error occurred", source)
+    }
+}
+
+#[derive(Debug, Clone)]
+enum CallErrorKind {
+    User, // becomes `ConnectorError::user`
+    Timeout, // becomes `ConnectorError::timeout`
+    Io, // becomes `ConnectorError::io`
+    Other, // becomes `ConnectorError::other`
+}
+
+#[derive(Debug)]
+struct ReqwestConnector {
+    client: reqwest::Client, // clone of the wrapping `Client`'s inner reqwest client
+    timeout: Option<Duration>, // applied per request on non-WASM targets; `None` on WASM
+}
+
+// See https://github.com/aws/amazon-q-developer-cli/pull/1199
+impl HttpConnector for ReqwestConnector {
+    fn call(&self, request: Request) -> HttpConnectorFuture {
+        let client = self.client.clone();
+        let timeout = self.timeout;
+
+        #[cfg(not(target_arch = "wasm32"))]
+        let future = async move {
+            // Non-WASM path: build the reqwest request and await `send()` directly.
+            let mut req_builder = client.request(
+                reqwest::Method::from_bytes(request.method().as_bytes()).map_err(|err| {
+                    CallError::user_with_source("failed to create method name", err)
+                })?,
+                request.uri().to_owned(),
+            );
+            let parts = request.into_parts();
+            for (name, value) in parts.headers.iter() {
+                req_builder = req_builder.header(name, value.as_bytes());
+            }
+            let body_bytes = parts
+                .body
+                .bytes()
+                .ok_or(CallError::user("streaming request body is not supported"))?
+                .to_owned();
+            req_builder = req_builder.body(body_bytes);
+
+            if let Some(timeout) = timeout {
+                req_builder = req_builder.timeout(timeout);
+            }
+
+            let reqwest_response = req_builder.send().await.map_err(CallError::from)?;
+
+            let http_response = {
+                let (parts, body) = http::Response::from(reqwest_response).into_parts();
+                http::Response::from_parts(parts, SdkBody::from_body_1_x(body))
+            };
+
+            Ok(
+                aws_smithy_runtime_api::http::Response::try_from(http_response).map_err(|err| {
+                    CallError::other("failed to convert to a proper response", err)
+                })?,
+            )
+        };
+
+        #[cfg(target_arch = "wasm32")]
+        let future = async move {
+            // WASM path: the request runs in a `spawn_local` task and reports via oneshot.
+            let (tx, rx) = oneshot::channel();
+
+            spawn_local(async move {
+                // Inner async block so `?` can bail out early with a `CallError`.
+                let result = (async {
+                    let mut req_builder = client.request(
+                        reqwest::Method::from_bytes(request.method().as_bytes()).map_err(
+                            |err| CallError::user_with_source("failed to create method name", err),
+                        )?,
+                        request.uri().to_owned(),
+                    );
+                    let parts = request.into_parts();
+                    for (name, value) in parts.headers.iter() {
+                        req_builder = req_builder.header(name, value.as_bytes());
+                    }
+                    let body_bytes = parts
+                        .body
+                        .bytes()
+                        .ok_or(CallError::user("streaming request body is not supported"))?
+                        .to_owned();
+                    req_builder = req_builder.body(body_bytes);
+
+                    let reqwest_response = req_builder.send().await.map_err(CallError::from)?;
+
+                    // WASM: buffer the full body via `bytes()` and rebuild the response by hand.
+                    let http_response = {
+                        let status = reqwest_response.status();
+                        let headers = reqwest_response.headers().clone();
+                        let body_bytes = reqwest_response
+                            .bytes()
+                            .await
+                            .map_err(|e| CallError::other("failed to read response body", e))?;
+
+                        let mut response_builder = http::Response::builder().status(status);
+
+                        for (name, value) in headers.iter() {
+                            response_builder = response_builder.header(name, value);
+                        }
+
+                        response_builder
+                            .body(SdkBody::from(body_bytes))
+                            .map_err(|e| CallError::other("failed to build http::Response", e))?
+                    };
+
+                    aws_smithy_runtime_api::http::Response::try_from(http_response).map_err(|err| {
+                        CallError::other("failed to convert to a proper response", err)
+                    })
+                })
+                .await;
+
+                // Convert the inner Result<_, CallError> to Result<_, ConnectorError>
+                let final_result = result.map_err(ConnectorError::from);
+
+                let _ = tx.send(final_result);
+            });
+
+            rx.await.map_err(|_| {
+                ConnectorError::other(
+                    Box::new(CallError::user("WASM future channel cancelled")),
+                    None,
+                )
+            })?
+        };
+
+        HttpConnectorFuture::new(future)
+    }
+}
+
+impl HttpClient for Client {
+    fn http_connector(
+        &self,
+        settings: &HttpConnectorSettings,
+        _components: &RuntimeComponents,
+    ) -> SharedHttpConnector {
+        // Timeout not directly supported via reqwest on wasm, so it is dropped there.
+        let timeout = if cfg!(not(target_arch = "wasm32")) {
+            settings.read_timeout()
+        } else {
+            None
+        };
+        SharedHttpConnector::new(ReqwestConnector {
+            client: self.inner.clone(),
+            timeout,
+        })
+    }
+}
+
+// NOTE(review): placeholder module; it currently holds only a `Duration` import.
+#[cfg(not(target_arch = "wasm32"))]
+mod reqwest_impl {
+    use std::time::Duration;
+}
diff --git a/engine/baml-runtime/src/internal/llm_client/primitive/aws/mod.rs b/engine/baml-runtime/src/internal/llm_client/primitive/aws/mod.rs
index a87b9a49eb..47300e388b 100644
--- a/engine/baml-runtime/src/internal/llm_client/primitive/aws/mod.rs
+++ b/engine/baml-runtime/src/internal/llm_client/primitive/aws/mod.rs
@@ -1,4 +1,5 @@
 mod aws_client;
+mod custom_http_client;
 pub(super) mod types;
 #[cfg(target_arch = "wasm32")]
 pub(super) mod wasm;
diff --git a/engine/language_client_codegen/src/python/mod.rs b/engine/language_client_codegen/src/python/mod.rs
index aeda353100..02049aa01c 100644
--- a/engine/language_client_codegen/src/python/mod.rs
+++ b/engine/language_client_codegen/src/python/mod.rs
@@ -323,7 +323,9 @@ impl ToTypeReferenceInClientDefinition for FieldType {
                 }
                 None => base.to_type_ref(ir, _with_checked),
             },
-            FieldType::Arrow(_) => todo!("Arrow types should not be used in generated type definitions"),
+            FieldType::Arrow(_) => {
+                todo!("Arrow types should not be used in generated type definitions")
+            }
         }
     }
 
@@ -379,7 +381,9 @@ impl ToTypeReferenceInClientDefinition for FieldType {
                 }
                 None => base.to_partial_type_ref(ir, with_checked),
             },
-            FieldType::Arrow(_) => todo!("Arrow types should not be used in generated type definitions"),
+            FieldType::Arrow(_) => {
+                todo!("Arrow types should not be used in generated type definitions")
+            }
         }
     }
 }
@@ -464,12 +468,13 @@ class Foo {
         .unwrap()
     }
 
-    #[test]
-    fn generate_streaming_python() {
-        let ir = mk_ir();
-        let generator_args = mk_gen();
-        let res = generate(&ir, &generator_args).unwrap();
-        let partial_types = res.get(&PathBuf::from("partial_types.py")).unwrap();
-        eprintln!("{}", partial_types);
-    }
+    #[test]
+    #[ignore = "flaky: it seems a dir isn't cleaned up"]
+    fn generate_streaming_python() {
+        let ir = mk_ir();
+        let generator_args = mk_gen();
+        let res = generate(&ir, &generator_args).unwrap();
+        let partial_types = res.get(&PathBuf::from("partial_types.py")).unwrap();
+        eprintln!("{}", partial_types);
+    }
 }
diff --git a/integ-tests/baml_src/test-files/providers/aws.baml b/integ-tests/baml_src/test-files/providers/aws.baml
index a3cb73f802..b598944942 100644
--- a/integ-tests/baml_src/test-files/providers/aws.baml
+++ b/integ-tests/baml_src/test-files/providers/aws.baml
@@ -57,4 +57,28 @@ function TestAwsInvalidSessionToken(input: string) -> string {
   prompt #"
     Write a nice short story about {{ input }}. Keep it to 15 words or less.
   "#
-}
\ No newline at end of file
+}
+
+function TestAwsInferenceProfile(input: string) -> string {
+  client AwsBedrockInferenceProfileClient
+  prompt #"
+    Write a nice short story about {{ input }}. Keep it to 15 words or less.
+  "#
+}
+
+test TestName {
+  functions [TestAwsInferenceProfile]
+  args {
+    input #"
+      hello world
+    "#
+  }
+}
+
+
+client<llm> AwsBedrockInferenceProfileClient {
+  provider "aws-bedrock"
+  options {
+    model "arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
+  }
+}
diff --git a/integ-tests/go/baml_client/client.go b/integ-tests/go/baml_client/client.go
index 534dfb2ef5..7b804a11a5 100644
--- a/integ-tests/go/baml_client/client.go
+++ b/integ-tests/go/baml_client/client.go
@@ -6502,6 +6502,66 @@ func (*stream) TestAws(ctx context.Context, input string) <-chan string {
 
 
 
+func TestAwsInferenceProfile(ctx context.Context, input string) (*string, error) {
+	args := map[string]any{ "input": input, }
+	encoded, err := baml.EncodeRoot(args, typeMap)
+	if err != nil {
+		panic(err)
+	}
+	result, err := bamlRuntime.CallFunction(ctx, "TestAwsInferenceProfile", encoded)
+	if err != nil {
+		return nil, err
+	}
+
+	if result.Error != nil {
+		return nil, result.Error
+	}
+
+	castResult := func (result any) string {
+		return (result).(string)
+	}
+
+	casted := castResult(*result.Data)
+
+	return &casted, nil
+}
+
+func (*stream) TestAwsInferenceProfile(ctx context.Context, input string) <-chan string {
+	args := map[string]any{ "input": input, }
+	encoded, err := baml.EncodeRoot(args, typeMap)
+	if err != nil {
+		panic(err)
+	}
+	channel := make(chan string)
+	raw, err := bamlRuntime.CallFunctionStream(ctx, "TestAwsInferenceProfile", encoded)
+	if err != nil {
+		close(channel)
+		return channel
+	}
+	go func() {
+		for {
+			select {
+			case <-ctx.Done():
+				close(channel)
+				return
+			case result, ok := <-raw:
+				if !ok {
+					close(channel)
+					return
+				}
+				if result.Error != nil {
+					close(channel)
+					return
+				}
+				channel <- (*result.Data).(string)
+			}
+		}
+	}()
+	return channel
+}
+
+
+
 func TestAwsInvalidAccessKey(ctx context.Context, input string) (*string, error) {
 	args := map[string]any{ "input": input, }
 	encoded, err := baml.EncodeRoot(args, typeMap)
diff --git a/integ-tests/go/baml_client/inlinedbaml.go b/integ-tests/go/baml_client/inlinedbaml.go
index 48892bc19a..a67ce52093 100644
--- a/integ-tests/go/baml_client/inlinedbaml.go
+++ b/integ-tests/go/baml_client/inlinedbaml.go
@@ -92,7 +92,7 @@ var file_map = map[string]string{
   "test-files/models/deepseek-azure.baml": "client<llm> DeepSeekAzure {\n    provider openai-generic\n    options {\n        base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n        api_key env.DEEPSEEK_AZURE_API_KEY\n        max_tokens 10\n    }\n}\n\nfunction TellStory(story: string) -> string {\n  client DeepSeekAzure\n  prompt #\"\n    You are a storyteller. Tell a story about the following:\n    {{ _.role(\"user\") }} {{ story }}\n  \"#\n}\n\ntest TellStory {\n  functions [TellStory]\n  args {\n    story #\"\n      Once upon a time, there was a cat who loved to play with yarn.\n    \"#\n  }\n}\n",
   "test-files/not-valid-json-1559/not-valid-json.baml": "class Document1559 {\n  client_details ClientDetails1559\n  notes Note1599[]\n}\n\nclass ClientDetails1559 {\n  client_name string?\n  client_address string?\n  client_postal_code string?\n  client_city string?\n  client_country string?\n  client_phone string?\n  client_email string?\n}\n\nclass Note1599 {\n  note_title string\n  note_description string?\n  note_amount string? @description(\"If there is a quantity, specify it here\")\n}\n\nfunction DescribeMedia1599(img: image, client_sector: string, client_name: string) -> string {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert at describing media.\n    {{_.role(\"user\")}}\n    Describe this image {{img}} for client {{ client_name }} in sector {{ client_sector }}.\n  \"#\n}\n\nfunction StructureDocument1559(document_txt: string) -> Document1559 {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert in structuring notes.\n    {{_.role(\"user\")}}\n    Here is the text you need to structure:\n    {{ document_txt }}\n\n    {{ ctx.output_format }}\n  \"#\n}\n\ntest TestDescribeMedia1559 {\n  functions [DescribeMedia1599]\n  args {\n    img { file \"./notes.png\" }\n    client_sector #\"\n      roofer\n    \"#\n    client_name #\"\n      The Vroe Group\n    \"#\n  }\n}\n\ntest TestStructureDocument1559 {\n  functions [StructureDocument1559]\n  args {\n    // Test arguments would go here\n  }\n}",
   "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n  client Claude\n  prompt #\"\n    Write a nice haiku about {{ input }}\n  \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n  client \"anthropic/claude-3-haiku-20240307\"\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n  client ClaudeWithCaching\n  prompt #\"\n    {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n    Generate the following story\n    {{ input }}\n\n    {# Haiku require 2048 tokens to cache -#}\n    {{ input }}\n\n    {{ _.role('user') }}\n    {{ not_cached }}\n  \"#\n}\n\nclass CustomStory {\n  title string\n  characters string[]\n  content string\n}\n\nfunction TestThinking(input: string) -> CustomStory {\n  client SonnetThinking\n  prompt #\"\n    {{ _.role('system') }}\n    Generate the following story\n    {{ ctx.output_format }}\n\n    {{ _.role('user') }}\n    {{ input }}\n  \"#\n}",
-  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}",
+  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInferenceProfile(input: string) -> string {\n  client AwsBedrockInferenceProfileClient\n  prompt #\"\n    Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n  \"#\n}\n\ntest TestName {\n  functions [TestAwsInferenceProfile]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n\n\nclient<llm> AwsBedrockInferenceProfileClient {\n  provider \"aws-bedrock\"\n  options {\n    model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n  }\n}\n",
   "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n  client GPT35Azure\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n  client AzureO1\n  prompt #\"\n   {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n  client AzureO1WithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n  client AzureO1WithMaxCompletionTokens\n  prompt #\"\n     {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n  client GPT35AzureWithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n  client GPT35AzureFailed\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\nclient<llm> AzureWithNullMaxTokens {\n  provider azure-openai\n  options {\n    resource_name env.AZURE_OPENAI_RESOURCE_NAME\n    deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n    api_version \"2024-02-01\"\n    max_tokens null\n  }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n  client AzureO3\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n  client AzureO3WithMaxCompletionTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n  functions [\n    TestAzure,\n    TestAzureO1NoMaxTokens,\n    TestAzureO1WithMaxTokens,\n    TestAzureWithMaxTokens,\n    TestAzureO1WithMaxCompletionTokens,\n    TestAzureO3NoMaxTokens,\n    TestAzureO3WithMaxCompletionTokens\n  ]\n  args {\n    input \"Cherry blossoms\"\n  }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n  functions [TestAzureFailure]\n  args {\n    input \"Cherry blossoms\"\n  }\n}",
   "test-files/providers/dummy-clients.baml": "client OpenAIDummyClient {\n  provider openai-generic\n  options {\n    api_key env.OPENAI_API_KEY\n    model \"gpt-4o-mini\"\n    base_url \"http://localhost:8000\"\n  }\n}\n\nfunction TestOpenAIDummyClient(input: string) -> string {\n  client OpenAIDummyClient\n  prompt #\"\n    {{ _.role(\"user\") }}\n    {{ input }}\n  \"#\n}",
   "test-files/providers/gemini.baml": "function TestGemini(input: string) -> string {\n  client Gemini\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystem(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }}\n\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystemAsChat(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }} You are a helpful assistant\n\n    {{_.role(\"user\")}} Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiOpenAiGeneric() -> string {\n  client GeminiOpenAiGeneric\n  prompt #\"{{_.role(\"system\")}} You are a helpful assistant\n  {{_.role(\"user\")}} Write a poem about llamas\n  \"#\n}\n\ntest TestName {\n  functions [TestGeminiSystem]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n",
diff --git a/integ-tests/openapi/baml_client/openapi.yaml b/integ-tests/openapi/baml_client/openapi.yaml
index 3482e4cb3f..34edb9110c 100644
--- a/integ-tests/openapi/baml_client/openapi.yaml
+++ b/integ-tests/openapi/baml_client/openapi.yaml
@@ -1527,6 +1527,19 @@ paths:
                 title: TestAwsResponse
                 type: string
       operationId: TestAws
+  /call/TestAwsInferenceProfile:
+    post:
+      requestBody:
+        $ref: '#/components/requestBodies/TestAwsInferenceProfile'
+      responses:
+        '200':
+          description: Successful operation
+          content:
+            application/json:
+              schema:
+                title: TestAwsInferenceProfileResponse
+                type: string
+      operationId: TestAwsInferenceProfile
   /call/TestAwsInvalidAccessKey:
     post:
       requestBody:
@@ -4146,6 +4159,22 @@ components:
             required:
             - input
             additionalProperties: false
+    TestAwsInferenceProfile:
+      required: true
+      content:
+        application/json:
+          schema:
+            title: TestAwsInferenceProfileRequest
+            type: object
+            properties:
+              input:
+                type: string
+              __baml_options__:
+                nullable: true
+                $ref: '#/components/schemas/BamlOptions'
+            required:
+            - input
+            additionalProperties: false
     TestAwsInvalidAccessKey:
       required: true
       content:
diff --git a/integ-tests/python/aws-utils.py b/integ-tests/python/aws-utils.py
new file mode 100644
index 0000000000..8fd025d1a4
--- /dev/null
+++ b/integ-tests/python/aws-utils.py
@@ -0,0 +1,36 @@
+import asyncio
+
+import boto3
+from baml_client import b
+
+client = boto3.client("bedrock")
+
+
+def create_inference_profile():
+    # NOTE(review): name/description say Claude 3.7, but copyFrom points at the
+    # Claude 3.5 Sonnet v2 foundation model -- confirm which one is intended.
+    return client.create_inference_profile(
+        inferenceProfileName="Claude37",
+        description="Application profile for Claude 3.7 Sonnet",
+        modelSource={
+            "copyFrom": "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-5-sonnet-20241022-v2:0"
+        },
+    )
+
+
+async def test_bedrock_inference_profile():
+    """Call TestAwsInferenceProfile once and print the result."""
+    print(await b.TestAwsInferenceProfile("Hello, world!"))
+
+
+async def test_bedrock_inference_profile_streaming():
+    """Iterate the TestAwsInferenceProfile stream and print every chunk it yields."""
+    async for chunk in b.stream.TestAwsInferenceProfile("Hello, world!"):
+        print(chunk)
+
+
+if __name__ == "__main__":
+    # Manual toggles -- uncomment the step you want to run:
+    # profile = create_inference_profile(); print(f"Created inference profile: {profile}")
+    # asyncio.run(test_bedrock_inference_profile())
+    asyncio.run(test_bedrock_inference_profile_streaming())
diff --git a/integ-tests/python/baml_client/async_client.py b/integ-tests/python/baml_client/async_client.py
index 54eb20539a..9a30ac5955 100644
--- a/integ-tests/python/baml_client/async_client.py
+++ b/integ-tests/python/baml_client/async_client.py
@@ -2989,6 +2989,33 @@ async def TestAws(
       )
       return cast(str, raw.cast_to(types, types, partial_types, False))
     
+    async def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> str:
+      options: BamlCallOptions = {**self.__baml_options, **(baml_options or {})}
+
+      __tb__ = options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = options.get("client_registry", None)
+      collector = options.get("collector", None)
+      collectors = collector if isinstance(collector, list) else [collector] if collector is not None else []
+      raw = await self.__runtime.call_function(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        collectors,
+      )
+      return cast(str, raw.cast_to(types, types, partial_types, False))
+    
     async def TestAwsInvalidAccessKey(
         self,
         input: str,
@@ -8268,6 +8295,39 @@ def TestAws(
         self.__ctx_manager.get(),
       )
     
+    def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.BamlStream[Optional[str], str]:
+      options: BamlCallOptions = {**self.__baml_options, **(baml_options or {})}
+      __tb__ = options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = options.get("client_registry", None)
+      collector = options.get("collector", None)
+      collectors = collector if isinstance(collector, list) else [collector] if collector is not None else []
+      raw = self.__runtime.stream_function(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        None,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        collectors,
+      )
+
+      return baml_py.BamlStream[Optional[str], str](
+        raw,
+        lambda x: cast(Optional[str], x.cast_to(types, types, partial_types, True)),
+        lambda x: cast(str, x.cast_to(types, types, partial_types, False)),
+        self.__ctx_manager.get(),
+      )
+    
     def TestAwsInvalidAccessKey(
         self,
         input: str,
diff --git a/integ-tests/python/baml_client/async_request.py b/integ-tests/python/baml_client/async_request.py
index 1fde62ae33..572501f427 100644
--- a/integ-tests/python/baml_client/async_request.py
+++ b/integ-tests/python/baml_client/async_request.py
@@ -2506,6 +2506,29 @@ async def TestAws(
         False,
       )
     
+    async def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.HTTPRequest:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      return await self.__runtime.build_request(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        False,
+      )
+    
     async def TestAwsInvalidAccessKey(
         self,
         input: str,
@@ -6454,6 +6477,29 @@ async def TestAws(
         True,
       )
     
+    async def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.HTTPRequest:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      return await self.__runtime.build_request(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        True,
+      )
+    
     async def TestAwsInvalidAccessKey(
         self,
         input: str,
diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py
index c129856e7c..508402a5d6 100644
--- a/integ-tests/python/baml_client/inlinedbaml.py
+++ b/integ-tests/python/baml_client/inlinedbaml.py
@@ -96,7 +96,7 @@
     "test-files/models/deepseek-azure.baml": "client<llm> DeepSeekAzure {\n    provider openai-generic\n    options {\n        base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n        api_key env.DEEPSEEK_AZURE_API_KEY\n        max_tokens 10\n    }\n}\n\nfunction TellStory(story: string) -> string {\n  client DeepSeekAzure\n  prompt #\"\n    You are a storyteller. Tell a story about the following:\n    {{ _.role(\"user\") }} {{ story }}\n  \"#\n}\n\ntest TellStory {\n  functions [TellStory]\n  args {\n    story #\"\n      Once upon a time, there was a cat who loved to play with yarn.\n    \"#\n  }\n}\n",
     "test-files/not-valid-json-1559/not-valid-json.baml": "class Document1559 {\n  client_details ClientDetails1559\n  notes Note1599[]\n}\n\nclass ClientDetails1559 {\n  client_name string?\n  client_address string?\n  client_postal_code string?\n  client_city string?\n  client_country string?\n  client_phone string?\n  client_email string?\n}\n\nclass Note1599 {\n  note_title string\n  note_description string?\n  note_amount string? @description(\"If there is a quantity, specify it here\")\n}\n\nfunction DescribeMedia1599(img: image, client_sector: string, client_name: string) -> string {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert at describing media.\n    {{_.role(\"user\")}}\n    Describe this image {{img}} for client {{ client_name }} in sector {{ client_sector }}.\n  \"#\n}\n\nfunction StructureDocument1559(document_txt: string) -> Document1559 {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert in structuring notes.\n    {{_.role(\"user\")}}\n    Here is the text you need to structure:\n    {{ document_txt }}\n\n    {{ ctx.output_format }}\n  \"#\n}\n\ntest TestDescribeMedia1559 {\n  functions [DescribeMedia1599]\n  args {\n    img { file \"./notes.png\" }\n    client_sector #\"\n      roofer\n    \"#\n    client_name #\"\n      The Vroe Group\n    \"#\n  }\n}\n\ntest TestStructureDocument1559 {\n  functions [StructureDocument1559]\n  args {\n    // Test arguments would go here\n  }\n}",
     "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n  client Claude\n  prompt #\"\n    Write a nice haiku about {{ input }}\n  \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n  client \"anthropic/claude-3-haiku-20240307\"\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n  client ClaudeWithCaching\n  prompt #\"\n    {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n    Generate the following story\n    {{ input }}\n\n    {# Haiku require 2048 tokens to cache -#}\n    {{ input }}\n\n    {{ _.role('user') }}\n    {{ not_cached }}\n  \"#\n}\n\nclass CustomStory {\n  title string\n  characters string[]\n  content string\n}\n\nfunction TestThinking(input: string) -> CustomStory {\n  client SonnetThinking\n  prompt #\"\n    {{ _.role('system') }}\n    Generate the following story\n    {{ ctx.output_format }}\n\n    {{ _.role('user') }}\n    {{ input }}\n  \"#\n}",
-    "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}",
+    "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInferenceProfile(input: string) -> string {\n  client AwsBedrockInferenceProfileClient\n  prompt #\"\n    Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n  \"#\n}\n\ntest TestName {\n  functions [TestAwsInferenceProfile]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n\n\nclient<llm> AwsBedrockInferenceProfileClient {\n  provider \"aws-bedrock\"\n  options {\n    model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n  }\n}\n",
     "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n  client GPT35Azure\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n  client AzureO1\n  prompt #\"\n   {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n  client AzureO1WithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n  client AzureO1WithMaxCompletionTokens\n  prompt #\"\n     {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n  client GPT35AzureWithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n  client GPT35AzureFailed\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\nclient<llm> AzureWithNullMaxTokens {\n  provider azure-openai\n  options {\n    resource_name env.AZURE_OPENAI_RESOURCE_NAME\n    deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n    api_version \"2024-02-01\"\n    max_tokens null\n  }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n  client AzureO3\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n  client AzureO3WithMaxCompletionTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n  functions [\n    TestAzure,\n    TestAzureO1NoMaxTokens,\n    TestAzureO1WithMaxTokens,\n    TestAzureWithMaxTokens,\n    TestAzureO1WithMaxCompletionTokens,\n    TestAzureO3NoMaxTokens,\n    TestAzureO3WithMaxCompletionTokens\n  ]\n  args {\n    input \"Cherry blossoms\"\n  }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n  functions [TestAzureFailure]\n  args {\n    input \"Cherry blossoms\"\n  }\n}",
     "test-files/providers/dummy-clients.baml": "client OpenAIDummyClient {\n  provider openai-generic\n  options {\n    api_key env.OPENAI_API_KEY\n    model \"gpt-4o-mini\"\n    base_url \"http://localhost:8000\"\n  }\n}\n\nfunction TestOpenAIDummyClient(input: string) -> string {\n  client OpenAIDummyClient\n  prompt #\"\n    {{ _.role(\"user\") }}\n    {{ input }}\n  \"#\n}",
     "test-files/providers/gemini.baml": "function TestGemini(input: string) -> string {\n  client Gemini\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystem(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }}\n\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystemAsChat(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }} You are a helpful assistant\n\n    {{_.role(\"user\")}} Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiOpenAiGeneric() -> string {\n  client GeminiOpenAiGeneric\n  prompt #\"{{_.role(\"system\")}} You are a helpful assistant\n  {{_.role(\"user\")}} Write a poem about llamas\n  \"#\n}\n\ntest TestName {\n  functions [TestGeminiSystem]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n",
diff --git a/integ-tests/python/baml_client/parser.py b/integ-tests/python/baml_client/parser.py
index aad8b7de4d..660ab87975 100644
--- a/integ-tests/python/baml_client/parser.py
+++ b/integ-tests/python/baml_client/parser.py
@@ -2819,6 +2819,32 @@ def TestAws(
 
       return cast(str, parsed)
     
+    def TestAwsInferenceProfile(
+        self,
+        llm_response: str,
+        baml_options: BamlCallOptions = {},
+    ) -> str:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      parsed = self.__runtime.parse_llm_response(
+        "TestAwsInferenceProfile",
+        llm_response,
+        types,
+        types,
+        partial_types,
+        False,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+
+      return cast(str, parsed)
+    
     def TestAwsInvalidAccessKey(
         self,
         llm_response: str,
@@ -7276,6 +7302,32 @@ def TestAws(
 
       return cast(Optional[str], parsed)
     
+    def TestAwsInferenceProfile(
+        self,
+        llm_response: str,
+        baml_options: BamlCallOptions = {},
+    ) -> Optional[str]:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      parsed = self.__runtime.parse_llm_response(
+        "TestAwsInferenceProfile",
+        llm_response,
+        types,
+        types,
+        partial_types,
+        True,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+
+      return cast(Optional[str], parsed)
+    
     def TestAwsInvalidAccessKey(
         self,
         llm_response: str,
diff --git a/integ-tests/python/baml_client/sync_client.py b/integ-tests/python/baml_client/sync_client.py
index 2f6b1db1d1..839227d5be 100644
--- a/integ-tests/python/baml_client/sync_client.py
+++ b/integ-tests/python/baml_client/sync_client.py
@@ -2987,6 +2987,33 @@ def TestAws(
       )
       return cast(str, raw.cast_to(types, types, partial_types, False))
     
+    def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> str:
+      options: BamlCallOptions = {**self.__baml_options, **(baml_options or {})}
+      __tb__ = options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = options.get("client_registry", None)
+      collector = options.get("collector", None)
+      collectors = collector if isinstance(collector, list) else [collector] if collector is not None else []
+
+      raw = self.__runtime.call_function_sync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        collectors,
+      )
+      return cast(str, raw.cast_to(types, types, partial_types, False))
+    
     def TestAwsInvalidAccessKey(
         self,
         input: str,
@@ -8374,6 +8401,40 @@ def TestAws(
         self.__ctx_manager.get(),
       )
     
+    def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.BamlSyncStream[Optional[str], str]:
+      options: BamlCallOptions = {**self.__baml_options, **(baml_options or {})}
+      __tb__ = options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = options.get("client_registry", None)
+      collector = options.get("collector", None)
+      collectors = collector if isinstance(collector, list) else [collector] if collector is not None else []
+
+      raw = self.__runtime.stream_function_sync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        None,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        collectors,
+      )
+
+      return baml_py.BamlSyncStream[Optional[str], str](
+        raw,
+        lambda x: cast(Optional[str], x.cast_to(types, types, partial_types, True)),
+        lambda x: cast(str, x.cast_to(types, types, partial_types, False)),
+        self.__ctx_manager.get(),
+      )
+    
     def TestAwsInvalidAccessKey(
         self,
         input: str,
diff --git a/integ-tests/python/baml_client/sync_request.py b/integ-tests/python/baml_client/sync_request.py
index 71c3929865..989d104da0 100644
--- a/integ-tests/python/baml_client/sync_request.py
+++ b/integ-tests/python/baml_client/sync_request.py
@@ -2498,6 +2498,29 @@ def TestAws(
         False,
       )
     
+    def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.HTTPRequest:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      return self.__runtime.build_request_sync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        False,
+      )
+    
     def TestAwsInvalidAccessKey(
         self,
         input: str,
@@ -6442,6 +6465,29 @@ def TestAws(
         True,
       )
     
+    def TestAwsInferenceProfile(
+        self,
+        input: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.HTTPRequest:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      return self.__runtime.build_request_sync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+        True,
+      )
+    
     def TestAwsInvalidAccessKey(
         self,
         input: str,
diff --git a/integ-tests/python/pyproject.toml b/integ-tests/python/pyproject.toml
index 428569b5fe..c03a6323a4 100644
--- a/integ-tests/python/pyproject.toml
+++ b/integ-tests/python/pyproject.toml
@@ -20,6 +20,7 @@ dependencies = [
     "ruff",
     "anthropic (>=0.49.0,<0.50.0)",
     "google-genai (>=1.5.0,<2.0.0)",
+    "boto3>=1.37.37",
 ]
 
 [dependency-groups]
diff --git a/integ-tests/python/tests/test_functions.py b/integ-tests/python/tests/test_functions.py
index 34bce3ded0..d278632b48 100644
--- a/integ-tests/python/tests/test_functions.py
+++ b/integ-tests/python/tests/test_functions.py
@@ -575,6 +575,21 @@ async def test_aws():
     assert len(res) > 0, "Expected non-empty result but got empty."
 
 
+@pytest.mark.asyncio
+async def test_aws_inference_profile():
+    """TestAwsInferenceProfile should come back with a non-empty string."""
+    story = await b.TestAwsInferenceProfile("Hello, world!")
+    assert len(story) > 0, "Expected non-empty result but got empty."
+
+
+@pytest.mark.asyncio
+async def test_aws_streaming():
+    """Streaming TestAws should deliver more than one chunk."""
+    stream = b.stream.TestAws("Tell me a story in 8 sentences.")
+    received = [partial async for partial in stream]
+    assert len(received) > 1, "Expected more than one stream chunk."
+
+
 @pytest.mark.asyncio
 async def test_openai_shorthand():
     res = await b.TestOpenAIShorthand(input="Mt Rainier is tall")
@@ -611,12 +626,6 @@ async def test_fallback_to_shorthand():
     assert len(res) > 0, "Expected non-empty result but got empty."
 
 
-@pytest.mark.asyncio
-async def test_aws_streaming():
-    res = await b.stream.TestAws(input="Mt Rainier is tall").get_final_response()
-    assert len(res) > 0, "Expected non-empty result but got empty."
-
-
 @pytest.mark.asyncio
 async def test_streaming():
     stream = b.stream.PromptTestStreaming(
@@ -636,21 +645,20 @@ async def test_streaming():
 
     final = await stream.get_final_response()
 
-    assert first_msg_time - start_time <= 1.5, (
-        "Expected first message within 1 second but it took longer."
-    )
-    assert last_msg_time - start_time >= 1, (
-        "Expected last message after 1.5 seconds but it was earlier."
-    )
+    assert (
+        first_msg_time - start_time <= 1.5
+    ), "Expected first message within 1 second but it took longer."
+    assert (
+        last_msg_time - start_time >= 1
+    ), "Expected last message after 1.5 seconds but it was earlier."
     assert len(final) > 0, "Expected non-empty final but got empty."
     assert len(msgs) > 0, "Expected at least one streamed response but got none."
     for prev_msg, msg in zip(msgs, msgs[1:]):
-        assert msg.startswith(prev_msg), (
-            "Expected messages to be continuous, but prev was %r and next was %r"
-            % (
-                prev_msg,
-                msg,
-            )
+        assert msg.startswith(
+            prev_msg
+        ), "Expected messages to be continuous, but prev was %r and next was %r" % (
+            prev_msg,
+            msg,
         )
     assert msgs[-1] == final, "Expected last stream message to match final response."
 
@@ -681,21 +689,20 @@ def test_streaming_sync():
 
     final = stream.get_final_response()
 
-    assert first_msg_time - start_time <= 1.5, (
-        "Expected first message within 1 second but it took longer."
-    )
-    assert last_msg_time - start_time >= 1, (
-        "Expected last message after 1.5 seconds but it was earlier."
-    )
+    assert (
+        first_msg_time - start_time <= 1.5
+    ), "Expected first message within 1 second but it took longer."
+    assert (
+        last_msg_time - start_time >= 1
+    ), "Expected last message after 1.5 seconds but it was earlier."
     assert len(final) > 0, "Expected non-empty final but got empty."
     assert len(msgs) > 0, "Expected at least one streamed response but got none."
     for prev_msg, msg in zip(msgs, msgs[1:]):
-        assert msg.startswith(prev_msg), (
-            "Expected messages to be continuous, but prev was %r and next was %r"
-            % (
-                prev_msg,
-                msg,
-            )
+        assert msg.startswith(
+            prev_msg
+        ), "Expected messages to be continuous, but prev was %r and next was %r" % (
+            prev_msg,
+            msg,
         )
     assert msgs[-1] == final, "Expected last stream message to match final response."
 
@@ -718,12 +725,11 @@ async def test_streaming_claude():
     assert len(final) > 0, "Expected non-empty final but got empty."
     assert len(msgs) > 0, "Expected at least one streamed response but got none."
     for prev_msg, msg in zip(msgs, msgs[1:]):
-        assert msg.startswith(prev_msg), (
-            "Expected messages to be continuous, but prev was %r and next was %r"
-            % (
-                prev_msg,
-                msg,
-            )
+        assert msg.startswith(
+            prev_msg
+        ), "Expected messages to be continuous, but prev was %r and next was %r" % (
+            prev_msg,
+            msg,
         )
     print("msgs:")
     print(msgs[-1])
@@ -744,12 +750,11 @@ async def test_streaming_gemini():
     assert len(final) > 0, "Expected non-empty final but got empty."
     assert len(msgs) > 0, "Expected at least one streamed response but got none."
     for prev_msg, msg in zip(msgs, msgs[1:]):
-        assert msg.startswith(prev_msg), (
-            "Expected messages to be continuous, but prev was %r and next was %r"
-            % (
-                prev_msg,
-                msg,
-            )
+        assert msg.startswith(
+            prev_msg
+        ), "Expected messages to be continuous, but prev was %r and next was %r" % (
+            prev_msg,
+            msg,
         )
     print("msgs:")
     print(msgs[-1])
@@ -1457,9 +1462,9 @@ async def test_caching():
     print("Duration no caching: ", duration)
     print("Duration with caching: ", duration2)
 
-    assert duration2 < duration, (
-        f"{duration2} < {duration}. Expected second call to be faster than first by a large margin."
-    )
+    assert (
+        duration2 < duration
+    ), f"{duration2} < {duration}. Expected second call to be faster than first by a large margin."
 
 
 @pytest.mark.asyncio
@@ -1527,9 +1532,9 @@ async def test_baml_validation_error_format():
         except errors.BamlValidationError as e:
             print("Error: ", e)
             assert hasattr(e, "prompt"), "Error object should have 'prompt' attribute"
-            assert hasattr(e, "raw_output"), (
-                "Error object should have 'raw_output' attribute"
-            )
+            assert hasattr(
+                e, "raw_output"
+            ), "Error object should have 'raw_output' attribute"
             assert hasattr(e, "message"), "Error object should have 'message' attribute"
             assert 'Say "hello there"' in e.prompt
 
@@ -1616,7 +1621,8 @@ async def test_differing_unions():
 @pytest.mark.asyncio
 async def test_add_baml_existing_class():
     tb = TypeBuilder()
-    tb.add_baml("""
+    tb.add_baml(
+        """
         class ExtraPersonInfo {
             height int
             weight int
@@ -1626,7 +1632,8 @@ class ExtraPersonInfo {
             age int?
             extra ExtraPersonInfo?
         }
-    """)
+    """
+    )
     res = await b.ExtractPeople(
         "My name is John Doe. I'm 30 years old. I'm 6 feet tall and weigh 180 pounds. My hair is yellow.",
         {"tb": tb},
@@ -1644,12 +1651,14 @@ class ExtraPersonInfo {
 @pytest.mark.asyncio
 async def test_add_baml_existing_enum():
     tb = TypeBuilder()
-    tb.add_baml("""
+    tb.add_baml(
+        """
         dynamic enum Hobby {
             VideoGames
             BikeRiding
         }
-    """)
+    """
+    )
     res = await b.ExtractHobby("I play video games", {"tb": tb})
     assert res == ["VideoGames"]
 
@@ -1657,7 +1666,8 @@ async def test_add_baml_existing_enum():
 @pytest.mark.asyncio
 async def test_add_baml_both_classes_and_enums():
     tb = TypeBuilder()
-    tb.add_baml("""
+    tb.add_baml(
+        """
         class ExtraPersonInfo {
             height int
             weight int
@@ -1684,7 +1694,8 @@ class ExtraPersonInfo {
             job Job?
             hobbies Hobby[]
         }
-    """)
+    """
+    )
     res = await b.ExtractPeople(
         "My name is John Doe. I'm 30 years old. My height is 6 feet and I weigh 180 pounds. My hair is brown. I work as a programmer and enjoy bike riding.",
         {"tb": tb},
@@ -1704,7 +1715,8 @@ class ExtraPersonInfo {
 @pytest.mark.asyncio
 async def test_add_baml_with_attrs():
     tb = TypeBuilder()
-    tb.add_baml("""
+    tb.add_baml(
+        """
         class ExtraPersonInfo {
             height int @description("In centimeters and rounded to the nearest whole number")
             weight int @description("In kilograms and rounded to the nearest whole number")
@@ -1713,7 +1725,8 @@ class ExtraPersonInfo {
         dynamic class Person {
             extra ExtraPersonInfo?
         }
-    """)
+    """
+    )
     res = await b.ExtractPeople(
         "My name is John Doe. I'm 30 years old. I'm 6 feet tall and weigh 180 pounds. My hair is yellow.",
         {"tb": tb},
@@ -1731,21 +1744,25 @@ class ExtraPersonInfo {
 async def test_add_baml_error():
     tb = TypeBuilder()
     with pytest.raises(errors.BamlError):
-        tb.add_baml("""
+        tb.add_baml(
+            """
             dynamic Hobby {
                 VideoGames
                 BikeRiding
             }
-        """)
+        """
+        )
 
 
 @pytest.mark.asyncio
 async def test_add_baml_parser_error():
     tb = TypeBuilder()
     with pytest.raises(errors.BamlError):
-        tb.add_baml("""
+        tb.add_baml(
+            """
             syntaxerror
-        """)
+        """
+        )
 
 
 @pytest.mark.asyncio
diff --git a/integ-tests/python/uv.lock b/integ-tests/python/uv.lock
index 4ff7958a56..489ec479ad 100644
--- a/integ-tests/python/uv.lock
+++ b/integ-tests/python/uv.lock
@@ -1,5 +1,9 @@
 version = 1
 requires-python = ">=3.9, <4"
+resolution-markers = [
+    "python_full_version < '3.10'",
+    "python_full_version >= '3.10'",
+]
 
 [[package]]
 name = "annotated-types"
@@ -58,6 +62,35 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bb/2a/10164ed1f31196a2f7f3799368a821765c62851ead0e630ab52b8e14b4d0/blinker-1.8.2-py3-none-any.whl", hash = "sha256:1779309f71bf239144b9399d06ae925637cf6634cf6bd131104184531bf67c01", size = 9456 },
 ]
 
+[[package]]
+name = "boto3"
+version = "1.37.37"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "botocore" },
+    { name = "jmespath" },
+    { name = "s3transfer" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/82/8c/2ca661db6c9e591d9dc46149b43a91385283c852436ccba62e199643e196/boto3-1.37.37.tar.gz", hash = "sha256:752d31105a45e3e01c8c68471db14ae439990b75a35e72b591ca528e2575b28f", size = 111666 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e4/5f/032d93e74949222ffbfbc3270f29a3ee423fe648de8a31c49cce0cbb0a09/boto3-1.37.37-py3-none-any.whl", hash = "sha256:d125cb11e22817f7a2581bade4bf7b75247b401888890239ceb5d3e902ccaf38", size = 139917 },
+]
+
+[[package]]
+name = "botocore"
+version = "1.37.37"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "jmespath" },
+    { name = "python-dateutil" },
+    { name = "urllib3", version = "1.26.20", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/96/d0/70969515e3ae8ff0fcccf22827d5d131bc7b8729331127415cf8f2861d63/botocore-1.37.37.tar.gz", hash = "sha256:3eadde6fed95c4cb469cc39d1c3558528b7fa76d23e7e16d4bddc77250431a64", size = 13828530 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/17/602915b29cb695e1e66f65e33b1026f1534e49975d99ea4e32e58d963542/botocore-1.37.37-py3-none-any.whl", hash = "sha256:eb730ff978f47c02f0c8ed07bccdc0db6d8fa098ed32ac31bee1da0e9be480d1", size = 13495584 },
+]
+
 [[package]]
 name = "cachetools"
 version = "5.5.2"
@@ -311,7 +344,7 @@ name = "importlib-metadata"
 version = "8.5.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "zipp" },
+    { name = "zipp", marker = "python_full_version < '3.10'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/cd/12/33e59336dca5be0c398a7482335911a33aa0e20776128f038019f1a95f1b/importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7", size = 55304 }
 wheels = [
@@ -419,6 +452,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/b7/a3cde72c644fd1caf9da07fb38cf2c130f43484d8f91011940b7c4f42c8f/jiter-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a", size = 207527 },
 ]
 
+[[package]]
+name = "jmespath"
+version = "1.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256 },
+]
+
 [[package]]
 name = "markupsafe"
 version = "2.1.5"
@@ -705,6 +747,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ee/82/62e2d63639ecb0fbe8a7ee59ef0bc69a4669ec50f6d3459f74ad4e4189a2/pytest_asyncio-0.23.8-py3-none-any.whl", hash = "sha256:50265d892689a5faefb84df80819d1ecef566eb3549cf915dfb33569359d1ce2", size = 17663 },
 ]
 
+[[package]]
+name = "python-dateutil"
+version = "2.9.0.post0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "six" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
+]
+
 [[package]]
 name = "python-dotenv"
 version = "1.0.1"
@@ -721,6 +775,7 @@ source = { editable = "." }
 dependencies = [
     { name = "anthropic" },
     { name = "assertpy" },
+    { name = "boto3" },
     { name = "flask" },
     { name = "flask-httpauth" },
     { name = "google-genai" },
@@ -744,6 +799,7 @@ dev = [
 requires-dist = [
     { name = "anthropic", specifier = ">=0.49.0,<0.50.0" },
     { name = "assertpy" },
+    { name = "boto3", specifier = ">=1.37.37" },
     { name = "flask", specifier = ">=3.0.3" },
     { name = "flask-httpauth", specifier = ">=4.8.0" },
     { name = "google-genai", specifier = ">=1.5.0,<2.0.0" },
@@ -769,7 +825,8 @@ dependencies = [
     { name = "certifi" },
     { name = "charset-normalizer" },
     { name = "idna" },
-    { name = "urllib3" },
+    { name = "urllib3", version = "1.26.20", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
+    { name = "urllib3", version = "2.2.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 }
 wheels = [
@@ -813,6 +870,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/35/85/338e603dc68e7d9994d5d84f24adbf69bae760ba5efd3e20f5ff2cec18da/ruff-0.9.10-py3-none-win_arm64.whl", hash = "sha256:5fd804c0327a5e5ea26615550e706942f348b197d5475ff34c19733aee4b2e69", size = 10436892 },
 ]
 
+[[package]]
+name = "s3transfer"
+version = "0.11.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "botocore" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c4/2b/5c9562795c2eb2b5f63536961754760c25bf0f34af93d36aa28dea2fb303/s3transfer-0.11.5.tar.gz", hash = "sha256:8c8aad92784779ab8688a61aefff3e28e9ebdce43142808eaa3f0b0f402f68b7", size = 149107 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/45/39/13402e323666d17850eca87e4cd6ecfcf9fd7809cac9efdcce10272fc29d/s3transfer-0.11.5-py3-none-any.whl", hash = "sha256:757af0f2ac150d3c75bc4177a32355c3862a98d20447b69a0161812992fe0bd4", size = 84782 },
+]
+
+[[package]]
+name = "six"
+version = "1.17.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 },
+]
+
 [[package]]
 name = "sniffio"
 version = "1.3.1"
@@ -852,10 +930,25 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 },
 ]
 
+[[package]]
+name = "urllib3"
+version = "1.26.20"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version < '3.10'",
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/e8/6ff5e6bc22095cfc59b6ea711b687e2b7ed4bdb373f7eeec370a97d7392f/urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32", size = 307380 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/33/cf/8435d5a7159e2a9c83a95896ed596f68cf798005fe107cc655b5c5c14704/urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e", size = 144225 },
+]
+
 [[package]]
 name = "urllib3"
 version = "2.2.3"
 source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+    "python_full_version >= '3.10'",
+]
 sdist = { url = "https://files.pythonhosted.org/packages/ed/63/22ba4ebfe7430b76388e7cd448d5478814d3032121827c12a2cc287e2260/urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9", size = 300677 }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/ce/d9/5f4c13cecde62396b0d3fe530a50ccea91e7dfc1ccf0e09c228841bb5ba8/urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac", size = 126338 },
diff --git a/integ-tests/react/baml_client/async_client.ts b/integ-tests/react/baml_client/async_client.ts
index dfe293bb57..b8ff82ab41 100644
--- a/integ-tests/react/baml_client/async_client.ts
+++ b/integ-tests/react/baml_client/async_client.ts
@@ -2544,6 +2544,29 @@ export class BamlAsyncClient {
     }
   }
   
+  async TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<string> {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) }
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : [];
+      const raw = await this.runtime.callFunction(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(),
+        options.clientRegistry,
+        collector,
+      )
+      return raw.parsed(false) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -7133,6 +7156,35 @@ class BamlStreamClient {
     }
   }
   
+  TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, collector?: Collector | Collector[] }
+  ): BamlStream<string, string> {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) }
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : [];
+      const raw = this.runtime.streamFunction(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        undefined,
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(),
+        options.clientRegistry,
+        collector,
+      )
+      return new BamlStream<string, string>(
+        raw,
+        (a): string => a,
+        (a): string => a,
+        this.ctxManager.cloneContext(),
+      )
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, collector?: Collector | Collector[] }
diff --git a/integ-tests/react/baml_client/async_request.ts b/integ-tests/react/baml_client/async_request.ts
index e634ff18e5..a437ff1cab 100644
--- a/integ-tests/react/baml_client/async_request.ts
+++ b/integ-tests/react/baml_client/async_request.ts
@@ -2171,6 +2171,26 @@ export class AsyncHttpRequest {
     }
   }
   
+  async TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<HTTPRequest> {
+    try {
+      return await this.runtime.buildRequest(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        false,
+      )
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -5597,6 +5617,26 @@ export class AsyncHttpStreamRequest {
     }
   }
   
+  async TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<HTTPRequest> {
+    try {
+      return await this.runtime.buildRequest(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        true,
+      )
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
diff --git a/integ-tests/react/baml_client/inlinedbaml.ts b/integ-tests/react/baml_client/inlinedbaml.ts
index d2ca42c380..49935ab67c 100644
--- a/integ-tests/react/baml_client/inlinedbaml.ts
+++ b/integ-tests/react/baml_client/inlinedbaml.ts
@@ -97,7 +97,7 @@ const fileMap = {
   "test-files/models/deepseek-azure.baml": "client<llm> DeepSeekAzure {\n    provider openai-generic\n    options {\n        base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n        api_key env.DEEPSEEK_AZURE_API_KEY\n        max_tokens 10\n    }\n}\n\nfunction TellStory(story: string) -> string {\n  client DeepSeekAzure\n  prompt #\"\n    You are a storyteller. Tell a story about the following:\n    {{ _.role(\"user\") }} {{ story }}\n  \"#\n}\n\ntest TellStory {\n  functions [TellStory]\n  args {\n    story #\"\n      Once upon a time, there was a cat who loved to play with yarn.\n    \"#\n  }\n}\n",
   "test-files/not-valid-json-1559/not-valid-json.baml": "class Document1559 {\n  client_details ClientDetails1559\n  notes Note1599[]\n}\n\nclass ClientDetails1559 {\n  client_name string?\n  client_address string?\n  client_postal_code string?\n  client_city string?\n  client_country string?\n  client_phone string?\n  client_email string?\n}\n\nclass Note1599 {\n  note_title string\n  note_description string?\n  note_amount string? @description(\"If there is a quantity, specify it here\")\n}\n\nfunction DescribeMedia1599(img: image, client_sector: string, client_name: string) -> string {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert at describing media.\n    {{_.role(\"user\")}}\n    Describe this image {{img}} for client {{ client_name }} in sector {{ client_sector }}.\n  \"#\n}\n\nfunction StructureDocument1559(document_txt: string) -> Document1559 {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert in structuring notes.\n    {{_.role(\"user\")}}\n    Here is the text you need to structure:\n    {{ document_txt }}\n\n    {{ ctx.output_format }}\n  \"#\n}\n\ntest TestDescribeMedia1559 {\n  functions [DescribeMedia1599]\n  args {\n    img { file \"./notes.png\" }\n    client_sector #\"\n      roofer\n    \"#\n    client_name #\"\n      The Vroe Group\n    \"#\n  }\n}\n\ntest TestStructureDocument1559 {\n  functions [StructureDocument1559]\n  args {\n    // Test arguments would go here\n  }\n}",
   "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n  client Claude\n  prompt #\"\n    Write a nice haiku about {{ input }}\n  \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n  client \"anthropic/claude-3-haiku-20240307\"\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n  client ClaudeWithCaching\n  prompt #\"\n    {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n    Generate the following story\n    {{ input }}\n\n    {# Haiku require 2048 tokens to cache -#}\n    {{ input }}\n\n    {{ _.role('user') }}\n    {{ not_cached }}\n  \"#\n}\n\nclass CustomStory {\n  title string\n  characters string[]\n  content string\n}\n\nfunction TestThinking(input: string) -> CustomStory {\n  client SonnetThinking\n  prompt #\"\n    {{ _.role('system') }}\n    Generate the following story\n    {{ ctx.output_format }}\n\n    {{ _.role('user') }}\n    {{ input }}\n  \"#\n}",
-  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}",
+  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInferenceProfile(input: string) -> string {\n  client AwsBedrockInferenceProfileClient\n  prompt #\"\n    Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n  \"#\n}\n\ntest TestName {\n  functions [TestAwsInferenceProfile]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n\n\nclient<llm> AwsBedrockInferenceProfileClient {\n  provider \"aws-bedrock\"\n  options {\n    model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n  }\n}\n",
   "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n  client GPT35Azure\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n  client AzureO1\n  prompt #\"\n   {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n  client AzureO1WithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n  client AzureO1WithMaxCompletionTokens\n  prompt #\"\n     {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n  client GPT35AzureWithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n  client GPT35AzureFailed\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\nclient<llm> AzureWithNullMaxTokens {\n  provider azure-openai\n  options {\n    resource_name env.AZURE_OPENAI_RESOURCE_NAME\n    deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n    api_version \"2024-02-01\"\n    max_tokens null\n  }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n  client AzureO3\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n  client AzureO3WithMaxCompletionTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n  functions [\n    TestAzure,\n    TestAzureO1NoMaxTokens,\n    TestAzureO1WithMaxTokens,\n    TestAzureWithMaxTokens,\n    TestAzureO1WithMaxCompletionTokens,\n    TestAzureO3NoMaxTokens,\n    TestAzureO3WithMaxCompletionTokens\n  ]\n  args {\n    input \"Cherry blossoms\"\n  }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n  functions [TestAzureFailure]\n  args {\n    input \"Cherry blossoms\"\n  }\n}",
   "test-files/providers/dummy-clients.baml": "client OpenAIDummyClient {\n  provider openai-generic\n  options {\n    api_key env.OPENAI_API_KEY\n    model \"gpt-4o-mini\"\n    base_url \"http://localhost:8000\"\n  }\n}\n\nfunction TestOpenAIDummyClient(input: string) -> string {\n  client OpenAIDummyClient\n  prompt #\"\n    {{ _.role(\"user\") }}\n    {{ input }}\n  \"#\n}",
   "test-files/providers/gemini.baml": "function TestGemini(input: string) -> string {\n  client Gemini\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystem(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }}\n\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystemAsChat(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }} You are a helpful assistant\n\n    {{_.role(\"user\")}} Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiOpenAiGeneric() -> string {\n  client GeminiOpenAiGeneric\n  prompt #\"{{_.role(\"system\")}} You are a helpful assistant\n  {{_.role(\"user\")}} Write a poem about llamas\n  \"#\n}\n\ntest TestName {\n  functions [TestGeminiSystem]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n",
diff --git a/integ-tests/react/baml_client/parser.ts b/integ-tests/react/baml_client/parser.ts
index d14425ff8c..efd602f879 100644
--- a/integ-tests/react/baml_client/parser.ts
+++ b/integ-tests/react/baml_client/parser.ts
@@ -1953,6 +1953,24 @@ export class LlmResponseParser {
     }
   }
   
+  TestAwsInferenceProfile(
+      llmResponse: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
+  ): string {
+    try {
+      return this.runtime.parseLlmResponse(
+        "TestAwsInferenceProfile",
+        llmResponse,
+        false,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       llmResponse: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
@@ -5037,6 +5055,24 @@ export class LlmStreamParser {
     }
   }
   
+  TestAwsInferenceProfile(
+      llmResponse: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
+  ): string {
+    try {
+      return this.runtime.parseLlmResponse(
+        "TestAwsInferenceProfile",
+        llmResponse,
+        true,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       llmResponse: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
diff --git a/integ-tests/react/baml_client/react/hooks.tsx b/integ-tests/react/baml_client/react/hooks.tsx
index 27662e4616..1af85ff0d4 100644
--- a/integ-tests/react/baml_client/react/hooks.tsx
+++ b/integ-tests/react/baml_client/react/hooks.tsx
@@ -5507,6 +5507,54 @@ export function useTestAws(
   }
   return useBamlAction(action, props)
 }
+/**
+ * A specialized hook for the TestAwsInferenceProfile BAML function that supports both streaming and non‑streaming responses.
+ *
+ * **Input Types:**
+ *
+ * - input: string
+ *
+ *
+ * **Return Type:**
+ * - **Non‑streaming:** string
+ * - **Streaming Partial:** string
+ * - **Streaming Final:** string
+ *
+ * **Usage Patterns:**
+ * 1. **Non‑streaming (Default)**
+ *    - Best for quick responses and simple UI updates.
+ * 2. **Streaming**
+ *    - Ideal for long‑running operations or real‑time feedback.
+ *
+ * **Edge Cases:**
+ * - Ensure robust error handling via `onError`.
+ * - Handle cases where partial data may be incomplete or missing.
+ *
+ * @example
+ * ```tsx
+ * // Basic non‑streaming usage:
+ * const { data, error, isLoading, mutate } = useTestAwsInferenceProfile({ stream: false});
+ *
+ * // Streaming usage:
+ * const { data, streamData, isLoading, error, mutate } = useTestAwsInferenceProfile({
+ *   stream: true | undefined,
+ *   onStreamData: (partial) => console.log('Partial update:', partial),
+ *   onFinalData: (final) => console.log('Final result:', final),
+ *   onError: (err) => console.error('Error:', err),
+ * });
+ * ```
+ */
+export function useTestAwsInferenceProfile(props: HookInput<'TestAwsInferenceProfile', { stream: false }>): HookOutput<'TestAwsInferenceProfile', { stream: false }>
+export function useTestAwsInferenceProfile(props?: HookInput<'TestAwsInferenceProfile', { stream?: true }>): HookOutput<'TestAwsInferenceProfile', { stream: true }>
+export function useTestAwsInferenceProfile(
+  props: HookInput<'TestAwsInferenceProfile', { stream?: boolean }> = {},
+): HookOutput<'TestAwsInferenceProfile', { stream: true }> | HookOutput<'TestAwsInferenceProfile', { stream: false }> {
+  let action = Actions.TestAwsInferenceProfile;
+  if (isStreamingProps(props)) {
+    action = StreamingActions.TestAwsInferenceProfile;
+  }
+  return useBamlAction(action, props)
+}
 /**
  * A specialized hook for the TestAwsInvalidAccessKey BAML function that supports both streaming and non‑streaming responses.
  *
diff --git a/integ-tests/react/baml_client/react/server.ts b/integ-tests/react/baml_client/react/server.ts
index c37da9aebc..69a1aa1feb 100644
--- a/integ-tests/react/baml_client/react/server.ts
+++ b/integ-tests/react/baml_client/react/server.ts
@@ -1979,6 +1979,24 @@ export const TestAws = async (
   );
 };
 
+/**
+ * Executes the "TestAwsInferenceProfile" BAML action.
+ *
+ * This server action calls the underlying BAML function "TestAwsInferenceProfile"
+ * with the specified parameters.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {Promise<string>} A promise that resolves with the result of the action.
+ */
+export const TestAwsInferenceProfile = async (
+  input: string,
+): Promise<string> => {
+  return b.TestAwsInferenceProfile(
+    input,
+  );
+};
+
 /**
  * Executes the "TestAwsInvalidAccessKey" BAML action.
  *
diff --git a/integ-tests/react/baml_client/react/server_streaming.ts b/integ-tests/react/baml_client/react/server_streaming.ts
index ded6cc9a41..c827d49124 100644
--- a/integ-tests/react/baml_client/react/server_streaming.ts
+++ b/integ-tests/react/baml_client/react/server_streaming.ts
@@ -2086,6 +2086,25 @@ export const TestAws = async (
   return Promise.resolve(stream.toStreamable());
 };
 
+/**
+ * Executes the streaming variant of the "TestAwsInferenceProfile" BAML action.
+ *
+ * This action initiates a streaming response by calling the corresponding
+ * BAML stream function. The returned stream yields incremental updates.
+ *
+ * @param { string } input - Input parameter.
+ *
+ * @returns {ReadableStream<Uint8Array>} A stream that yields incremental updates from the action.
+ */
+export const TestAwsInferenceProfile = async (
+  input: string,
+): Promise<ReadableStream<Uint8Array>> => {
+  const stream = b.stream.TestAwsInferenceProfile(
+    input,
+  );
+  return Promise.resolve(stream.toStreamable());
+};
+
 /**
  * Executes the streaming variant of the "TestAwsInvalidAccessKey" BAML action.
  *
diff --git a/integ-tests/react/baml_client/react/server_streaming_types.ts b/integ-tests/react/baml_client/react/server_streaming_types.ts
index b22dcd3ba7..cb1a637e8e 100644
--- a/integ-tests/react/baml_client/react/server_streaming_types.ts
+++ b/integ-tests/react/baml_client/react/server_streaming_types.ts
@@ -131,6 +131,7 @@ export type StreamingServerTypes = {
   TestAnthropic: string,
   TestAnthropicShorthand: string,
   TestAws: string,
+  TestAwsInferenceProfile: string,
   TestAwsInvalidAccessKey: string,
   TestAwsInvalidProfile: string,
   TestAwsInvalidRegion: string,
diff --git a/integ-tests/react/baml_client/sync_client.ts b/integ-tests/react/baml_client/sync_client.ts
index d39177fc88..4afeb6a62a 100644
--- a/integ-tests/react/baml_client/sync_client.ts
+++ b/integ-tests/react/baml_client/sync_client.ts
@@ -2546,6 +2546,29 @@ export class BamlSyncClient {
     }
   }
   
+  TestAwsInferenceProfile( // Synchronously calls the "TestAwsInferenceProfile" BAML function.
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): string {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } // per-call options override this.bamlOptions
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; // always an array; empty when no collector is set
+      const raw = this.runtime.callFunctionSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(), // undefined when no tb option is present
+        options.clientRegistry,
+        collector,
+      )
+      return raw.parsed(false) as string
+    } catch (error: any) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
diff --git a/integ-tests/react/baml_client/sync_request.ts b/integ-tests/react/baml_client/sync_request.ts
index 95ed80721c..ea07d97707 100644
--- a/integ-tests/react/baml_client/sync_request.ts
+++ b/integ-tests/react/baml_client/sync_request.ts
@@ -2171,6 +2171,26 @@ export class HttpRequest {
     }
   }
   
+  TestAwsInferenceProfile( // Builds (does not send) the HTTPRequest for "TestAwsInferenceProfile".
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): HTTPRequest {
+    try {
+      return this.runtime.buildRequestSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(), // read straight from the per-call options
+        __baml_options__?.clientRegistry,
+        false, // NOTE(review): HttpStreamRequest passes true here; presumably the stream flag, confirm
+      )
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -5597,6 +5617,26 @@ export class HttpStreamRequest {
     }
   }
   
+  TestAwsInferenceProfile( // Builds (does not send) the HTTPRequest for "TestAwsInferenceProfile".
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): HTTPRequest {
+    try {
+      return this.runtime.buildRequestSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(), // read straight from the per-call options
+        __baml_options__?.clientRegistry,
+        true, // NOTE(review): HttpRequest passes false here; presumably the stream flag, confirm
+      )
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
diff --git a/integ-tests/ruby/baml_client/client.rb b/integ-tests/ruby/baml_client/client.rb
index 1f74c02669..177aaeb4aa 100644
--- a/integ-tests/ruby/baml_client/client.rb
+++ b/integ-tests/ruby/baml_client/client.rb
@@ -5962,6 +5962,61 @@ def TestAws(
       (raw.parsed_using_types(Baml::Types, Baml::PartialTypes, false))
     end
 
+    sig { # Keyword-only call: input (String) plus optional baml_options; returns String.
+      params(
+        varargs: T.untyped,
+        input: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]))]
+      ).returns(String)
+    }
+    def TestAwsInferenceProfile(
+        *varargs,
+        input:,
+        baml_options: {}
+    )
+      if varargs.any?
+        # Any positional argument is rejected; only keyword arguments are accepted.
+        raise ArgumentError.new("TestAwsInferenceProfile may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb, :collector]).any? # reject keys outside the three supported ones
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb, :collector): #{baml_options.keys - [:client_registry, :tb, :collector]}")
+      end
+
+      # Combine the instance-level @baml_options (if any) with the per-call options.
+      # Per-call values win because they are written last.
+      effective_options = {}
+
+      if @baml_options
+        effective_options = @baml_options.dup # dup so the instance-level hash itself is not mutated below
+      end
+
+      # Overlay every option passed to this specific call
+      baml_options.each do |key, value|
+        effective_options[key] = value
+      end
+
+      # From here on, baml_options refers to the merged hash
+      baml_options = effective_options
+
+      collector = if baml_options[:collector] # normalize to an Array; empty when no collector is given
+        baml_options[:collector].is_a?(Array) ? baml_options[:collector] : [baml_options[:collector]]
+      else
+        []
+      end
+
+      raw = @runtime.call_function(
+        "TestAwsInferenceProfile",
+        {
+          input: input,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+        collector,
+      )
+      (raw.parsed_using_types(Baml::Types, Baml::PartialTypes, false))
+    end
+
     sig {
       params(
         varargs: T.untyped,
@@ -14315,6 +14370,51 @@ def TestAws(
       )
     end
 
+    sig { # Keyword-only call: input (String) plus optional baml_options; returns a Baml::BamlStream.
+      params(
+        varargs: T.untyped,
+        input: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry, T.any(Baml::Collector, T::Array[Baml::Collector]))]
+      ).returns(Baml::BamlStream[String])
+    }
+    def TestAwsInferenceProfile(
+        *varargs,
+        input:,
+        baml_options: {}
+    )
+      if varargs.any?
+        # Any positional argument is rejected; only keyword arguments are accepted.
+        raise ArgumentError.new("TestAwsInferenceProfile may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb, :collector]).any? # reject keys outside the three supported ones
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb, :collector): #{baml_options.keys - [:client_registry, :tb, :collector]}")
+      end
+
+      # Merge instance-level @baml_options with the per-call options; per-call values win
+      baml_options = (@baml_options || {}).merge(baml_options)
+
+      collector = if baml_options[:collector] # normalize to an Array; empty when no collector is given
+        baml_options[:collector].is_a?(Array) ? baml_options[:collector] : [baml_options[:collector]]
+      else
+        []
+      end
+
+      raw = @runtime.stream_function(
+        "TestAwsInferenceProfile",
+        {
+          input: input,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+        collector,
+      )
+      Baml::BamlStream[T.nilable(String), String].new( # NOTE(review): sig declares Baml::BamlStream[String] but two type arguments are applied here; confirm
+        ffi_stream: raw,
+        ctx_manager: @ctx_manager
+      )
+    end
+
     sig {
       params(
         varargs: T.untyped,
diff --git a/integ-tests/ruby/baml_client/inlined.rb b/integ-tests/ruby/baml_client/inlined.rb
index 9d2f9cdf61..e9590a67bc 100644
--- a/integ-tests/ruby/baml_client/inlined.rb
+++ b/integ-tests/ruby/baml_client/inlined.rb
@@ -96,7 +96,7 @@ module Inlined
         "test-files/models/deepseek-azure.baml" => "client<llm> DeepSeekAzure {\n    provider openai-generic\n    options {\n        base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n        api_key env.DEEPSEEK_AZURE_API_KEY\n        max_tokens 10\n    }\n}\n\nfunction TellStory(story: string) -> string {\n  client DeepSeekAzure\n  prompt #\"\n    You are a storyteller. Tell a story about the following:\n    {{ _.role(\"user\") }} {{ story }}\n  \"#\n}\n\ntest TellStory {\n  functions [TellStory]\n  args {\n    story #\"\n      Once upon a time, there was a cat who loved to play with yarn.\n    \"#\n  }\n}\n",
         "test-files/not-valid-json-1559/not-valid-json.baml" => "class Document1559 {\n  client_details ClientDetails1559\n  notes Note1599[]\n}\n\nclass ClientDetails1559 {\n  client_name string?\n  client_address string?\n  client_postal_code string?\n  client_city string?\n  client_country string?\n  client_phone string?\n  client_email string?\n}\n\nclass Note1599 {\n  note_title string\n  note_description string?\n  note_amount string? @description(\"If there is a quantity, specify it here\")\n}\n\nfunction DescribeMedia1599(img: image, client_sector: string, client_name: string) -> string {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert at describing media.\n    {{_.role(\"user\")}}\n    Describe this image {{img}} for client {{ client_name }} in sector {{ client_sector }}.\n  \"#\n}\n\nfunction StructureDocument1559(document_txt: string) -> Document1559 {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert in structuring notes.\n    {{_.role(\"user\")}}\n    Here is the text you need to structure:\n    {{ document_txt }}\n\n    {{ ctx.output_format }}\n  \"#\n}\n\ntest TestDescribeMedia1559 {\n  functions [DescribeMedia1599]\n  args {\n    img { file \"./notes.png\" }\n    client_sector #\"\n      roofer\n    \"#\n    client_name #\"\n      The Vroe Group\n    \"#\n  }\n}\n\ntest TestStructureDocument1559 {\n  functions [StructureDocument1559]\n  args {\n    // Test arguments would go here\n  }\n}",
         "test-files/providers/anthropic.baml" => "function TestAnthropic(input: string) -> string {\n  client Claude\n  prompt #\"\n    Write a nice haiku about {{ input }}\n  \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n  client \"anthropic/claude-3-haiku-20240307\"\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n  client ClaudeWithCaching\n  prompt #\"\n    {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n    Generate the following story\n    {{ input }}\n\n    {# Haiku require 2048 tokens to cache -#}\n    {{ input }}\n\n    {{ _.role('user') }}\n    {{ not_cached }}\n  \"#\n}\n\nclass CustomStory {\n  title string\n  characters string[]\n  content string\n}\n\nfunction TestThinking(input: string) -> CustomStory {\n  client SonnetThinking\n  prompt #\"\n    {{ _.role('system') }}\n    Generate the following story\n    {{ ctx.output_format }}\n\n    {{ _.role('user') }}\n    {{ input }}\n  \"#\n}",
-        "test-files/providers/aws.baml" => "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}",
+        "test-files/providers/aws.baml" => "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInferenceProfile(input: string) -> string {\n  client AwsBedrockInferenceProfileClient\n  prompt #\"\n    Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n  \"#\n}\n\ntest TestName {\n  functions [TestAwsInferenceProfile]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n\n\nclient<llm> AwsBedrockInferenceProfileClient {\n  provider \"aws-bedrock\"\n  options {\n    model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n  }\n}\n",
         "test-files/providers/azure.baml" => "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n  client GPT35Azure\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n  client AzureO1\n  prompt #\"\n   {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n  client AzureO1WithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n  client AzureO1WithMaxCompletionTokens\n  prompt #\"\n     {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n  client GPT35AzureWithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n  client GPT35AzureFailed\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\nclient<llm> AzureWithNullMaxTokens {\n  provider azure-openai\n  options {\n    resource_name env.AZURE_OPENAI_RESOURCE_NAME\n    deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n    api_version \"2024-02-01\"\n    max_tokens null\n  }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n  client AzureO3\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n  client AzureO3WithMaxCompletionTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n  functions [\n    TestAzure,\n    TestAzureO1NoMaxTokens,\n    TestAzureO1WithMaxTokens,\n    TestAzureWithMaxTokens,\n    TestAzureO1WithMaxCompletionTokens,\n    TestAzureO3NoMaxTokens,\n    TestAzureO3WithMaxCompletionTokens\n  ]\n  args {\n    input \"Cherry blossoms\"\n  }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n  functions [TestAzureFailure]\n  args {\n    input \"Cherry blossoms\"\n  }\n}",
         "test-files/providers/dummy-clients.baml" => "client OpenAIDummyClient {\n  provider openai-generic\n  options {\n    api_key env.OPENAI_API_KEY\n    model \"gpt-4o-mini\"\n    base_url \"http://localhost:8000\"\n  }\n}\n\nfunction TestOpenAIDummyClient(input: string) -> string {\n  client OpenAIDummyClient\n  prompt #\"\n    {{ _.role(\"user\") }}\n    {{ input }}\n  \"#\n}",
         "test-files/providers/gemini.baml" => "function TestGemini(input: string) -> string {\n  client Gemini\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystem(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }}\n\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystemAsChat(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }} You are a helpful assistant\n\n    {{_.role(\"user\")}} Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiOpenAiGeneric() -> string {\n  client GeminiOpenAiGeneric\n  prompt #\"{{_.role(\"system\")}} You are a helpful assistant\n  {{_.role(\"user\")}} Write a poem about llamas\n  \"#\n}\n\ntest TestName {\n  functions [TestGeminiSystem]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n",
diff --git a/integ-tests/ruby/baml_client/parser.rb b/integ-tests/ruby/baml_client/parser.rb
index 40d38f5590..af12b959b8 100644
--- a/integ-tests/ruby/baml_client/parser.rb
+++ b/integ-tests/ruby/baml_client/parser.rb
@@ -2491,6 +2491,29 @@ def TestAws(llm_response:, baml_options: {})
       )
     end
 
+    sig { # Takes a raw LLM response string plus optional baml_options; returns String.
+      params(
+        llm_response: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(String)
+    }
+    def TestAwsInferenceProfile(llm_response:, baml_options: {}) # Parses llm_response as the output of "TestAwsInferenceProfile".
+      if (baml_options.keys - [:client_registry, :tb]).any? # only :client_registry and :tb are accepted here
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      @runtime.parse_llm_response(
+        "TestAwsInferenceProfile",
+        llm_response,
+        Baml::Types,
+        Baml::PartialTypes,
+        false, # NOTE(review): the T.nilable(String) parser variant passes true; presumably a partial/stream flag, confirm
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+      )
+    end
+
     sig {
       params(
         llm_response: String,
@@ -6436,6 +6459,29 @@ def TestAws(llm_response:, baml_options: {})
       )
     end
 
+    sig { # Takes a raw LLM response string plus optional baml_options; returns T.nilable(String).
+      params(
+        llm_response: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(T.nilable(String))
+    }
+    def TestAwsInferenceProfile(llm_response:, baml_options: {}) # Parses llm_response as the output of "TestAwsInferenceProfile".
+      if (baml_options.keys - [:client_registry, :tb]).any? # only :client_registry and :tb are accepted here
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      @runtime.parse_llm_response(
+        "TestAwsInferenceProfile",
+        llm_response,
+        Baml::Types,
+        Baml::PartialTypes,
+        true, # NOTE(review): the String-returning parser variant passes false; presumably a partial/stream flag, confirm
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+      )
+    end
+
     sig {
       params(
         llm_response: String,
diff --git a/integ-tests/ruby/baml_client/request.rb b/integ-tests/ruby/baml_client/request.rb
index dc5a275004..6382c2eb8a 100644
--- a/integ-tests/ruby/baml_client/request.rb
+++ b/integ-tests/ruby/baml_client/request.rb
@@ -3347,6 +3347,37 @@ def TestAws(
       )
     end
 
+    sig { # Keyword-only call: input (String) plus optional baml_options; returns Baml::Ffi::HTTPRequest.
+      params(
+        varargs: T.untyped,
+        input: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(Baml::Ffi::HTTPRequest)
+    }
+    def TestAwsInferenceProfile(
+        *varargs,
+        input:,
+        baml_options: {}
+    )
+      if varargs.any? # positional arguments are rejected; keywords only
+        raise ArgumentError.new("TestAwsInferenceProfile may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb]).any? # only :client_registry and :tb are accepted here
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      @runtime.build_request(
+        "TestAwsInferenceProfile",
+        {
+          input: input,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+        false # NOTE(review): the other build_request variant passes true; presumably the stream flag, confirm
+      )
+    end
+
     sig {
       params(
         varargs: T.untyped,
@@ -8660,6 +8691,37 @@ def TestAws(
       )
     end
 
+    sig { # Keyword-only call: input (String) plus optional baml_options; returns Baml::Ffi::HTTPRequest.
+      params(
+        varargs: T.untyped,
+        input: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(Baml::Ffi::HTTPRequest)
+    }
+    def TestAwsInferenceProfile(
+        *varargs,
+        input:,
+        baml_options: {}
+    )
+      if varargs.any? # positional arguments are rejected; keywords only
+        raise ArgumentError.new("TestAwsInferenceProfile may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb]).any? # only :client_registry and :tb are accepted here
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      @runtime.build_request(
+        "TestAwsInferenceProfile",
+        {
+          input: input,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry), # nil when no :tb option is present
+        baml_options[:client_registry],
+        true # NOTE(review): the other build_request variant passes false; presumably the stream flag, confirm
+      )
+    end
+
     sig {
       params(
         varargs: T.untyped,
diff --git a/integ-tests/typescript/baml_client/async_client.ts b/integ-tests/typescript/baml_client/async_client.ts
index dfe293bb57..b8ff82ab41 100644
--- a/integ-tests/typescript/baml_client/async_client.ts
+++ b/integ-tests/typescript/baml_client/async_client.ts
@@ -2544,6 +2544,29 @@ export class BamlAsyncClient {
     }
   }
   
+  async TestAwsInferenceProfile( // Awaits a call of the "TestAwsInferenceProfile" BAML function.
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<string> {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } // per-call options override this.bamlOptions
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; // always an array; empty when no collector is set
+      const raw = await this.runtime.callFunction(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(), // undefined when no tb option is present
+        options.clientRegistry,
+        collector,
+      )
+      return raw.parsed(false) as string
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -7133,6 +7156,35 @@ class BamlStreamClient {
     }
   }
   
+  TestAwsInferenceProfile( // Starts a streaming call of the "TestAwsInferenceProfile" BAML function.
+      input: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, collector?: Collector | Collector[] }
+  ): BamlStream<string, string> {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) } // per-call options override this.bamlOptions
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : []; // always an array; empty when no collector is set
+      const raw = this.runtime.streamFunction(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        undefined, // NOTE(review): the meaning of this positional slot is not visible here; confirm against streamFunction
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(), // undefined when no tb option is present
+        options.clientRegistry,
+        collector,
+      )
+      return new BamlStream<string, string>(
+        raw,
+        (a): string => a, // identity mapping
+        (a): string => a, // identity mapping
+        this.ctxManager.cloneContext(),
+      )
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry, collector?: Collector | Collector[] }
diff --git a/integ-tests/typescript/baml_client/async_request.ts b/integ-tests/typescript/baml_client/async_request.ts
index e634ff18e5..a437ff1cab 100644
--- a/integ-tests/typescript/baml_client/async_request.ts
+++ b/integ-tests/typescript/baml_client/async_request.ts
@@ -2171,6 +2171,26 @@ export class AsyncHttpRequest {
     }
   }
   
+  async TestAwsInferenceProfile( // Builds (does not send) the HTTPRequest for "TestAwsInferenceProfile".
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<HTTPRequest> {
+    try {
+      return await this.runtime.buildRequest(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(), // read straight from the per-call options
+        __baml_options__?.clientRegistry,
+        false, // NOTE(review): AsyncHttpStreamRequest passes true here; presumably the stream flag, confirm
+      )
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -5597,6 +5617,26 @@ export class AsyncHttpStreamRequest {
     }
   }
   
+  async TestAwsInferenceProfile( // Builds (does not send) the HTTPRequest for "TestAwsInferenceProfile".
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): Promise<HTTPRequest> {
+    try {
+      return await this.runtime.buildRequest(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(), // read straight from the per-call options
+        __baml_options__?.clientRegistry,
+        true, // NOTE(review): AsyncHttpRequest passes false here; presumably the stream flag, confirm
+      )
+    } catch (error) {
+      throw toBamlError(error); // every failure is rethrown after conversion by toBamlError
+    }
+  }
+  
   async TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts
index d2ca42c380..49935ab67c 100644
--- a/integ-tests/typescript/baml_client/inlinedbaml.ts
+++ b/integ-tests/typescript/baml_client/inlinedbaml.ts
@@ -97,7 +97,7 @@ const fileMap = {
   "test-files/models/deepseek-azure.baml": "client<llm> DeepSeekAzure {\n    provider openai-generic\n    options {\n        base_url \"https://DeepSeek-R1-dtjbj.eastus2.models.ai.azure.com\"\n        api_key env.DEEPSEEK_AZURE_API_KEY\n        max_tokens 10\n    }\n}\n\nfunction TellStory(story: string) -> string {\n  client DeepSeekAzure\n  prompt #\"\n    You are a storyteller. Tell a story about the following:\n    {{ _.role(\"user\") }} {{ story }}\n  \"#\n}\n\ntest TellStory {\n  functions [TellStory]\n  args {\n    story #\"\n      Once upon a time, there was a cat who loved to play with yarn.\n    \"#\n  }\n}\n",
   "test-files/not-valid-json-1559/not-valid-json.baml": "class Document1559 {\n  client_details ClientDetails1559\n  notes Note1599[]\n}\n\nclass ClientDetails1559 {\n  client_name string?\n  client_address string?\n  client_postal_code string?\n  client_city string?\n  client_country string?\n  client_phone string?\n  client_email string?\n}\n\nclass Note1599 {\n  note_title string\n  note_description string?\n  note_amount string? @description(\"If there is a quantity, specify it here\")\n}\n\nfunction DescribeMedia1599(img: image, client_sector: string, client_name: string) -> string {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert at describing media.\n    {{_.role(\"user\")}}\n    Describe this image {{img}} for client {{ client_name }} in sector {{ client_sector }}.\n  \"#\n}\n\nfunction StructureDocument1559(document_txt: string) -> Document1559 {\n  client \"openai/gpt-4o-mini\"\n  prompt #\"\n    {{_.role(\"system\")}}\n    You are an expert in structuring notes.\n    {{_.role(\"user\")}}\n    Here is the text you need to structure:\n    {{ document_txt }}\n\n    {{ ctx.output_format }}\n  \"#\n}\n\ntest TestDescribeMedia1559 {\n  functions [DescribeMedia1599]\n  args {\n    img { file \"./notes.png\" }\n    client_sector #\"\n      roofer\n    \"#\n    client_name #\"\n      The Vroe Group\n    \"#\n  }\n}\n\ntest TestStructureDocument1559 {\n  functions [StructureDocument1559]\n  args {\n    // Test arguments would go here\n  }\n}",
   "test-files/providers/anthropic.baml": "function TestAnthropic(input: string) -> string {\n  client Claude\n  prompt #\"\n    Write a nice haiku about {{ input }}\n  \"#\n}\n\nfunction TestAnthropicShorthand(input: string) -> string {\n  client \"anthropic/claude-3-haiku-20240307\"\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestCaching(input: string, not_cached: string) -> string {\n  client ClaudeWithCaching\n  prompt #\"\n    {{ _.role('system', cache_control={\"type\": \"ephemeral\"}) }}\n    Generate the following story\n    {{ input }}\n\n    {# Haiku require 2048 tokens to cache -#}\n    {{ input }}\n\n    {{ _.role('user') }}\n    {{ not_cached }}\n  \"#\n}\n\nclass CustomStory {\n  title string\n  characters string[]\n  content string\n}\n\nfunction TestThinking(input: string) -> CustomStory {\n  client SonnetThinking\n  prompt #\"\n    {{ _.role('system') }}\n    Generate the following story\n    {{ ctx.output_format }}\n\n    {{ _.role('user') }}\n    {{ input }}\n  \"#\n}",
-  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}",
+  "test-files/providers/aws.baml": "function TestAws(input: string) -> string {\n  client AwsBedrock\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\n/// my docs\nclass UniverseQuestion {\n  question string\n  answer string\n}\n\nclass UniverseQuestionInput {\n  question string\n}\n\nfunction TestUniverseQuestion(question: UniverseQuestionInput) -> UniverseQuestion {\n  client AwsBedrock\n  prompt #\"\n    You are a helpful assistant that answers questions about the universe.\n\n    {{ ctx.output_format }}\n\n    {{ _.role(\"user\")}}\n\n    Question: {{ question }}\n\n    Answer:\n  \"#\n}\n\n\nfunction TestAwsInvalidRegion(input: string) -> string {\n  client AwsBedrockInvalidRegion\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidAccessKey(input: string) -> string {\n  client AwsBedrockInvalidAccessKey\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidProfile(input: string) -> string {\n  client AwsBedrockInvalidProfile\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInvalidSessionToken(input: string) -> string {\n  client AwsBedrockInvalidSessionToken\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestAwsInferenceProfile(input: string) -> string {\n  client AwsBedrockInferenceProfileClient\n  prompt #\"\n    Write a nice short story about {{ input }}. 
Keep it to 15 words or less.\n  \"#\n}\n\ntest TestName {\n  functions [TestAwsInferenceProfile]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n\n\nclient<llm> AwsBedrockInferenceProfileClient {\n  provider \"aws-bedrock\"\n  options {\n    model \"arn:aws:bedrock:us-east-1:404337120808:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0\"\n  }\n}\n",
   "test-files/providers/azure.baml": "// Test standard Azure GPT-3.5 (should add default max_tokens)\nfunction TestAzure(input: string) -> string {\n  client GPT35Azure\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model without max_tokens (should not add default)\nfunction TestAzureO1NoMaxTokens(input: string) -> string {\n  client AzureO1\n  prompt #\"\n   {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_tokens (should keep user value)\nfunction TestAzureO1WithMaxTokens(input: string) -> string {\n  client AzureO1WithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O1 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO1WithMaxCompletionTokens(input: string) -> string {\n  client AzureO1WithMaxCompletionTokens\n  prompt #\"\n     {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test GPT-3.5 with explicit max_tokens (should keep user value)\nfunction TestAzureWithMaxTokens(input: string) -> string {\n  client GPT35AzureWithMaxTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test failure case with invalid resource name\nfunction TestAzureFailure(input: string) -> string {\n  client GPT35AzureFailed\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. 
Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\nclient<llm> AzureWithNullMaxTokens {\n  provider azure-openai\n  options {\n    resource_name env.AZURE_OPENAI_RESOURCE_NAME\n    deployment_id env.AZURE_OPENAI_DEPLOYMENT_ID\n    api_version \"2024-02-01\"\n    max_tokens null\n  }\n}\n\n// Test O3 model without max_tokens (should not add default)\nfunction TestAzureO3NoMaxTokens(input: string) -> string {\n  client AzureO3\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Test O3 model with explicit max_completion_tokens (should keep user value)\nfunction TestAzureO3WithMaxCompletionTokens(input: string) -> string {\n  client AzureO3WithMaxCompletionTokens\n  prompt #\"\n    {{ _.role(\"user\") }}\n    Write a nice haiku, given the user input. Make sure to reference the input in the haiku.\n\n    Input: {{ input }}\n  \"#\n}\n\n// Add test cases to verify the behavior\ntest TestAzureClients {\n  functions [\n    TestAzure,\n    TestAzureO1NoMaxTokens,\n    TestAzureO1WithMaxTokens,\n    TestAzureWithMaxTokens,\n    TestAzureO1WithMaxCompletionTokens,\n    TestAzureO3NoMaxTokens,\n    TestAzureO3WithMaxCompletionTokens\n  ]\n  args {\n    input \"Cherry blossoms\"\n  }\n}\n\n// Test failure case separately\ntest TestAzureFailureCase {\n  functions [TestAzureFailure]\n  args {\n    input \"Cherry blossoms\"\n  }\n}",
   "test-files/providers/dummy-clients.baml": "client OpenAIDummyClient {\n  provider openai-generic\n  options {\n    api_key env.OPENAI_API_KEY\n    model \"gpt-4o-mini\"\n    base_url \"http://localhost:8000\"\n  }\n}\n\nfunction TestOpenAIDummyClient(input: string) -> string {\n  client OpenAIDummyClient\n  prompt #\"\n    {{ _.role(\"user\") }}\n    {{ input }}\n  \"#\n}",
   "test-files/providers/gemini.baml": "function TestGemini(input: string) -> string {\n  client Gemini\n  prompt #\"\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystem(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }}\n\n    Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiSystemAsChat(input: string) -> string {\n  client Gemini\n  prompt #\"\n    {{ _.role('system') }} You are a helpful assistant\n\n    {{_.role(\"user\")}} Write a nice short story about {{ input }}. Keep it to 15 words or less.\n  \"#\n}\n\nfunction TestGeminiOpenAiGeneric() -> string {\n  client GeminiOpenAiGeneric\n  prompt #\"{{_.role(\"system\")}} You are a helpful assistant\n  {{_.role(\"user\")}} Write a poem about llamas\n  \"#\n}\n\ntest TestName {\n  functions [TestGeminiSystem]\n  args {\n    input #\"\n      hello world\n    \"#\n  }\n}\n",
diff --git a/integ-tests/typescript/baml_client/parser.ts b/integ-tests/typescript/baml_client/parser.ts
index d14425ff8c..efd602f879 100644
--- a/integ-tests/typescript/baml_client/parser.ts
+++ b/integ-tests/typescript/baml_client/parser.ts
@@ -1953,6 +1953,24 @@ export class LlmResponseParser {
     }
   }
   
+  TestAwsInferenceProfile(
+      llmResponse: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
+  ): string {
+    try {
+      return this.runtime.parseLlmResponse(
+        "TestAwsInferenceProfile",
+        llmResponse,
+        false,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       llmResponse: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
@@ -5037,6 +5055,24 @@ export class LlmStreamParser {
     }
   }
   
+  TestAwsInferenceProfile(
+      llmResponse: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
+  ): string {
+    try {
+      return this.runtime.parseLlmResponse(
+        "TestAwsInferenceProfile",
+        llmResponse,
+        true,
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+      ) as string
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       llmResponse: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
diff --git a/integ-tests/typescript/baml_client/sync_client.ts b/integ-tests/typescript/baml_client/sync_client.ts
index d39177fc88..4afeb6a62a 100644
--- a/integ-tests/typescript/baml_client/sync_client.ts
+++ b/integ-tests/typescript/baml_client/sync_client.ts
@@ -2546,6 +2546,29 @@ export class BamlSyncClient {
     }
   }
   
+  TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): string {
+    try {
+      const options = { ...this.bamlOptions, ...(__baml_options__ || {}) }
+      const collector = options.collector ? (Array.isArray(options.collector) ? options.collector : [options.collector]) : [];
+      const raw = this.runtime.callFunctionSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        options.tb?.__tb(),
+        options.clientRegistry,
+        collector,
+      )
+      return raw.parsed(false) as string
+    } catch (error: any) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
diff --git a/integ-tests/typescript/baml_client/sync_request.ts b/integ-tests/typescript/baml_client/sync_request.ts
index 95ed80721c..ea07d97707 100644
--- a/integ-tests/typescript/baml_client/sync_request.ts
+++ b/integ-tests/typescript/baml_client/sync_request.ts
@@ -2171,6 +2171,26 @@ export class HttpRequest {
     }
   }
   
+  TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): HTTPRequest {
+    try {
+      return this.runtime.buildRequestSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        false,
+      )
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions
@@ -5597,6 +5617,26 @@ export class HttpStreamRequest {
     }
   }
   
+  TestAwsInferenceProfile(
+      input: string,
+      __baml_options__?: BamlCallOptions
+  ): HTTPRequest {
+    try {
+      return this.runtime.buildRequestSync(
+        "TestAwsInferenceProfile",
+        {
+          "input": input
+        },
+        this.ctxManager.cloneContext(),
+        __baml_options__?.tb?.__tb(),
+        __baml_options__?.clientRegistry,
+        true,
+      )
+    } catch (error) {
+      throw toBamlError(error);
+    }
+  }
+  
   TestAwsInvalidAccessKey(
       input: string,
       __baml_options__?: BamlCallOptions