# Install dependencies and prepare environment

In [None]:
!pip install -U sagemaker boto3

In [None]:
import boto3
import sagemaker
import time

# Control-plane client used by the deployment and cleanup cells below.
sm = boto3.Session().client("sagemaker")
sess = sagemaker.session.Session()
role = sagemaker.get_execution_role()
# Read the region through the public property instead of the private
# `_region_name` attribute, which is an implementation detail of Session.
region = sess.boto_region_name
bucket = sess.default_bucket()
model_path_prefix = "nvidia-parakeet"

# Look up the SageMaker PyTorch inference Deep Learning Container (DLC) image
# that matches this region, framework version, and instance type.
image_uri = sagemaker.image_uris.retrieve(
    framework="pytorch",
    region=region,
    py_version="py312",
    image_scope="inference",
    version="2.6.0",
    instance_type="ml.g5.2xlarge",
)

print("Sagemaker execution role:", role)
print("Deployment region:", region)
print("Sagemaker image_uri:", image_uri)
print("Model upload S3 path:", f"s3://{bucket}/{model_path_prefix}/")

# Names of the SageMaker resources created, polled, invoked, and finally
# deleted by the later cells.
model_name = "nvidia-parakeet-model"
endpoint_config_name = "nvidia-parakeet-endpoint-config"
endpoint_name = "nvidia-parakeet-endpoint"
print("Sagemaker model name:", model_name)
print("Sagemaker endpoint_config_name name:", endpoint_config_name)
print("Sagemaker endpoint_name name:", endpoint_name)

# Upload model inference code

In [None]:
!rm -rf model.tar.
!tar -zcvf model.tar.gz ./code --exclude='*.ipynb' --exclude='*/.ipynb_checkpoints'

In [None]:
# Upload the packaged inference code under the model prefix of the session
# bucket and keep the location returned by the upload for model creation.
model_data = sess.upload_data(
    path="model.tar.gz",
    bucket=bucket,
    key_prefix=model_path_prefix,
)
print(f"Code tar ball uploaded to ---> {model_data}")

# Deploy SageMaker resources

In [None]:
def create_model():
    """Create the SageMaker model from the container image and uploaded code.

    Reads the notebook-level ``model_name``, ``role``, ``image_uri`` and
    ``model_data`` values and prints the raw ``create_model`` response.
    """
    container = {"Image": image_uri, "ModelDataUrl": model_data}
    response = sm.create_model(
        ModelName=model_name,
        ExecutionRoleArn=role,
        Containers=[container],
    )
    print(f"Created model: {response}")


create_model()

In [None]:
def create_endpoint_config(
    instance_type="ml.g5.2xlarge",
    initial_instance_count=1,
    startup_health_check_timeout=300,
):
    """Create the endpoint configuration with a single production variant.

    Reads the notebook-level ``endpoint_config_name`` and ``model_name`` and
    prints the raw ``create_endpoint_config`` response.

    Args:
        instance_type: Hosting instance type. The default matches the
            instance type the container image was retrieved for earlier in
            this notebook; keep the two compatible when overriding.
        initial_instance_count: Number of instances to launch initially.
        startup_health_check_timeout: Value, in seconds, passed as
            ``ContainerStartupHealthCheckTimeoutInSeconds``.
    """
    variant = {
        "VariantName": "AllTraffic",
        "ModelName": model_name,
        "InstanceType": instance_type,
        "InitialInstanceCount": initial_instance_count,
        "ContainerStartupHealthCheckTimeoutInSeconds": startup_health_check_timeout,
    }
    resp = sm.create_endpoint_config(
        EndpointConfigName=endpoint_config_name,
        ProductionVariants=[variant],
    )
    print(f"Created Endpoint Config: {resp}")


create_endpoint_config()

In [None]:
def create_endpoint():
    """Request creation of the endpoint from the endpoint configuration.

    Uses the notebook-level ``endpoint_name`` and ``endpoint_config_name``
    and prints the raw ``create_endpoint`` response.
    """
    response = sm.create_endpoint(
        EndpointConfigName=endpoint_config_name,
        EndpointName=endpoint_name,
    )
    print(f"\nCreated Endpoint: {response}")


create_endpoint()

In [None]:
# Poll the endpoint every 30 seconds until it reaches a terminal status.
print("Waiting for endpoint in service")
while True:
    details = sm.describe_endpoint(EndpointName=endpoint_name)
    status = details["EndpointStatus"]
    if status == "InService":
        print(f"\nDone! Status: {status}")
        break
    if status == "Failed":
        print(f"\nDone! Status: {status}")
        # Surface the cause and stop here so later cells do not try to
        # invoke an endpoint that never came up.
        reason = details.get("FailureReason", "no failure reason reported")
        raise RuntimeError(f"Endpoint {endpoint_name} failed to deploy: {reason}")
    print(".", end="", flush=True)
    time.sleep(30)

# Invoke the SageMaker endpoint

In [None]:
!pip install -U kaldiio

In [None]:
import json
import boto3
import kaldiio
import time
import sys

def prepare_audio_data(audio_path):
    """Load an audio file with kaldiio and return its samples as raw bytes.

    Args:
        audio_path: Path handed to ``kaldiio.load_mat``.

    Returns:
        The loaded sample array serialized with ``tobytes()``. The sample
        rate is printed but is not part of the returned payload.
    """
    sample_rate, samples = kaldiio.load_mat(audio_path)
    payload = samples.tobytes()
    print(f"Successfully loaded audio file: {audio_path}, sample rate: {sample_rate}")
    return payload

def invoke_endpoint(audio_bytes, endpoint_name):
    """Send audio bytes to a SageMaker endpoint and return the decoded JSON reply.

    Args:
        audio_bytes: Request body, sent as ``application/octet-stream``.
        endpoint_name: Name of the endpoint to invoke.

    Returns:
        The response body parsed with ``json.loads``.
    """
    runtime = boto3.client("sagemaker-runtime")
    print(f"Invoking endpoint: {endpoint_name}")

    # Assemble the request first, then issue the call.
    request = {
        "EndpointName": endpoint_name,
        "ContentType": "application/octet-stream",
        "Body": audio_bytes,
    }
    reply = runtime.invoke_endpoint(**request)

    # The body is a stream: read it fully, decode, and parse as JSON.
    raw_body = reply["Body"].read()
    parsed = json.loads(raw_body.decode())
    print("Endpoint invocation successful")
    return parsed

def main(audio_file="2086-149220-0033.wav"):
    """Send one audio file to the deployed endpoint and print the response.

    Args:
        audio_file: Path of the audio file to send. Defaults to the sample
            file name used by this notebook.

    Exits with status 1 when the endpoint returns an empty response.
    """
    print(f"Starting to process audio file: {audio_file}")
    audio_bytes = prepare_audio_data(audio_file)

    # perf_counter is monotonic, so the measurement is not skewed by system
    # clock adjustments. The timed span covers the whole invoke_endpoint call,
    # including creation of the runtime client.
    start_time = time.perf_counter()
    result = invoke_endpoint(audio_bytes, endpoint_name)
    time_used = round((time.perf_counter() - start_time) * 1000)
    print(f"Inference time: {time_used}ms")

    if not result:
        print("Invocation failed, no valid response received")
        sys.exit(1)

    print("Model response:")
    print(json.dumps(result, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()


# Delete SageMaker resources

In [None]:
# Tear down in the reverse order of creation: the endpoint first, then its
# endpoint configuration, and finally the model.
sm.delete_endpoint(EndpointName=endpoint_name)
sm.delete_endpoint_config(EndpointConfigName=endpoint_config_name)
sm.delete_model(ModelName=model_name)