In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

## List avaliable amazon bedrock models

In [15]:

"""
Lists the available Amazon Bedrock models.
"""
import logging
import json
import re
import boto3


from botocore.exceptions import ClientError


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def list_foundation_models(bedrock_client):
    """
    Gets a list of available Amazon Bedrock foundation models.

    :return: The list of available bedrock foundation models.
    """

    try:
        response = bedrock_client.list_foundation_models()
        models = response["modelSummaries"]
        logger.info("Got %s foundation models.", len(models))
        return models

    except ClientError:
        logger.error("Couldn't list foundation models.")
        raise


def main():
    """Entry point for the example. Uses the AWS SDK for Python (Boto3)
    to create an Amazon Bedrock client. Then lists the available Bedrock models
    in the region set in the callers profile and credentials.
    """

    bedrock_client = boto3.client(service_name="bedrock", region_name="us-east-1")

    fm_models = list_foundation_models(bedrock_client)
    for model in fm_models:
        print(f"Model: {model['modelName']}")
        print(json.dumps(model, indent=2))
        print("---------------------------\n")

    logger.info("Done.")


if __name__ == "__main__":
    main()

INFO:__main__:Got 81 foundation models.
INFO:__main__:Done.


Model: Titan Text Large
{
  "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-tg1-large",
  "modelId": "amazon.titan-tg1-large",
  "modelName": "Titan Text Large",
  "providerName": "Amazon",
  "inputModalities": [
    "TEXT"
  ],
  "outputModalities": [
    "TEXT"
  ],
  "responseStreamingSupported": true,
  "customizationsSupported": [],
  "inferenceTypesSupported": [
    "ON_DEMAND"
  ],
  "modelLifecycle": {
    "status": "ACTIVE"
  }
}
---------------------------

Model: Titan Image Generator G1
{
  "modelArn": "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-image-generator-v1:0",
  "modelId": "amazon.titan-image-generator-v1:0",
  "modelName": "Titan Image Generator G1",
  "providerName": "Amazon",
  "inputModalities": [
    "TEXT",
    "IMAGE"
  ],
  "outputModalities": [
    "IMAGE"
  ],
  "customizationsSupported": [
    "FINE_TUNING"
  ],
  "inferenceTypesSupported": [
    "PROVISIONED"
  ],
  "modelLifecycle": {
    "status": "ACTIVE"
  }
}
----

## AWS sample code

## langchain_aws

In [12]:
# Use the Conversation API to send a text message to Amazon Nova.

import boto3
from botocore.exceptions import ClientError

# Create a Bedrock Runtime client in the AWS Region you want to use.
client = boto3.client("bedrock-runtime", region_name="us-east-1")

# Set the model ID, e.g., Amazon Nova Lite.
model_id = "amazon.nova-lite-v1:0"

# Start a conversation with the user message.
user_message = "Describe the purpose of a 'hello world' program in one line."
conversation = [
    {
        "role": "user",
        "content": [{"text": user_message}],
    }
]

try:
    # Send the message to the model, using a basic inference configuration.
    response = client.converse(
        modelId=model_id,
        messages=conversation,
        inferenceConfig={"maxTokens": 512, "temperature": 0.5, "topP": 0.9},
    )

    # Extract and print the response text.
    response_text = response["output"]["message"]["content"][0]["text"]
    print(response_text)

except (ClientError, Exception) as e:
    print(f"ERROR: Can't invoke '{model_id}'. Reason: {e}")
    exit(1)




The purpose of a 'hello world' program is to provide a simple introduction to the syntax and basic structure of a programming language.


## S3

### S3 - upload file & folder

In [None]:
import boto3

s3_client = boto3.client('s3', 
                  region_name='us-east-1')

def upload_file_to_s3(bucket_name, file_path, object_name):
    try:
        s3_client.upload_file(file_path, bucket_name, object_name)
        print(f"File '{file_path}' uploaded to bucket '{bucket_name}' as '{object_name}'.")
    except Exception as e:
        print(f"Error uploading file: {e}")

# upload folder to s3
def upload_folder_to_s3(bucket_name, folder_path):
    import os
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            file_path = os.path.join(root, file)
            object_name = os.path.relpath(file_path, folder_path)
            upload_file_to_s3(bucket_name, file_path, object_name)

upload_folder_to_s3('aiwave-hackathon-team', '../data')

### S3 - get information

In [None]:
import boto3

s3_client = boto3.client('s3', 
                  region_name='us-east-1')

def list_buckets():
    response = s3_client.list_buckets()
    buckets = response['Buckets']
    [print(f"Bucket Name: {bucket['Name']}") for bucket in buckets]
    return buckets

def get_objects(bucket_name, prefix=""):
    response = s3_client.list_objects_v2(
        Bucket=bucket_name,
        Prefix=prefix  # 可選：只列出特定開頭的
    )
    print(f"Objects in bucket '{bucket_name}':")
    if 'Contents' in response:
        for obj in response['Contents']:
            print(obj['Key'])
        return response['Contents']
    else:
        print("No objects found.")
        return []
        
# get s3 bucket-name arn
def get_s3_bucket_arn(bucket_name):
    return f"arn:aws:s3:::{bucket_name}"
# get s3 object arn
def get_s3_object_arn(bucket_name, object_name):
    return f"arn:aws:s3:::{bucket_name}/{object_name}"
# get s3 object url
def get_s3_object_url(bucket_name, object_name):
    return f"https://{bucket_name}.s3.amazonaws.com/{object_name}"

def get_s3_bucket_info(bucket_name):
    bucket_arn = get_s3_bucket_arn(bucket_name)
    objects = get_objects(bucket_name)
    object_arns = [get_s3_object_arn(bucket_name, obj['Key']) for obj in objects]
    object_urls = [get_s3_object_url(bucket_name, obj['Key']) for obj in objects]
    return {
        'bucket_name': bucket_name,
        'bucket_arn': bucket_arn,
        'objects': objects,
        'object_arns': object_arns,
        'object_urls': object_urls
    }


# get all buckets info
s3_bucket_info = get_s3_bucket_info("aiwave-hackathon-team")
print(f"Bucket Name: {s3_bucket_info['bucket_name']}")
print(f"Bucket ARN: {s3_bucket_info['bucket_arn']}")
print(f"Objects: {s3_bucket_info['objects']}")
print(f"Object ARNs: {s3_bucket_info['object_arns']}")
print(f"Object URLs: {s3_bucket_info['object_urls']}")
print("---------------------------\n")

### S3 - library

In [1]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from lib import s3

# get all buckets info
s3_bucket_info = s3.get_s3_bucket_info("aiwave-hackathon-team")
print(f"Bucket Name: {s3_bucket_info['bucket_name']}")
print(f"Bucket ARN: {s3_bucket_info['bucket_arn']}")
print(f"Objects: {s3_bucket_info['objects']}")
print(f"Object ARNs: {s3_bucket_info['object_arns']}")
print(f"Object URLs: {s3_bucket_info['object_urls']}")
print("---------------------------\n")

Bucket Name: aiwave-hackathon-team
Bucket ARN: arn:aws:s3:::aiwave-hackathon-team
Objects: ['pdf/ASTM/ASTM A276∕A276M-24a Standard Specification for Stainless Steel Bars and Shapes.pdf', 'pdf/ASTM/ASTM A564∕A564M-19a Standard Specification for Hot-Rolled and Cold-Finished Age-Hardening Stainless Steel Bars and Shape.pdf', 'pdf/EN/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion.pdf', 'pdf/JIS/JIS_G_4303_2021_jp ステンレス鋼棒.pdf', 'pdf/SAE/SAE AMS5643W Steel, Corrosion-Resistant, Bars, Wire, Forgings, Tubing and Rings 16Cr-4.0Ni-0.30Cb(Nb)-4.0Cu (UNS S17400).pdf', 'spilt_pdf/ASTM_A276_A276M_24a_Stainless_Steel_Bars.pdf_1.pdf', 'spilt_pdf/ASTM_A276_A276M_24a_Stainless_Steel_Bars.pdf_2.pdf', 'spilt_pdf/ASTM_A276_A276M_24a_Stainless_Steel_Bars.pdf_3.pdf', 'spilt_pdf/ASTM_A276_A276M_24a_Stainless_Steel_Bars.pdf_4.pdf', 'spilt_pdf/ASTM_A276_A276M_24a_Stainless_Steel_Bars.pdf_5.pdf', 'spilt

## OpenSearch

In [1]:
import boto3
REGION = 'us-east-1'  # 你的 AWS region
COLLECTION_NAME = 'rag-database'
aoss = boto3.client('opensearchserverless', region_name=REGION)

In [2]:
# Step 1: List collections to find the ID
list_response = aoss.list_collections()
collection_id = None
collection_arn = None
for col in list_response['collectionSummaries']:
    if col['name'] == COLLECTION_NAME:
        collection_id = col['id']
        collection_arn = col['arn']
        break

print(f"Found Collection ID: {collection_id}")
print(f"Found Collection ARN: {collection_arn}")

Found Collection ID: zd8hok4ehavz1m21lk0i
Found Collection ARN: arn:aws:aoss:us-east-1:904375567622:collection/zd8hok4ehavz1m21lk0i


## Bedrock Knowledge base

In [31]:

KB_NAME = 'stainlness'
BEDROCK_USER_ARN = 'arn:aws:iam::904375567622:user/stainlness-rag'
BEDROCK_ROLE_ARN = 'arn:aws:iam::904375567622:role/bedrock_role'
EMBEDDING_MODEL_ARN = 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0'
OPENSEARCH_COLLECTION_ARN = collection_arn
OPENSEARCH_COLLECTION_ID = collection_id
OPENSEARCH_VECTOR_INDEX_NAME = 'vector-index-1024'
KNOWLEDGE_BASE_ID = 'NBSD4E1XMO'
bedrock_agent = boto3.client('bedrock-agent', region_name=REGION)

In [34]:
# 建立 Knowledge Base
create_kb_response = bedrock_agent.create_knowledge_base(
    name=KB_NAME,
    description="Knowledge Base created with S3 as data source and OpenSearch as vector store.",
    roleArn=BEDROCK_ROLE_ARN,
    knowledgeBaseConfiguration={
        "type": "VECTOR",
        "vectorKnowledgeBaseConfiguration": {
            "embeddingModelArn": EMBEDDING_MODEL_ARN,
            "embeddingModelConfiguration": {
                "bedrockEmbeddingModelConfiguration": {
                    "embeddingDataType": "FLOAT32",
                    "dimensions": 1024
                }
            }
        }
    },
    storageConfiguration={
        "type": "OPENSEARCH_SERVERLESS",
        "opensearchServerlessConfiguration": {
            "collectionArn": OPENSEARCH_COLLECTION_ARN,
            "vectorIndexName": OPENSEARCH_VECTOR_INDEX_NAME,
            "fieldMapping": {
                "vectorField": "vector-index-1024",
                "textField": "text",
                "metadataField": "metadata"
            }
        }
    }
)

knowledge_base_id = create_kb_response['knowledgeBase']['knowledgeBaseId']
print(f"Knowledge Base Created Successfully! ID: {knowledge_base_id}")

Knowledge Base Created Successfully! ID: YQVYVZJI42


In [35]:
KNOWLEDGE_BASE_ID = knowledge_base_id

In [36]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from lib import s3

# get all buckets info
s3_bucket_info = s3.get_s3_bucket_info("aiwave-hackathon-team")
S3_BUCKET_ARN = s3_bucket_info['bucket_arn']
S3_PREFIX = 'pdf/EN/'  

In [37]:
# Create Data Source
create_ds_response = bedrock_agent.create_data_source(
    knowledgeBaseId=KNOWLEDGE_BASE_ID,
    name="S3-test-pdf-EN",  # 你自己取名字
    description="Data source from S3 bucket",
    dataSourceConfiguration={
        "type": "S3",
        "s3Configuration": {
            "bucketArn": S3_BUCKET_ARN,         # S3 bucket ARN
            "inclusionPrefixes": [S3_PREFIX] if S3_PREFIX else []  # Optional prefix
        }
    }
)

data_source_id = create_ds_response['dataSource']['dataSourceId']
print(f"Data Source Created Successfully! ID: {data_source_id}")

Data Source Created Successfully! ID: H2YIF5X7ZI


In [38]:
DATA_SOURCE_ID = data_source_id

In [39]:
# Start Ingestion Job
start_ingestion_response = bedrock_agent.start_ingestion_job(
    knowledgeBaseId=KNOWLEDGE_BASE_ID,
    dataSourceId=DATA_SOURCE_ID
)

ingestion_job_id = start_ingestion_response['ingestionJob']['ingestionJobId']
print(f"Ingestion Job Started! ID: {ingestion_job_id}")

Ingestion Job Started! ID: 0FTYNJDBO2


In [40]:
INGESTION_JOB_ID = ingestion_job_id

In [43]:
import time

while True:
    response = bedrock_agent.get_ingestion_job(
        knowledgeBaseId=KNOWLEDGE_BASE_ID,
        dataSourceId=DATA_SOURCE_ID,
        ingestionJobId=INGESTION_JOB_ID
    )
    status = response['ingestionJob']['status']
    print(f"Ingestion job status: {status}")
    if status in ['COMPLETE', 'FAIL']:
        break
    time.sleep(5)  # 每 5 秒查一次

Ingestion job status: COMPLETE


In [45]:
bedrock_runtime = boto3.client('bedrock-runtime', region_name=REGION)

response = bedrock_runtime.retrieve_and_generate(
    input={
        "knowledgeBaseId": KNOWLEDGE_BASE_ID,
        "retrievalQuery": {
            "text": "你的問題"
        }
    }
)

print(response)

AttributeError: 'BedrockRuntime' object has no attribute 'retrieve_and_generate'

## PDF

In [5]:
from PyPDF2 import PdfReader, PdfWriter
import os

def split_pdf_pages(input_pdf_path, output_folder):
    # 確保輸出資料夾存在
    os.makedirs(output_folder, exist_ok=True)
    
    # 讀取 PDF
    reader = PdfReader(input_pdf_path)
    input_pdf_name = os.path.splitext(os.path.basename(input_pdf_path))[0]
    for i, page in enumerate(reader.pages):
        writer = PdfWriter()
        writer.add_page(page)
        
        output_path = os.path.join(output_folder, f"{input_pdf_name}_{i+1}.pdf")
        with open(output_path, "wb") as f:
            writer.write(f)
        
        print(f"Saved: {output_path}")

# 使用範例
input_pdf = "../data/pdf/EN/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion.pdf"
output_dir = "split_pdfs"
split_pdf_pages(input_pdf, output_dir)

Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion_1.pdf
Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion_2.pdf
Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion_3.pdf
Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion_4.pdf
Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions for semi-finished products, bars,rods, wire, sections and bright products of corrosion_5.pdf
Saved: split_pdfs/EN 10088-3-2023 Stainless steels –Part 3 Technical delivery conditions f