In [None]:
def parse_sessions(file_path):
    """Parse a plain-text session listing into title/synopsis records.

    Every line is whitespace-stripped first. A line containing ``|`` opens a
    new session and is kept (stripped) as that session's title. Blank lines
    are skipped. Any other line is added to the synopsis of the session
    currently being read, and is ignored if no title line has been seen yet.
    Synopsis lines are joined with single spaces.

    Args:
        file_path: Path of the UTF-8 text file to read.

    Returns:
        A list of ``{'title': str, 'synopsis': str}`` dicts in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        stripped_lines = [raw.strip() for raw in handle.readlines()]

    records = []
    heading = None
    body = []

    for text in stripped_lines:
        if text == '':
            continue
        if '|' not in text:
            # Plain text belongs to the open session, if there is one.
            if heading:
                body.append(text)
            continue
        # Title line: close out the previous session before starting a new one.
        if heading:
            records.append({'title': heading, 'synopsis': ' '.join(body)})
        heading, body = text, []

    # The final session has no following title line to trigger its flush.
    if heading:
        records.append({'title': heading, 'synopsis': ' '.join(body)})

    return records

# Relative path of the text file to parse; open() resolves it against the
# kernel's current working directory.
file_path = 'reinvent.txt'
# Parse once and keep the records in a notebook-level name used by later cells.
parsed_sessions = parse_sessions(file_path)

In [None]:
# Print every parsed session as a Title/Synopsis pair followed by a blank line.
for session in parsed_sessions:
    print(f"Title: {session['title']}\nSynopsis: {session['synopsis']}\n")

In [None]:
import boto3
from botocore.config import Config

region = 'us-west-2'


def init_bedrock_client(region: str):
    """Create a ``bedrock-runtime`` boto3 client for the given region.

    The region is supplied both inside the botocore ``Config`` and directly to
    ``boto3.client``. The ``Config`` also carries the retry settings
    ``{"max_attempts": 10, "mode": "standard"}``.

    Args:
        region: AWS region name.

    Returns:
        The object returned by ``boto3.client``.
    """
    client_config = Config(
        retries={"max_attempts": 10, "mode": "standard"},
        region_name=region,
    )
    return boto3.client("bedrock-runtime", config=client_config, region_name=region)

def converse_with_bedrock_tools(sys_prompt, usr_prompt, tool_config):
    """Send one tool-enabled Converse request through the module-level client.

    Sampling parameters are fixed: temperature 0.0 and topP 0.1 go in
    ``inferenceConfig``; top_k 1 is sent via ``additionalModelRequestFields``.

    Args:
        sys_prompt: Value passed as the ``system`` argument.
        usr_prompt: Value passed as the ``messages`` argument.
        tool_config: Value passed as the ``toolConfig`` argument.

    Returns:
        Whatever ``boto3_client.converse`` returns.
    """
    request = {
        # Alternative model id kept for reference:
        # "anthropic.claude-3-haiku-20240307-v1:0"
        "modelId": "anthropic.claude-3-sonnet-20240229-v1:0",
        "messages": usr_prompt,
        "system": sys_prompt,
        "inferenceConfig": {"temperature": 0.0, "topP": 0.1},
        "additionalModelRequestFields": {"top_k": 1},
        "toolConfig": tool_config,
    }
    return boto3_client.converse(**request)

def create_prompt(sys_template, user_template, **kwargs):
    """Fill both templates with ``kwargs`` and wrap them in Converse-style structures.

    Args:
        sys_template: ``str.format`` template for the system prompt.
        user_template: ``str.format`` template for the user message.
        **kwargs: Values substituted into both templates.

    Returns:
        A ``(sys_prompt, usr_prompt)`` tuple: a one-element list holding
        ``{"text": ...}`` and a one-element list holding a ``"user"`` message
        whose content is a single ``{"text": ...}`` block.
    """
    system_text = sys_template.format(**kwargs)
    user_text = user_template.format(**kwargs)

    system_blocks = [{"text": system_text}]
    user_messages = [{"role": "user", "content": [{"text": user_text}]}]
    return system_blocks, user_messages

# Shared Bedrock runtime client; converse_with_bedrock_tools reads this
# module-level name rather than taking a client argument.
boto3_client = init_bedrock_client(region)

In [None]:
# Closed vocabularies that are interpolated into the tool-spec descriptions
# so the model knows which values it may return.
topics = ['AI/ML', 'Analytics', 'Architecture', 'Cloud Operations', 'Compute', 'Serverless & Containers', 'Database', 'Developer Tools', 'Security', 'Storage', 'Migration & Modernization', 'IoT', 'Other']
audience_types = ['Developers', 'System Administrator', 'IT Administrator', 'Data Scientists', 'Security Professionals', 'Other']
# Spelled 'Lightning Talk' (not 'Lightening'); the model echoes these strings
# back verbatim, so a typo here ends up in the extracted data.
session_format = ['Breakout Session', 'Chalk Talk', 'Builder session', 'Workshop', 'Lightning Talk']

def list_to_string(lst):
    """Render a list as a JSON array literal for embedding in prompt text.

    Uses ``json.dumps`` so that elements containing apostrophes or double
    quotes are quoted and escaped correctly. For lists of plain strings
    without quote characters the output is the same as the previous
    ``str(lst).replace("'", '"')`` form, e.g. ``["AI/ML", "Analytics"]``.

    Args:
        lst: A JSON-serializable list (typically of strings).

    Returns:
        The JSON text of ``lst`` with non-ASCII characters left unescaped.
    """
    # Local import: the notebook only imports json in a later cell, while this
    # function is called in the same cell that defines it.
    import json

    return json.dumps(lst, ensure_ascii=False)

# Tool configuration passed as `toolConfig` to the Converse call. It declares
# a single tool, SessionInfoExtractor, whose input schema is the structured
# record we want back for each session. The closed vocabularies are exposed
# both in the human-readable descriptions and as JSON Schema `enum`
# constraints, so the allowed values are machine-readable as well.
tool_config = {
  "tools": [
    {
      "toolSpec": {
        "name": "SessionInfoExtractor",
        "description": "Extracts key information from AWS session descriptions including topics, AWS services, and target audience.",
        "inputSchema": {
          "json": {
            "type": "object",
            "properties": {
              "topics": {
                "description": f"List of main technical topics covered in the session. Available options are {list_to_string(topics)}",
                "type": "array",
                "items": {
                  "type": "string",
                  # Copy so later edits to the schema cannot mutate the source list.
                  "enum": list(topics)
                }
              },
              "aws_services": {
                "type": "array",
                "description": "List of AWS services mentioned in the session synopsis, such as 'Amazon S3', 'AWS Lambda', 'Amazon RDS', etc. If the service is not mentioned, return an empty array.",
                # Free-form on purpose: there is no fixed list of service names here.
                "items": {
                  "type": "string"
                }
              },
              "target_audience": {
                "type": "array",
                "description": f"List of potential target audiences for this session. Available options are {list_to_string(audience_types)}",
                "items": {
                  "type": "string",
                  "enum": list(audience_types)
                }
              },
              "session_format": {
                "type": "string",
                "description": f"Format of the session. Available options are {list_to_string(session_format)}",
                "enum": list(session_format)
              }
            },
            "required": ["topics", "aws_services", "target_audience", "session_format"]
          }
        }
      }
    }
  ]
}




In [None]:
def invoke_model(parsed_session):
    """Run the extraction tool call for one parsed session.

    The session's ``'title'`` is split once on ``' | '``: the left part becomes
    the session code and the right part the display title. If the separator is
    absent, the code is ``''`` and the whole title string is used as is. The
    display title and synopsis are printed, substituted into the prompt
    templates via ``create_prompt``, and sent with the module-level
    ``tool_config`` through ``converse_with_bedrock_tools``.

    Args:
        parsed_session: Dict with ``'title'`` and ``'synopsis'`` entries.

    Returns:
        A ``(response, session_code, title)`` tuple.
    """
    system_template = (
        "\n"
        "    You are a top-tier algorithm designed for extracting information in structured formats.\n"
        "    Do not add any information that is not explicitly given in the available options within the tool spec.\n"
        "    "
    )
    user_template = (
        "\n"
        "    Tip: Make sure to answer in the correct format and do not include any explanations. Use the given format to extract information from the following input: \n"
        "\n"
        "    title - {session_title}\n"
        "    synopsis - {session_synopsis}\n"
        "    "
    )

    synopsis = parsed_session['synopsis']
    full_title = parsed_session['title']

    pieces = full_title.split(' | ', 1)
    if len(pieces) > 1:
        session_code, title = pieces[0].strip(), pieces[1].strip()
    else:
        # No separator: there is no code to report and the title is left untouched.
        session_code, title = '', full_title

    print(title)
    print(synopsis)

    system_blocks, user_messages = create_prompt(
        system_template,
        user_template,
        session_title=title,
        session_synopsis=synopsis,
    )
    response = converse_with_bedrock_tools(system_blocks, user_messages, tool_config)
    return response, session_code, title

In [None]:
import logging
import json

class StationNotFoundError(Exception):
    """Error type for a radio station that cannot be found.

    No code in the visible notebook cells raises or catches this exception.
    """
# Module-level logger used by parse_tool_use.
logger = logging.getLogger(__name__)
# Configure root logging at INFO so logger.info messages are emitted.
# Note: basicConfig does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)

def parse_tool_use(message, title, code, synopsis):
    """Build a session record from the SessionInfoExtractor tool call in a response.

    The response envelope (``stopReason`` and ``output.message.content``) is
    indexed directly. The tool ``input`` is generated by the model, so it is
    read defensively: a field the model left out is replaced by a default and
    a warning is logged, instead of raising ``KeyError`` and aborting the
    caller.

    Args:
        message: Response dict from the Converse call.
        title: Session title copied into the record.
        code: Session code copied into the record.
        synopsis: Session synopsis copied into the record.

    Returns:
        A dict with keys ``code``, ``title``, ``synopsis``, ``topics``,
        ``aws_services``, ``target_audience`` and ``session_format`` built
        from the first ``SessionInfoExtractor`` tool-use block, or ``None``
        when the stop reason is not ``'tool_use'`` or no such block exists.
        Missing list fields default to ``[]``; a missing ``session_format``
        defaults to ``None``.
    """
    if message['stopReason'] != 'tool_use':
        return None

    for content_block in message['output']['message']['content']:
        if 'toolUse' not in content_block:
            continue
        tool = content_block['toolUse']
        if tool['name'] != 'SessionInfoExtractor':
            continue

        tool_input = tool.get('input') or {}
        logger.info("Requesting tool %s. Request: %s", tool['name'], tool_input)

        expected_fields = ('topics', 'aws_services', 'target_audience', 'session_format')
        missing_fields = [field for field in expected_fields if field not in tool_input]
        if missing_fields:
            logger.warning(
                "Tool %s input for session %r is missing fields %s; using defaults.",
                tool['name'], code, missing_fields,
            )

        return {
            "code": code,
            "title": title,
            "synopsis": synopsis,
            "topics": tool_input.get('topics', []),
            "aws_services": tool_input.get('aws_services', []),
            "target_audience": tool_input.get('target_audience', []),
            "session_format": tool_input.get('session_format'),
        }
    return None

In [None]:
# Run the extraction on the first parsed session only.
# Indexing [0] raises IndexError if parsed_sessions is empty.
response, session_code, title = invoke_model(parsed_sessions[0])
new_session_info = parse_tool_use(response, title, session_code, parsed_sessions[0]['synopsis'])

In [None]:
from time import sleep
 
# Collects one record per session for which parse_tool_use returned a record.
all_sessions_info = []

for session in parsed_sessions:
    # Pause one second before every request
    # (presumably to stay under service rate limits — TODO confirm).
    sleep(1)
    response, session_code, title = invoke_model(session)
    new_session_info = parse_tool_use(response, title, session_code, session['synopsis'])
    # A falsy result (parse_tool_use returned None) means the session is
    # skipped here without being recorded.
    if new_session_info:
        all_sessions_info.append(new_session_info)

# NOTE(review): the file is written only after the whole loop has finished, so
# an exception raised in any iteration leaves no output from this run.
# ensure_ascii=False keeps non-ASCII characters unescaped in the UTF-8 file.
with open('sessions_info.json', 'w', encoding='utf-8') as f:
    json.dump(all_sessions_info, f, ensure_ascii=False, indent=4)