# Generate Challenge Headlines with AWS Bedrock

## Overview

This notebook generates challenge headlines for challenges fetched from OpenChallenges (OC).

## Requirements

- Access to OpenChallenges REST API
- Access to OpenAI API

## Preparation

Load config file `.env`.

In [1]:
from dotenv import load_dotenv

load_dotenv()

True

## List challenges

In [2]:
import openchallenges_client
from pprint import pprint
from openchallenges_client.api import challenge_api

In [3]:
# See configuration.py for a list of all supported configuration parameters.
configuration = openchallenges_client.Configuration(
    host = "https://openchallenges.io/api/v1"
)

In [4]:
# Enter a context with an instance of the API client
challenges = []
with openchallenges_client.ApiClient(configuration) as api_client:
    api_instance = challenge_api.ChallengeApi(api_client)
    
    query = openchallenges_client.ChallengeSearchQuery(page_number=1000, page_size=1)

    try:
        # Get the first page of the list of challenges
        page = api_instance.list_challenges(query)
        challenges.extend(page.challenges)
    except openchallenges_client.ApiException as e:
        print("Exception when calling ChallengeApi->list_challenges: %s\n" % e)

## Prepare the challenge descriptions

In [20]:
import requests
from bs4 import BeautifulSoup

TODO

In [None]:
challenge = challenges[0]

response = requests.get(challenge.website_url)





pprint(response)

## Generate the headlines with AWS Bedrock

### Configure Bedrock client

In [5]:
import json
import os
import sys

import boto3
import botocore

module_path = ".."
sys.path.append(os.path.abspath(module_path))
from utils import bedrock, print_ww

os.environ["AWS_DEFAULT_REGION"] = "us-east-1"
os.environ["AWS_PROFILE"] = "cnb"

boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None)
)

Create new client
  Using region: us-east-1
  Using profile: cnb
boto3 Bedrock client successfully created!
bedrock-runtime(https://bedrock-runtime.us-east-1.amazonaws.com)


### Configure base model options

In [6]:
from langchain.llms.bedrock import Bedrock

inference_modifier = {'max_tokens_to_sample':6000, 
                      "temperature":0.6,
                      "top_k":250,
                      "top_p":1,
                      "stop_sequences": ["\n\nHuman"]
                     }

textgen_llm = Bedrock(model_id = "anthropic.claude-v2",
                    client = boto3_bedrock, 
                    model_kwargs = inference_modifier 
                    )


In [7]:
def generate_challenge_headlines(text, num_headlines):
    prompt=(
        f"Please generate {num_headlines} headlines that have a maximum ten words from the "
        "following challenge description. The headline must summarize the goal of the challenge. "
        f"Description: \n{text}"
    )
    response = Bedrock(model_id = "anthropic.claude-v2",
                    client = boto3_bedrock, 
                    model_kwargs = inference_modifier 
                )(prompt)
    return response

In [8]:
from itertools import compress
import json

def is_raw_headline(raw_headline):
    prefixes = ("1. ", "2. ", "3. ", "4. ", "5. ")
    return raw_headline.startswith(prefixes)

Authentify with AWS using the command:

```console
aws --profile cnb sso login
```

In [9]:
challenge = challenges[0]
response = generate_challenge_headlines(challenge.description, 5)

pprint(response)

(' Here are 5 headlines with a maximum of 10 words summarizing the goal of the '
 'challenge:\n'
 '\n'
 '1. Challenge Seeks to Improve Data for AI Discovery\n'
 '\n'
 '2. Data Challenge Aims to Augment Repository for AI Use \n'
 '\n'
 '3. Challenge Targets Data Standardization for AI Research\n'
 '\n'
 '4. Competition Focuses on Making Data AI Ready  \n'
 '\n'
 '5. Challenge Works to Ready Data for AI Insights')


In [14]:
raw_headlines = response.splitlines()
headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines)))

obj = {
    "id": challenge.id,
    "slug": challenge.slug,
    "name": challenge.name,
    "headline": challenge.headline,
    "headline_alternatives": headlines
}
json_str = json.dumps(obj, indent=2)

print(json_str)

{
  "id": 279,
  "slug": "niddk-central-repository-data-centric-challenge",
  "name": "NIDDK Central Repository Data-Centric Challenge",
  "headline": "Enhancing NIDDK datasets for future Artificial Intelligence (AI) applications.",
  "headline_alternatives": [
    "1. Challenge Seeks to Improve Data for AI Discovery",
    "2. Data Challenge Aims to Augment Repository for AI Use ",
    "3. Challenge Targets Data Standardization for AI Research",
    "4. Competition Focuses on Making Data AI Ready  ",
    "5. Challenge Works to Ready Data for AI Insights"
  ]
}


### Output challenge headlines

In [None]:
from itertools import compress
import json

raw_headlines = result.splitlines()

def is_raw_headline(raw_headline):
    prefixes = ("1. ", "2. ", "3. ", "4. ", "5. ")
    return raw_headline.startswith(prefixes)

headlines = list(compress(raw_headlines, map(is_raw_headline, raw_headlines)))

obj = {
    "id": challenge.id,
    "slug": challenge.slug,
    "name": challenge.name,
    "headline": challenge.headline,
    "headline_alternatives": headlines
}
json_str = json.dumps(obj, indent=2)

print(json_str)