In [14]:
# Upgrade the AWS SDK stack (boto3, awscli, botocore) and install textstat.
# None of these four installs is version-pinned, so the versions obtained
# depend on what pip resolves at the time the cell is run.
!pip install --upgrade boto3 # Ensure that we're using the latest AWS SDK.
!pip install --upgrade awscli
!pip install --upgrade botocore
!pip install textstat

# Install the remaining libraries in one quiet pip invocation. Only langchain
# is pinned to an exact version; transformers, faiss-cpu, pypdf and ipywidgets
# are range-constrained and the rest are unconstrained.
# NOTE(review): `-U` is written after `sqlalchemy`, but it is presumably a
# command-wide pip flag that upgrades every package listed here — confirm
# that this is intended.
!pip install --quiet \
    langchain==0.0.309 \
    "transformers>=4.24,<5" \
    sqlalchemy -U \
    "faiss-cpu>=1.7,<2" \
    "pypdf>=3.8,<4" \
    pinecone-client \
    apache-beam \
    datasets \
    tiktoken \
    "ipywidgets>=7,<8" \
    matplotlib

# Restart the kernel so the freshly installed packages are picked up: the cell
# displays an HTML snippet whose script calls Jupyter.notebook.kernel.restart().
# NOTE(review): this depends on the front end exposing a `Jupyter` JavaScript
# object and executing the script — confirm it works in the target environment.
from IPython.core.display import HTML
HTML("<script>Jupyter.notebook.kernel.restart()</script>")

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
autogluon-multimodal 0.8.2 requires transformers[sentencepiece]<4.32.0,>=4.31.0, but you have transformers 4.35.2 which is incompatible.
autovizwidget 0.21.0 requires pandas<2.0.0,>=0.20.1, but you have pandas 2.1.2 which is incompatible.
hdijupyterutils 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.1.2 which is incompatible.
jupyter-ai 2.5.0 requires langchain==0.0.318, but you have langchain 0.0.309 which is incompatible.
jupyter-ai-magics 2.5.0 requires langchain==0.0.318, but you have langchain 0.0.309 which is incompatible.
jupyter-scheduler 2.3.0 requires sqlalchemy~=1.0, but you have sqlalchemy 2.0.23 which is incompatible.
sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.1.2 which is incompatible.[0m[31m
[0m

In [1]:
import os
import sys
import json
import boto3
import shutil
import nltk
from nltk.tokenize import word_tokenize
import spacy

# Install the spaCy model: invoke the spaCy CLI through the shell to download
# the `en_core_web_sm` package. This runs every time the cell is executed.
!python -m spacy download en_core_web_sm

# Initialization and setup: fetch the NLTK 'punkt' data (presumably required by
# `word_tokenize` — confirm) and load the downloaded spaCy pipeline into `nlp`.
nltk.download('punkt')
nlp = spacy.load("en_core_web_sm")

[0mCollecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m60.9 MB/s[0m eta [36m0:00:00[0m00:01[0m0:01[0m
[0m[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


[nltk_data] Downloading package punkt to /home/sagemaker-
[nltk_data]     user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [19]:
####
# The models to use and their parameters.

# Different models have different requirements for prompt structure.

# Function to generate request for Claude
def generate_claude_request(model_info, base_prompt):
    """Build the request body for a Claude text-completion call.

    The prompt is wrapped in the "\\n\\nHuman: ...\\n\\nAssistant:" framing and
    the sampling settings are copied from ``model_info["parameters"]``.

    Args:
        model_info: Model entry holding a "parameters" dict with the keys
            "max_tokens_to_sample", "temperature" and "top_p".
        base_prompt: The instruction text to send to the model.

    Returns:
        dict: Request body with the keys "prompt", "max_tokens_to_sample",
        "temperature" and "top_p".

    Raises:
        KeyError: If "parameters" or one of the expected settings is missing.
    """
    body = {"prompt": "\n\nHuman: {}\n\nAssistant:".format(base_prompt)}
    settings = model_info["parameters"]
    for setting_name in ("max_tokens_to_sample", "temperature", "top_p"):
        body[setting_name] = settings[setting_name]
    return body

# Function to generate request for Titan
def generate_titan_request(model_info, base_prompt):
    """Build the request body for a Titan text-generation call.

    Args:
        model_info: Model entry holding a "parameters" dict with the keys
            "temperature", "topP" and "maxTokenCount".
        base_prompt: The instruction text, sent unchanged as "inputText".

    Returns:
        dict: Request body with "inputText" and a nested
        "textGenerationConfig" dict carrying the three generation settings.

    Raises:
        KeyError: If "parameters" or one of the expected settings is missing.
    """
    settings = model_info["parameters"]
    generation_config = {
        setting_name: settings[setting_name]
        for setting_name in ("temperature", "topP", "maxTokenCount")
    }
    return {"inputText": base_prompt, "textGenerationConfig": generation_config}

# Different models have different response formats.
def handle_titan_response(response):
    """Extract the generated text from a Titan response body.

    Concatenates the 'outputText' of every entry under 'results' and strips
    leading/trailing whitespace from the combined text.

    Args:
        response: Parsed response dict, or None when the request failed.

    Returns:
        str: The combined output text. An empty string is returned when the
        response is None/empty or carries no results, so callers can always
        treat the value as text.
    """
    # A failed request is represented by None; report "no text" instead of
    # raising AttributeError on `.get`.
    if not response:
        return ""

    # `or` guards cover keys that are present but explicitly null.
    results = response.get('results') or []
    return "".join(result.get('outputText') or '' for result in results).strip()

def handle_claude_response(response):
    """Extract the generated text from a Claude response body.

    Args:
        response: Parsed response dict, or None when the request failed.

    Returns:
        str: The value stored under 'completion', unchanged. An empty string
        is returned when the response is None/empty or has no completion, so
        the result is always text that can be written to a file.
    """
    # A failed request is represented by None; avoid AttributeError on `.get`.
    if not response:
        return ""
    return response.get('completion') or ""

# Dictionary with models and their custom functions

# Experiment configurations, keyed by a descriptive run name. The main loop
# uses each key as a folder name under the completions folder, so changing a
# key changes where that run's output is written.
#
# Each entry holds:
#   "model_id"         - identifier passed to Bedrock's invoke_model call.
#   "parameters"       - generation settings; read by the request builder and
#                        dumped to parameters.json by the main loop.
#   "generate_request" - callable (model_info, prompt) -> request body dict.
#   "handle_response"  - callable (response dict) -> completion text.
#
# The parameter key names differ per model family because each request builder
# reads them under those exact names (topP / maxTokenCount for Titan,
# top_p / max_tokens_to_sample for Claude).
#
# NOTE(review): the Titan run names omit the "temperature:" label that the
# Claude run names include (":0.5-top_p..." vs "-temperature:0.5-top_p...").
models = {
    # Titan, lower temperature, short output.
    "Titan-Text-G1-Express:0.5-top_p:1-max_tokens:300": {
        "model_id": "amazon.titan-text-express-v1",
        "parameters": {
            "temperature": 0.5,
            "topP": 1,
            "maxTokenCount": 300
        },
        "generate_request": generate_titan_request,
        "handle_response": handle_titan_response,
    },
    # Titan, higher temperature, tighter top-p, longer output.
    "Titan-Text-G1-Express:0.7-top_p:0.8-max_tokens:1000": {
        "model_id": "amazon.titan-text-express-v1",
        "parameters": {
            "temperature": 0.7,
            "topP": 0.8,
            "maxTokenCount": 1000
        },
        "generate_request": generate_titan_request,
        "handle_response": handle_titan_response,
    },
    # Claude, lower temperature, short output.
    "ClaudeV2-temperature:0.5-top_p:1-max_tokens:300": {
        "model_id": "anthropic.claude-v2:1",
        "parameters": {
            "temperature": 0.5,
            "top_p": 1,
            "max_tokens_to_sample": 300
        },
        "generate_request": generate_claude_request,
        "handle_response": handle_claude_response
    },
    # Claude, higher temperature, tighter top-p, longer output.
    "ClaudeV2-temperature:0.7-top_p:0.8-max_tokens:1000": {
        "model_id": "anthropic.claude-v2:1",
        "parameters": {
            "temperature": 0.7,
            "top_p": 0.8,
            "max_tokens_to_sample": 1000
        },
        "generate_request": generate_claude_request,
        "handle_response": handle_claude_response
    },
}

####
# The various writing styles.

# Writing styles to request. None is the unstyled baseline: the prompt omits
# the "in the style of" clause and the main loop stores its output in a
# "control" folder. Every other entry is also used verbatim as a folder name.
styles = [None, "Ernest Hemingway", "Joan Didion", "William Shakespeare", "Jeff Spicoli", "Hermione Granger"]

# Shared Bedrock runtime client used by query_bedrock. No region or credentials
# are passed here — presumably they are resolved from the environment; confirm
# for the target account.
boto3_bedrock = boto3.client(service_name='bedrock-runtime')

def query_bedrock(model_id, request_dict):
    """Send one JSON request to a Bedrock model and return the parsed reply.

    Args:
        model_id: Bedrock model identifier passed as ``modelId``.
        request_dict: JSON-serialisable request body for that model.

    Returns:
        The decoded JSON response body, or None if anything went wrong
        (serialisation, the service call, or decoding). Failures are reported
        with a printed message rather than raised.
    """
    json_mime = 'application/json'

    try:
        payload = json.dumps(request_dict).encode('utf-8')
        raw_response = boto3_bedrock.invoke_model(
            body=payload,
            modelId=model_id,
            accept=json_mime,
            contentType=json_mime,
        )
        return json.loads(raw_response.get('body').read())
    except Exception as err:
        # Best-effort by design: report the problem and let the caller decide.
        print(f"Error querying Bedrock model: {err}")
        return None

# Function to format the prompt
def format_instructions(name, base_content):
    """Compose the summarisation prompt for one piece of content.

    Args:
        name: Writing style to imitate; any falsy value (None, "") requests
            a plain summary with no style clause.
        base_content: The text to summarise, appended after a newline.

    Returns:
        str: The instruction sentence followed by the content.
    """
    style_clause = f" in the style of {name}" if name else ""
    return f"Summarize the following content{style_clause}.  Give me only the summary and no introduction.\n{base_content}"

def print_instructions(prompt: str, response: "str | list") -> None:
    """Print a prompt and the model's response under bold headings.

    Args:
        prompt: The text that was sent to the model.
        response: Either the completion text itself, or a sequence whose
            first element is a dict with a 'generated_text' entry.
    """
    bold, unbold = '\033[1m', '\033[0m'

    # The parameter was annotated as `str`, yet indexing a string with
    # [0]['generated_text'] always raises TypeError; accept both shapes.
    if isinstance(response, str):
        output_text = response
    else:
        output_text = response[0]['generated_text']

    print(f"{bold}> Input{unbold}\n{prompt}\n\n{bold}> Output{unbold}\n{output_text}\n")

####
# Read all text files from the content folder

# Folder holding the source texts to summarise; created if it does not exist
# so that a fresh checkout simply yields an empty content list.
content_folder = 'content'
os.makedirs(content_folder, exist_ok=True)
base_contents = []

# os.listdir returns names in arbitrary order; sort them so the order of
# base_contents is the same on every run.
for filename in sorted(os.listdir(content_folder)):
    file_path = os.path.join(content_folder, filename)
    # Only regular files are read; sub-directories are skipped.
    if os.path.isfile(file_path):
        # Read as UTF-8 explicitly instead of relying on the platform default.
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
            base_contents.append(content)

# Root folder for all generated output. Existing files are overwritten in
# place; uncomment the two lines below to wipe results from earlier runs first.
completions_folder = "completions"
# if os.path.exists(completions_folder):
#     shutil.rmtree(completions_folder)

# Number of completions requested for every (model, style, content) combination.
NUM_COMPLETIONS = 3

# Main loop for generating responses.
#
# Output layout: <completions_folder>/<model name>/<style or "control">/
#   parameters.json (per model), prompt.txt and completion-N.txt (per style).
# When more than one content file is loaded, the prompt and completion file
# names carry a "-content<K>" tag so that later content files do not overwrite
# the output of earlier ones; with a single content file the names are
# exactly prompt.txt / completion-N.txt.
multiple_contents = len(base_contents) > 1

for model_name, model_info in models.items():

    model_folder = os.path.join(completions_folder, model_name)
    os.makedirs(model_folder, exist_ok=True)

    # Create a JSON file containing the model parameters
    parameters_file_path = os.path.join(model_folder, "parameters.json")
    with open(parameters_file_path, "w", encoding="utf-8") as params_file:
        json.dump(model_info["parameters"], params_file, indent=4)

    # Model-specific request builder and response parser.
    generate_request = model_info["generate_request"]
    handle_response = model_info["handle_response"]

    for style in styles:
        style_dir = "control" if style is None else style
        style_folder = os.path.join(model_folder, style_dir)
        os.makedirs(style_folder, exist_ok=True)

        for content_index, base_content in enumerate(base_contents, start=1):
            content_tag = f"-content{content_index}" if multiple_contents else ""

            prompt = format_instructions(style, base_content)

            # Write the prompt to a file
            prompt_path = os.path.join(style_folder, f"prompt{content_tag}.txt")
            with open(prompt_path, "w", encoding="utf-8") as file:
                file.write(prompt)

            # The request body is identical for every attempt, so build it once.
            bedrock_payload = generate_request(model_info, prompt)

            for i in range(NUM_COMPLETIONS):
                # Call Bedrock API
                response = query_bedrock(model_info["model_id"], bedrock_payload)

                # query_bedrock reports failures by returning None; skip this
                # attempt instead of crashing in the response handler.
                if response is None:
                    print(f"\nSkipping completion {i+1} for {style} with model {model_name}: request failed.")
                    continue

                # Process the response using the model-specific function
                completion = handle_response(response)
                if completion is None:
                    print(f"\nSkipping completion {i+1} for {style} with model {model_name}: response contained no text.")
                    continue

                # Write each completion to a file
                completion_path = os.path.join(style_folder, f"completion{content_tag}-{i+1}.txt")
                with open(completion_path, "w", encoding="utf-8") as file:
                    file.write(completion)

                # Print each completion
                print(f"\nCompletion for {style} with model {model_name}, completion {i+1}:")
                print(completion)



Completion for None with model Titan-Text-G1-Express:0.5-top_p:1-max_tokens:300, completion 1:
Here is the summary of the content:

Emily, an aspiring artist, and Jack, a young inventor, collaborated on a project to create a series of murals around the village, depicting its history and culture. Jack's inventions added motion and sound to the murals, making them accessible and engaging for everyone, especially children. The collaboration beautified the village and strengthened the bond within the community, as they all came together to celebrate their shared history and culture.

Completion for None with model Titan-Text-G1-Express:0.5-top_p:1-max_tokens:300, completion 2:
Two friends, Emily and Jack, collaborated on a project to create murals around the village that depicted its history and culture. Emily's vibrant murals were adorned with colors and intricate details, while Jack's inventions added motion and sound to the scenes. The collaboration strengthened the bond within the com