The code location: https://github.com/Barqawiz/ChatGPT-AWS-S3-Assistant

In [None]:
# uncomment below if you did not install the requirements
#! pip install openai==0.27.8
#! pip install boto3==1.26.155
#! pip install tenacity==8.2.2
#! pip install python-dotenv==1.0.0

In [1]:
import datetime
import inspect
import json
import os
import tempfile
from urllib.request import urlretrieve

import boto3
import openai
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

## Initial setup

In [2]:
from dotenv import load_dotenv
# Load settings from a local .env file; OPENAI_API_KEY is read from the
# environment in a later cell.
# If opened with Colab, make sure to upload the .env file first.
load_dotenv() 

True

In [3]:
# Read the OpenAI key from the environment. `os.environ.get` yields None when
# the variable is missing, so a missing key is not reported by this cell.
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
# Module-level S3 client shared by every helper function defined below.
# No credentials or region are passed here, so boto3 falls back to its own
# default configuration lookup (TODO confirm which source is intended).
s3_client = boto3.client('s3')

In [5]:
# Function schemas passed to the chat model (the `functions` argument of the
# chat completion request). Each entry's "name" must match a key of
# `available_functions`, and its parameter names must match the keyword
# arguments of the Python helper with the same name, because the model's
# arguments are forwarded to that helper as keyword arguments.
functions = [
    # list_buckets(): takes no arguments.
    {
        "name": "list_buckets",
        "description": "List all available S3 buckets",
        "parameters": {
            "type": "object",
            "properties": {}
        }
    },
    # list_objects(bucket, prefix=''): only `bucket` is required.
    {
        "name": "list_objects",
        "description": "List the objects or files inside a given S3 bucket",
        "parameters": {
            "type": "object",
            "properties": {
                "bucket": {"type": "string", "description": "The name of the S3 bucket"},
                "prefix": {"type": "string", "description": "The folder path in the S3 bucket"},
            },
            "required": ["bucket"],
        },
    },
    # download_file(bucket, key, directory): all three arguments are required.
    {
        "name": "download_file",
        "description": "Download a specific file from an S3 bucket to a local distribution folder.",
        "parameters": {
            "type": "object",
            "properties": {
                "bucket": {"type": "string", "description": "The name of the S3 bucket"},
                "key": {"type": "string", "description": "The path to the file inside the bucket"},
                "directory": {"type": "string", "description": "The local destination directory to download the file, should be specificed by the user."},
            },
            "required": ["bucket", "key", "directory"],
        }
    },
    # upload_file(source, bucket, key, is_remote_url): the schema marks
    # `is_remote_url` as required even though the Python helper defaults it
    # to False.
    {
        "name": "upload_file",
        "description": "Upload a file to an S3 bucket",
        "parameters": {
            "type": "object",
            "properties": {
                "source": {"type": "string", "description": "The local source path or remote URL"},
                "bucket": {"type": "string", "description": "The name of the S3 bucket"},
                "key": {"type": "string", "description": "The path to the file inside the bucket"},
                "is_remote_url": {"type": "boolean", "description": "Is the provided source a URL (True) or local path (False)"},
            },
            "required": ["source", "bucket", "key", "is_remote_url"],
        }
    },
    # search_s3_objects(search_name, bucket=None, prefix='', exact_match=True):
    # only `search_name` is required; the helper searches every bucket when
    # `bucket` is omitted.
    {
        "name": "search_s3_objects",
        "description": "Search for a specific file name inside an S3 bucket",
        "parameters": {
            "type": "object",
            "properties": {
                "search_name": {"type": "string", "description": "The name of the file you want to search for"},
                "bucket": {"type": "string", "description": "The name of the S3 bucket"},
                "prefix": {"type": "string", "description": "The folder path in the S3 bucket"},
                "exact_match": {"type": "boolean", "description": "Indicates if search should match the file name exactly, set False to compating part of string (constains)"}
            },
            "required": ["search_name"],
        },
    }
]

## Helper Functions

In [6]:
def datetime_converter(obj):
    """Fallback serializer meant for ``json.dumps(..., default=...)``.

    Args:
        obj: The value ``json`` could not serialize on its own.

    Returns:
        The ISO 8601 string of ``obj`` when it is a ``datetime.datetime``.

    Raises:
        TypeError: For any other type, mirroring the error ``json`` itself
            raises for unsupported values.
    """
    # Guard clause: anything that is not a datetime is rejected up front.
    if not isinstance(obj, datetime.datetime):
        type_name = obj.__class__.__name__
        raise TypeError(f"Object of type {type_name} is not JSON serializable")
    return obj.isoformat()

In [7]:
def list_buckets():
    """Return the account's S3 buckets as a JSON string.

    Returns:
        A JSON array built from the ``Buckets`` entry of the S3
        ``list_buckets`` response; datetime values are written as ISO 8601
        strings by ``datetime_converter``.
    """
    buckets = s3_client.list_buckets()['Buckets']
    return json.dumps(buckets, default=datetime_converter)

def list_objects(bucket, prefix=''):
    """Return every object under ``prefix`` in ``bucket`` as a JSON string.

    A single ``list_objects_v2`` call returns only one page of results, so
    the listing is read through the client's paginator and all pages are
    merged; otherwise large buckets would be silently truncated.

    Args:
        bucket: Name of the S3 bucket.
        prefix: Key prefix ("folder path") to restrict the listing to.
            ``None`` is treated like the empty prefix.

    Returns:
        A JSON array with the ``Contents`` entries of all pages (an empty
        array when nothing matches); datetime values are written as ISO 8601
        strings by ``datetime_converter``.
    """
    paginator = s3_client.get_paginator('list_objects_v2')
    contents = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix or ''):
        # Pages without matches carry no 'Contents' key at all.
        contents.extend(page.get('Contents', []))
    return json.dumps(contents, default=datetime_converter)

def download_file(bucket, key, directory):
    """Download one S3 object into a local directory.

    The local file keeps the last path segment of ``key`` as its name.

    Args:
        bucket: Name of the S3 bucket.
        key: Key of the object inside the bucket.
        directory: Local destination directory. A leading ``~`` is expanded
            and the directory is created when it does not exist yet. An
            empty string means the current working directory.

    Returns:
        A JSON string with ``status``, ``bucket``, ``key`` and the resolved
        ``destination`` path.

    Raises:
        ValueError: If ``key`` has no file name component (for example a
            key ending in ``/``), since there is nothing to name the local
            file after.
    """
    filename = os.path.basename(key)
    if not filename:
        raise ValueError(f"Key {key!r} does not contain a file name to download")

    # Expand "~" before creating anything, otherwise a literal "~" directory
    # would be created in the working directory.
    directory = os.path.expanduser(directory)
    if directory:
        # The S3 client does not create missing parent directories itself.
        os.makedirs(directory, exist_ok=True)

    # Resolve destination to the correct file path
    destination = os.path.join(directory, filename)

    s3_client.download_file(bucket, key, destination)
    return json.dumps({"status": "success", "bucket": bucket, "key": key, "destination": destination})

def upload_file(source, bucket, key, is_remote_url=False):
    """Upload a local file, or the content behind a URL, to S3.

    Remote sources are first downloaded into a private temporary directory
    that is removed once the upload finishes. Nothing is written to the
    working directory, so an existing local file with the same name as the
    URL's last segment can no longer be overwritten, and no downloaded copy
    is left behind.

    Args:
        source: Local file path, or a URL when ``is_remote_url`` is true.
        bucket: Name of the destination S3 bucket.
        key: Key to store the object under.
        is_remote_url: Whether ``source`` is a URL to fetch first.

    Returns:
        A JSON string with ``status``, the ``source`` exactly as it was
        passed in, ``bucket`` and ``key``.
    """
    if is_remote_url:
        # NOTE(review): `source` comes from model-generated arguments and
        # urlretrieve accepts every scheme urllib supports (including
        # file://). Consider restricting it to http/https.
        with tempfile.TemporaryDirectory() as staging_dir:
            staged_path = os.path.join(staging_dir, "download")
            urlretrieve(source, staged_path)
            s3_client.upload_file(staged_path, bucket, key)
    else:
        s3_client.upload_file(source, bucket, key)

    return json.dumps({"status": "success", "source": source, "bucket": bucket, "key": key})

def search_s3_objects(search_name, bucket=None, prefix='', exact_match=True):
    """Search object keys for a file name, case-insensitively.

    Args:
        search_name: File name (or key) to look for.
        bucket: Bucket to search. When ``None``, every bucket returned by
            ``list_buckets`` is searched.
        prefix: Key prefix ("folder path") to restrict the search to.
        exact_match: When true, an object matches if ``search_name`` equals
            either its full key or its file name (the part after the last
            ``/``), so files inside folders are found by name. When false,
            an object matches if ``search_name`` occurs anywhere in its key.

    Returns:
        A JSON array of ``{"Bucket": <name>, "Object": <listing entry>}``
        items, one per matching object.
    """
    needle = search_name.lower()

    if bucket is None:
        bucket_names = [info["Name"] for info in json.loads(list_buckets())]
    else:
        bucket_names = [bucket]

    def _is_match(key):
        lowered = key.lower()
        if not exact_match:
            return needle in lowered
        # S3 keys use "/" as the folder separator regardless of platform.
        return needle == lowered or needle == lowered.rsplit('/', 1)[-1]

    results = []
    for bucket_name in bucket_names:
        for obj in json.loads(list_objects(bucket_name, prefix)):
            if _is_match(obj['Key']):
                results.append({"Bucket": bucket_name, "Object": obj})

    return json.dumps(results)

In [8]:
# Dispatch table from the function name chosen by the model to the Python
# helper that implements it. The keys mirror the "name" fields of the
# `functions` schema list.
available_functions = {
    "list_buckets": list_buckets,
    "list_objects": list_objects,
    "download_file": download_file,
    "upload_file": upload_file,
    "search_s3_objects": search_s3_objects
}

## Chatbot

In [9]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def chat_completion_request(messages, functions=None, function_call='auto', 
                            model_name='gpt-4-0613'):
    """Send a chat completion request, optionally exposing callable functions.

    The call is wrapped by tenacity's ``retry`` decorator, configured with
    ``wait_random_exponential(min=1, max=60)`` and ``stop_after_attempt(6)``.

    Args:
        messages: Conversation so far, as a list of chat message dicts.
        functions: Function schemas to offer to the model. When ``None``,
            neither ``functions`` nor ``function_call`` is sent.
        function_call: Function-calling mode forwarded together with
            ``functions``.
        model_name: Model to query (``gpt-3.5-turbo-0613`` is an
            alternative noted by the original author).

    Returns:
        The response returned by ``openai.ChatCompletion.create``.
    """
    request = {"model": model_name, "messages": messages}
    if functions is not None:
        # The function-calling fields are only sent when schemas are given.
        request["functions"] = functions
        request["function_call"] = function_call
    return openai.ChatCompletion.create(**request)

### manual conversation flow

In [10]:
def _run_requested_function(function_name, raw_arguments):
    """Validate a model-issued function call and execute it.

    The model can name a function that does not exist, or emit arguments
    that are not valid JSON or do not fit the function's signature. Those
    cases are turned into a JSON error payload, so the model can report the
    problem instead of the conversation crashing. Errors raised by the
    function itself are not intercepted.

    Returns:
        The function's own return value, or a JSON string of the form
        ``{"status": "error", "message": ...}`` when the call is invalid.
    """
    handler = available_functions.get(function_name)
    if handler is None:
        return json.dumps({"status": "error",
                           "message": f"Unknown function '{function_name}'."})

    try:
        # An empty or missing argument string means "no arguments".
        function_args = json.loads(raw_arguments) if raw_arguments else {}
    except (TypeError, ValueError) as exc:
        return json.dumps({"status": "error",
                           "message": f"Arguments for '{function_name}' are not valid JSON: {exc}"})

    if not isinstance(function_args, dict):
        return json.dumps({"status": "error",
                           "message": f"Arguments for '{function_name}' must be a JSON object."})

    try:
        # Check names and required parameters without running the function,
        # so only argument mismatches are caught here.
        inspect.signature(handler).bind(**function_args)
    except TypeError as exc:
        return json.dumps({"status": "error",
                           "message": f"Invalid arguments for '{function_name}': {exc}"})

    return handler(**function_args)


def run_manual_conversation(user_input, topic="s3 bucket functions.", is_log=False):
    """Run one user turn: ask the model, execute a requested function, summarize.

    Args:
        user_input: The user's request in natural language.
        topic: Scope the assistant is restricted to by the system message.
        is_log: When true, print the raw choices of the first model response.

    Returns:
        The assistant's final text. This is the first response's content when
        no function was requested, otherwise the content of a second response
        that sees the function result (or the validation error).
    """
    system_message=f"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous. If the user ask question not related to {topic} response your scope is {topic} only."

    messages = [{"role": "system", "content": system_message},
                {"role": "user", "content": user_input}]

    # Call the model to get a response
    response = chat_completion_request(messages, functions=functions)
    response_message = response['choices'][0]['message']

    if is_log:
        print(response['choices'])

    # No function requested: the first answer is already the final one.
    requested_call = response_message.get("function_call")
    if not requested_call:
        return response_message['content']

    function_name = requested_call['name']
    function_response = _run_requested_function(function_name,
                                                requested_call.get('arguments'))

    # Add the request and its result to the conversation
    messages.append(response_message)
    messages.append({
        "role": "function",
        "name": function_name,
        "content": function_response,
    })

    # Call the model again to summarize the results
    second_response = chat_completion_request(messages)
    return second_response['choices'][0]['message']['content']

#### s3 bucket bot testing

In [11]:
# Demo: a request that is expected to map to `list_buckets`, which takes no
# arguments.
print(run_manual_conversation('list my s3 buckets'))

Here are your S3 buckets along with their creation dates:

1. "bakcup-ahm-test" - Creation Date: 2023-06-17T15:34:07+00:00
2. "elasticbeanstalk-eu-west-1-576659633313" - Creation Date: 2023-03-17T20:55:33+00:00
3. "illinois-paper" - Creation Date: 2023-06-17T13:28:45+00:00
4. "sagemaker-studio-576659633313-jtlwwq35m9e" - Creation Date: 2023-03-31T20:15:15+00:00
5. "sagemaker-studio-l12jf4ctxw" - Creation Date: 2023-03-31T20:08:22+00:00


In [18]:
# Demo: a search that names a file but no bucket; `search_s3_objects` scans
# every bucket when `bucket` is omitted.
print(run_manual_conversation('search for a file reflections.zip in all buckets'))

I found the file "reflections.zip" in the following S3 buckets:

1. Bucket: "bakcup-ahm-test", Last Modified: "2023-06-17T16:05:16+00:00", Size: 4862255 bytes, Storage Class: STANDARD
2. Bucket: "illinois-paper", Last Modified: "2023-06-17T12:38:24+00:00", Size: 4862255 bytes, Storage Class: STANDARD


In [13]:
# Demo: an under-specified search request; the system message tells the
# model to ask for clarification when a request is ambiguous.
print(run_manual_conversation('search for a contains reflection', is_log=False))

It seems like you want to search for a file name that contains the word "reflection". However, I need to know the name of the S3 bucket in which we should perform this search. Could you please provide that? Also, let me know if you want to restrict the search under a specific folder path in that bucket.


In [14]:
# Demo: a search request without a file name, although `search_name` is the
# one required argument of `search_s3_objects`.
print(run_manual_conversation('search for a file'))

Sure, to help me find what you're looking for, could you please provide the name of the file you want to search for and the name of the S3 bucket? Also, should the search match the file name exactly, or should it also consider partial matches?


In [15]:
# Demo: an off-topic question; the system message restricts the assistant to
# the configured topic.
print(run_manual_conversation('what is the weather today'))

Apologies for the misunderstanding, but I am only able to assist with S3 bucket functions. Can you please ask a question related to S3 bucket functions?


In [16]:
# Demo: a download request that names the bucket, the file and a local
# folder, i.e. the three required arguments of `download_file`.
print(run_manual_conversation('download Reflections.zip from illinois-paper bucket to current directory and download to ./resource folder'))

I have successfully downloaded the file "Reflections.zip" from the "illinois-paper" bucket to the "./resource" folder.


In [17]:
# Demo: upload of a local file.
# NOTE(review): the prompt gives no object key although `key` is required by
# the `upload_file` schema, so the key used is chosen by the model — confirm
# this is intended.
print(run_manual_conversation('upload ./resource/Reflections.zip to bakcup-ahm-test bucket'))

The file `Reflections.zip` is successfully uploaded to the `bakcup-ahm-test` bucket.


MIT License

Copyright (c) 2023 Albarqawi