# 2023-03-16 | Batch Transform Jobs for Westbrook Sites

Run batch transform jobs in `conte-prod` on all westbrook sites.

Steps:

1. fetch authentication tokens for `conte-prod`
1. get list of stations owned by USGS Conte
1. for each station, get list of imagesets
1. for each imageset, create batch transform job and save output to sagemaker bucket

This notebook is designed to run in the `fpe-pii` conda environment.

```sh
conda activate fpe-pii
```

## Authentication Tokens

Fetch credentials for `conte-prod` using `aws-cli`. Enter password when requested. Final command should list all s3 buckets owned by `conte-prod`.

```sh
conda activate aws-cli
export AWS_PROFILE=conte-prod
# activate VPN
aws s3 ls
```

## Database

Load credentials from `.env.local` and connect to the FPE database using `sqlalchemy` (< 2.0).

In [2]:
%reload_ext dotenv
%dotenv ../../.env.local

In [3]:
import os
import json

from sqlalchemy import create_engine

DATABASE_URL = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_DBNAME')}"
engine = create_engine(DATABASE_URL)
print(engine)

Engine(postgresql://sheds:***@fpe-prod.c5p6gaiawuao.us-west-2.rds.amazonaws.com:5432/postgres)


Fetch stations using `pandas`

In [4]:
import pandas as pd

conte_user_id = "0626d282-0267-40b0-8f17-214c8f72e551"

query = f"SELECT id, name FROM stations WHERE user_id='{conte_user_id}'"

# Execute the SQL query and return the results as a Pandas DataFrame
df_stations = pd.read_sql(query, engine)

print(f"stations: n={len(df_stations)}")

df_stations.head()

stations: n=23


Unnamed: 0,id,name
0,12,Avery Brook_Bridge_01171000
1,15,Avery Brook_River Left_01171000
2,42,Dry Brook Lower
3,9,Sanderson Brook_01171010
4,10,West Brook Lower_01171090


In [5]:
# fetch imagesets

query = f"SELECT imagesets.id as imageset_id, station_id, stations.name as station_name, uuid as imageset_uuid, n_images FROM imagesets LEFT JOIN stations ON imagesets.station_id=stations.id WHERE stations.user_id='{conte_user_id}' AND imagesets.status='DONE'"

# Execute the SQL query and return the results as a Pandas DataFrame
df_imagesets = pd.read_sql(query, engine)

print(f"df_imagesets: n={len(df_imagesets)}")

df_imagesets.head()

df_imagesets: n=382


Unnamed: 0,imageset_id,station_id,station_name,imageset_uuid,n_images
0,326,17,West Whately_01171005,03d970b4-7e0f-4ee5-b895-39222ac214d8,2610
1,333,10,West Brook Lower_01171090,c01b91e1-8f01-4aa5-98b1-828d7e84b408,2180
2,380,42,Dry Brook Lower,7fc5646c-0ef0-4d96-8d3b-58f597d28856,1320
3,363,33,Mitchell Brook_01171080,b776ed42-5607-4de8-a287-bbc2e60d8c7f,3317
4,335,18,Obear Brook Lower_01171070,d210fdb7-fbd1-483d-92dc-40ee2ed381af,134


In [6]:
# filter imagesets for westbrook zero (station.id=29)
df_imagesets_westbrook_0 = df_imagesets[df_imagesets['station_id'] == 29]

print(f"# images at westbrook 0: {df_imagesets_westbrook_0['n_images'].sum()}")
df_imagesets_westbrook_0

# images at westbrook 0: 28614


Unnamed: 0,imageset_id,station_id,station_name,imageset_uuid,n_images
7,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280
12,421,29,West Brook 0_01171100,cb2b67b0-0f07-4351-b96d-2659d5b56c9f,2749
31,396,29,West Brook 0_01171100,33c0736b-f5fb-4da6-91b0-83a72c285382,2455
46,423,29,West Brook 0_01171100,6cd6870a-0260-4687-840d-1ac4ac2794e4,1704
52,526,29,West Brook 0_01171100,b94e847f-43c3-43cd-973a-14f0c5af29ad,2427
78,593,29,West Brook 0_01171100,5a53b364-7a42-4708-b66e-d837c6b05f3e,315
110,495,29,West Brook 0_01171100,a7918a28-258e-40a7-bf79-8aa1895d4c65,4702
268,289,29,West Brook 0_01171100,e8d465f6-5784-4231-967f-9000428e9748,717
311,713,29,West Brook 0_01171100,e2a3b2b6-d391-481a-9814-dc61cf83a990,3547
331,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955


## Batch Transform

In [7]:
import boto3, time

# config
AWS_PROFILE="conte-prod"
AWS_REGION="us-west-2"
STORAGE_BUCKET_NAME="usgs-chs-conte-prod-fpe-storage"
MODEL_BUCKET_NAME="usgs-chs-conte-prod-fpe-models"
SAGEMAKER_MODEL_NAME="fpe-prod-pii"
SAGEMAKER_ROLE_ARN="arn:aws:iam::694155575325:role/fpe-prod-pii-execution-role"

def timestamp():
    return time.strftime("%Y%m%d-%H%M%S")

# get credentials for sagemaker execution role
def get_sagemaker_creds(session, role_arn):
    sts = session.client("sts")
    response = sts.assume_role(
        RoleArn=role_arn,
        RoleSessionName=f"fpe-pii-session--{timestamp()}"
    )
    return response['Credentials']

# function to create parameters for a SageMaker batch transform job given an imageset uuid
def create_batch_transform_parameters(imageset_uuid, instance_count = 1, instance_type = "ml.m5.large"):
    return {
        "TransformJobName": f"{SAGEMAKER_MODEL_NAME}-{imageset_uuid[:5]}-{timestamp()}",
        "ModelName": SAGEMAKER_MODEL_NAME,
        "TransformInput": {
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": f"s3://{STORAGE_BUCKET_NAME}/imagesets/{imageset_uuid}/images/",
                }
            },
            "CompressionType": "None",
        },
        "TransformOutput": {
            "S3OutputPath": f"s3://{MODEL_BUCKET_NAME}/pii/imagesets/{imageset_uuid}/",
            "AssembleWith": "Line",
        },
        "TransformResources": {
            "InstanceType": instance_type,
            "InstanceCount": instance_count,
        },
        "BatchStrategy": "MultiRecord",
        "ModelClientConfig": {
            "InvocationsTimeoutInSeconds": 3600,
            "InvocationsMaxRetries": 1,
        },
    }

def submit_batch_transform(client, request):
    return client.create_transform_job(**request)

def stop_batch_transform(client, TransformJobName):
    return client.stop_transform_job(TransformJobName=TransformJobName)

def monitor_batch_transform(client, TransformJobName):
    print(f"monitoring transform job: {TransformJobName}")
    while True:
        response = client.describe_transform_job(TransformJobName=TransformJobName)
        status = response["TransformJobStatus"]
        if status == "Completed":
            print("Transform job ended with status: " + status)
            break
        if status == "Failed":
            message = response["FailureReason"]
            print("Transform failed with the following error: {}".format(message))
            raise Exception("Transform job failed")
        print("Transform job is still in status: " + status)
        time.sleep(30)



In [8]:
session = boto3.Session(profile_name=AWS_PROFILE)
creds = get_sagemaker_creds(session, SAGEMAKER_ROLE_ARN)
print(creds)

{'AccessKeyId': 'ASIA2DHXFQAOQOS6FWK4', 'SecretAccessKey': '+0GCd5FxfoDT879zZ/KHXfmfz7YSgovzcY55p/v/', 'SessionToken': 'FwoGZXIvYXdzEH0aDFS7QXC3mNqgED2wyiLEAQfa5iYPhyAOe171qRC31t0XYWFO4VXxpXUYhib//+EHB0rWwn5aTX9BNKKI13HvJsPKX+Jy76MPPfggEO1Q4zswgASG+3kuqK8W80yRa2h9zD4tjH2cnTAHEuXpEoSZzWscNvJxY/3kbIwqqAcr9fmznpqL44jOjE8/3D3otswzOskYEbgb5r9csW/7j4YJVr4yL8uzUrYXksT2i/K7gsfG1fsiYtfAxNvn/8yQIMGlaPcahFQrcyh7T7MWayY/hx0fVa4ovYvToAYyLZTd7SWCHeldJU2eYjB8K8yc2c1DgvdI9n0t+XjIMrYUW7mjkw+HIMMaU6fRow==', 'Expiration': datetime.datetime(2023, 3, 17, 20, 55, 41, tzinfo=tzutc())}


In [9]:
sm_session = boto3.Session(aws_access_key_id=creds['AccessKeyId'],
                           aws_secret_access_key=creds['SecretAccessKey'],
                           aws_session_token=creds['SessionToken'],
                           region_name=AWS_REGION)
print(sm_session)
sm_client = sm_session.client('sagemaker')
print(sm_client)

Session(region_name='us-west-2')
<botocore.client.SageMaker object at 0x00000132D5B7F100>


In [55]:
request = create_batch_transform_parameters(df_imagesets['imageset_uuid'][0], instance_count=4)
request

{'TransformJobName': 'fpe-prod-pii-03d97-20230316-125938',
 'ModelName': 'fpe-prod-pii',
 'TransformInput': {'DataSource': {'S3DataSource': {'S3DataType': 'S3Prefix',
    'S3Uri': 's3://usgs-chs-conte-prod-fpe-storage/imagesets/03d970b4-7e0f-4ee5-b895-39222ac214d8/images/'}},
  'CompressionType': 'None'},
 'TransformOutput': {'S3OutputPath': 's3://usgs-chs-conte-prod-fpe-models/pii/imagesets/03d970b4-7e0f-4ee5-b895-39222ac214d8/',
  'AssembleWith': 'Line'},
 'TransformResources': {'InstanceType': 'ml.m5.large', 'InstanceCount': 4},
 'BatchStrategy': 'MultiRecord',
 'ModelClientConfig': {'InvocationsTimeoutInSeconds': 3600,
  'InvocationsMaxRetries': 1}}

In [56]:
submit_batch_transform(sm_client, request)

{'TransformJobArn': 'arn:aws:sagemaker:us-west-2:694155575325:transform-job/fpe-prod-pii-03d97-20230316-125938', 'ResponseMetadata': {'RequestId': 'd2394161-00ab-4f2e-8f3b-08cdbc70c152', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'd2394161-00ab-4f2e-8f3b-08cdbc70c152', 'content-type': 'application/x-amz-json-1.1', 'content-length': '111', 'date': 'Thu, 16 Mar 2023 16:59:43 GMT'}, 'RetryAttempts': 0}}


In [57]:
monitor_batch_transform(sm_client, request)

monitoring transform job: fpe-prod-pii-03d97-20230316-125938
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InProgress
Transform job is still in status: InPro

ClientError: An error occurred (ExpiredTokenException) when calling the DescribeTransformJob operation: The security token included in the request is expired

In [46]:
for index, row in df_imagesets_westbrook_0[9:].iterrows():
    uuid = row.uuid
    request = create_batch_transform_parameters(uuid, instance_count=1)
    response = submit_batch_transform(sm_client, request)
    print(f"imageset.uuid={uuid}, TransformJobName={request['TransformJobName']}")


imageset.uuid=265292ae-007e-4a94-a86c-e01028d85c1f, TransformJobName=fpe-prod-pii-26529-20230316-150020
imageset.uuid=935ac74c-7fc1-476b-ac66-f6f9070e2209, TransformJobName=fpe-prod-pii-935ac-20230316-150021


In [65]:
stop_batch_transform(sm_client, 'fpe-prod-pii-ebfbd-20230316-135116')

{'ResponseMetadata': {'RequestId': 'eea93efa-ac89-441b-8021-0eb41d8a681d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'eea93efa-ac89-441b-8021-0eb41d8a681d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Thu, 16 Mar 2023 17:53:14 GMT'},
  'RetryAttempts': 0}}

In [27]:
df_imagesets_westbrook_0

Unnamed: 0,id,station_id,station_name,uuid,n_images
7,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280
12,421,29,West Brook 0_01171100,cb2b67b0-0f07-4351-b96d-2659d5b56c9f,2749
31,396,29,West Brook 0_01171100,33c0736b-f5fb-4da6-91b0-83a72c285382,2455
46,423,29,West Brook 0_01171100,6cd6870a-0260-4687-840d-1ac4ac2794e4,1704
52,526,29,West Brook 0_01171100,b94e847f-43c3-43cd-973a-14f0c5af29ad,2427
78,593,29,West Brook 0_01171100,5a53b364-7a42-4708-b66e-d837c6b05f3e,315
110,495,29,West Brook 0_01171100,a7918a28-258e-40a7-bf79-8aa1895d4c65,4702
268,289,29,West Brook 0_01171100,e8d465f6-5784-4231-967f-9000428e9748,717
311,713,29,West Brook 0_01171100,e2a3b2b6-d391-481a-9814-dc61cf83a990,3547
331,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955


## Process Results

Fetch output files from s3 and save max detection probabilities to dataframe along with imageset uuid and image id, filename, and url.

In [9]:
def s3_list_objects(client, bucket, prefix = ''):
    # Set up the initial continuation token
    continuation_token = ''
    df = pd.DataFrame(columns=['Key'])

    # Loop over all objects in the bucket
    while True:
        if (continuation_token):
            response = client.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=continuation_token)
        else:
            response = client.list_objects_v2(Bucket=bucket, Prefix=prefix)

        # Print the names of all objects in the response
        # for obj in response['Contents']:
        #     print(obj['Key'])
        df_new = pd.DataFrame([{'Key': obj['Key']} for obj in response['Contents']])
        df = pd.concat([df, df_new], ignore_index=True)

        # Check if there are more objects to fetch
        if 'NextContinuationToken' in response:
            continuation_token = response['NextContinuationToken']
        else:
            break

    return df


In [10]:
s3 = session.client('s3')
s3

<botocore.client.S3 at 0x1e096ad53a0>

In [11]:
def extract_imageset_uuid(key):
    return key.split('/')[-2]

def extract_image_filename(key):
    basename = os.path.basename(key)
    return os.path.splitext(basename)[0]

key='pii/imagesets/03d970b4-7e0f-4ee5-b895-39222ac214d8/West Whately__2022-02-01__08-45-01(1).JPG.out'
print(f"key={key}, uuid={extract_imageset_uuid(key)}, filename={extract_image_filename(key)}")

key=pii/imagesets/03d970b4-7e0f-4ee5-b895-39222ac214d8/West Whately__2022-02-01__08-45-01(1).JPG.out, uuid=03d970b4-7e0f-4ee5-b895-39222ac214d8, filename=West Whately__2022-02-01__08-45-01(1).JPG


In [14]:
def pii_image_key(imageset_uuid, filename):
    return(f"pii/imagesets/{imageset_uuid}/{filename}.out")

classes = { "1": "animal", "2": "person", "3": "vehicle" }

def get_images_for_imageset(engine, imageset_uuid):
    query = f"SELECT imagesets.id as imageset_id, imagesets.uuid as imageset_uuid, images.id as image_id, images.filename, images.timestamp, images.full_url FROM imagesets LEFT JOIN images ON imagesets.id=images.imageset_id WHERE imagesets.uuid='{imageset_uuid}'"
    # Execute the SQL query and return the results as a Pandas DataFrame
    return(pd.read_sql(query, engine))

def get_pii_detections_for_imageset(s3_client, db_engine, imageset_uuid):
    print(f"get_pii_detections_for_imageset(uuid='{imageset_uuid}')")
    df_images = get_images_for_imageset(db_engine, imageset_uuid)
    dfs = []
    for index, row in df_images.iterrows():
        image_detections = get_pii_detections_for_image(s3_client, imageset_uuid, row.filename)
        if (len(image_detections) > 0):
            image_detections['filename'] = row.filename
            dfs.append(image_detections)
    df_detections = pd.concat(dfs, ignore_index=True)
    df_detections = pd.merge(df_images, df_detections, on='filename', how='outer')
    return(df_detections)

def get_pii_detections_for_imagesets(s3_client, db_engine, imageset_uuids):
    dfs = []
    for uuid in imageset_uuids:
        imageset_detections = get_pii_detections_for_imageset(s3_client, db_engine, uuid)
        imageset_detections.to_csv(f'../data/detections/{uuid}.csv', index=False)
        if (len(imageset_detections) > 0):
            dfs.append(imageset_detections)
    return(pd.concat(dfs, ignore_index=True))

def get_pii_detections_for_image(client, imageset_uuid, filename):
    # print(f"get_pii_detections_for_image(uuid='{imageset_uuid}', filename='{filename}')")
    key = pii_image_key(imageset_uuid, filename)
    # key = 'test/pii/Atherton Brook__2023-02-15__13-48-48(49).JPG.out'
    try:
        response = client.get_object(Bucket=MODEL_BUCKET_NAME, Key=key)
    except:
        print(f'WARNING: pii results not found at key={key}')
        return([])
    content = response['Body'].read().decode('utf-8')
    data = json.loads(content)
    if (len(data) == 0):
        return([])
    df = pd.DataFrame(data)
    max_confidences = df.groupby('class')['confidence'].max().reset_index()
    max_confidences['class'] = max_confidences['class'].astype(str).map(classes)
    return(max_confidences)

# rows = get_pii_detections_for_image(s3, '03d970b4-7e0f-4ee5-b895-39222ac214d8', 'West Whately__2022-02-01__08-45-01(1).JPG')
# pd.DataFrame(rows)

In [15]:
%%time

# df_images = get_images_for_imageset(engine, '03d970b4-7e0f-4ee5-b895-39222ac214d8')
# df_detections = get_pii_detections_for_imagesets(s3, engine, ['03d970b4-7e0f-4ee5-b895-39222ac214d8', 'e8d465f6-5784-4231-967f-9000428e9748'])
get_pii_detections_for_imagesets(s3, engine, df_imagesets_westbrook_0['imageset_uuid'].to_list())

get_pii_detections_for_imageset(uuid='265292ae-007e-4a94-a86c-e01028d85c1f')
get_pii_detections_for_imageset(uuid='935ac74c-7fc1-476b-ac66-f6f9070e2209')
CPU times: total: 27.8 s
Wall time: 15min 36s


Unnamed: 0,imageset_id,station_id,station_name,imageset_uuid,n_images,image_id,filename,full_url,class,confidence
0,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644113,West Brook 0__2022-12-20__10-24-55(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.017867
1,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644160,West Brook 0__2022-12-20__22-09-55(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.00426
2,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644162,West Brook 0__2022-12-20__22-39-54(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.009702
3,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644163,West Brook 0__2022-12-20__22-54-54(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.0046
4,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644165,West Brook 0__2022-12-20__23-30-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.011965
5,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644167,West Brook 0__2022-12-21__00-15-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.016478
6,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644169,West Brook 0__2022-12-21__00-45-01(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.003712
7,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644171,West Brook 0__2022-12-21__01-15-01(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.003622
8,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644172,West Brook 0__2022-12-21__01-30-01(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.007429
9,792,29,West Brook 0_01171100,265292ae-007e-4a94-a86c-e01028d85c1f,3955,1644174,West Brook 0__2022-12-21__02-00-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.004239


In [34]:
dfs = []
for uuid in df_imagesets_westbrook_0['imageset_uuid'].to_list():
    df_detections = pd.read_csv(f'../data/detections/{uuid}.csv')
    df_images = get_images_for_imageset(engine, uuid)[["imageset_id", "image_id", "timestamp"]]
    df = pd.merge(df_detections, df_images, on=["imageset_id", "image_id"], how="left")
    if (len(df) > 0):
        dfs.append(df)
df_detections = pd.merge(df_imagesets, pd.concat(dfs, ignore_index=True), on=['imageset_id','imageset_uuid'], how='inner').sort_values(["imageset_id", "timestamp"])
df_detections = df_detections[["station_id", "station_name", "imageset_id", "imageset_uuid", "image_id", "filename", "timestamp", "class", "confidence", "full_url"]]
df_detections.to_csv('../data/West Brook 0_01171100.csv', index=False)
df_detections.sort_values('confidence', ascending=False).head(10)

Unnamed: 0,station_id,station_name,imageset_id,imageset_uuid,image_id,filename,timestamp,class,confidence,full_url
10579,29,West Brook 0_01171100,396,33c0736b-f5fb-4da6-91b0-83a72c285382,623902,West Brook Master__2022-05-21__04-43-34(1).JPG,2022-05-21 09:43:34+00:00,animal,0.977956,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
21114,29,West Brook 0_01171100,495,a7918a28-258e-40a7-bf79-8aa1895d4c65,803661,West Brook Master__2022-08-04__04-13-16(1).JPG,2022-08-04 09:13:16+00:00,animal,0.967523,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
18608,29,West Brook 0_01171100,593,5a53b364-7a42-4708-b66e-d837c6b05f3e,992748,11090003.JPG,2022-11-09 16:42:55+00:00,person,0.964125,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
27044,29,West Brook 0_01171100,289,e8d465f6-5784-4231-967f-9000428e9748,442124,West Brook Master__2022-02-09__11-12-49(5).JPG,2022-02-09 16:12:49+00:00,person,0.961507,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
15442,29,West Brook 0_01171100,526,b94e847f-43c3-43cd-973a-14f0c5af29ad,856162,West Brook Master__2022-09-14__09-30-00(1).JPG,2022-09-14 14:30:00+00:00,animal,0.959596,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
19686,29,West Brook 0_01171100,495,a7918a28-258e-40a7-bf79-8aa1895d4c65,802637,West Brook Master__2022-07-24__21-53-06(1).JPG,2022-07-25 02:53:06+00:00,animal,0.959037,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
12791,29,West Brook 0_01171100,396,33c0736b-f5fb-4da6-91b0-83a72c285382,624655,West Brook Master__2022-05-29__14-14-36(1).JPG,2022-05-29 19:14:36+00:00,animal,0.958117,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
27043,29,West Brook 0_01171100,289,e8d465f6-5784-4231-967f-9000428e9748,442123,West Brook Master__2022-02-09__11-12-37(4).JPG,2022-02-09 16:12:37+00:00,person,0.957984,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
21051,29,West Brook 0_01171100,495,a7918a28-258e-40a7-bf79-8aa1895d4c65,803629,West Brook Master__2022-08-03__19-43-51(1).JPG,2022-08-04 00:43:51+00:00,animal,0.957866,https://usgs-chs-conte-prod-fpe-storage.s3.ama...
27045,29,West Brook 0_01171100,289,e8d465f6-5784-4231-967f-9000428e9748,442125,West Brook Master__2022-02-09__11-13-04(1).JPG,2022-02-09 16:13:04+00:00,person,0.956836,https://usgs-chs-conte-prod-fpe-storage.s3.ama...


In [28]:
df_detections

Unnamed: 0,imageset_id,station_id,station_name,imageset_uuid,n_images,image_id,filename,full_url,class,confidence,timestamp
0,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,543378,West Brook Master__2022-03-05__13-29-59(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.005634,2022-03-05 18:29:59+00:00
1,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,543379,West Brook Master__2022-03-05__13-44-59(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.057755,2022-03-05 18:44:59+00:00
2,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,543383,West Brook Master__2022-03-05__14-45-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.002601,2022-03-05 19:45:00+00:00
3,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,543383,West Brook Master__2022-03-05__14-45-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,vehicle,0.002776,2022-03-05 19:45:00+00:00
4,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,543387,West Brook Master__2022-03-05__15-45-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.001240,2022-03-05 20:45:00+00:00
...,...,...,...,...,...,...,...,...,...,...,...
5450,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,548558,West Brook Master__2022-05-02__16-44-59(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.012564,2022-05-02 21:44:59+00:00
5451,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,548559,West Brook Master__2022-05-02__16-59-59(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.007964,2022-05-02 21:59:59+00:00
5452,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,548562,West Brook Master__2022-05-02__17-45-00(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.002035,2022-05-02 22:45:00+00:00
5453,371,29,West Brook 0_01171100,ebfbde2f-222f-4687-9d62-05647f70914b,5280,548564,West Brook Master__2022-05-02__18-14-59(1).JPG,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,animal,0.028210,2022-05-02 23:14:59+00:00


In [35]:
df_detections.sort_values('confidence', ascending=False).head(20)['full_url'].tolist()

['https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/33c0736b-f5fb-4da6-91b0-83a72c285382/images/West Brook Master__2022-05-21__04-43-34(1).JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/a7918a28-258e-40a7-bf79-8aa1895d4c65/images/West Brook Master__2022-08-04__04-13-16(1).JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/5a53b364-7a42-4708-b66e-d837c6b05f3e/images/11090003.JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/e8d465f6-5784-4231-967f-9000428e9748/images/West Brook Master__2022-02-09__11-12-49(5).JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/b94e847f-43c3-43cd-973a-14f0c5af29ad/images/West Brook Master__2022-09-14__09-30-00(1).JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/imagesets/a7918a28-258e-40a7-bf79-8aa1895d4c65/images/West Brook Master__2022-07-24__21-53-06(1).JPG',
 'https://usgs-chs-conte-prod-fpe-storage.s3.amazonaws.com/ima

In [76]:
import requests
from PIL import Image
from io import BytesIO
import matplotlib.pyplot as plt
from IPython.display import display

def load_image(url, width=None):
    response = requests.get(url)

    if response.status_code == 200:
        img_data = BytesIO(response.content)
        img = Image.open(img_data)
        if width is not None:
            # Calculate the new height to maintain aspect ratio
            aspect_ratio = img.height / img.width
            new_height = int(width * aspect_ratio)
            img.thumbnail((width, new_height))
        return(img)
    else:
        print(f"Error: Unable to load image. HTTP status code {response.status_code}")
        return(None)

def display_images_grid(images, grid_size=(3, 3), width=None, output_file=None):
    fig, axes = plt.subplots(*grid_size, figsize=(grid_size[1] * 3, grid_size[0] * 3))

    for image, ax in zip(images, axes.flatten()):
        img = load_image(image['full_url'], width)
        if img is not None:
            ax.imshow(img)
            ax.axis('off')
            ax.set_title(f"{image['filename']}\nclass={image['class']}, confidence={image['confidence']:.3f}", fontsize=6, pad=2)  # Add the URL as the x-axis label
        else:
            ax.set_visible(False)

    plt.tight_layout()

    if (output_file is not None):
        plt.savefig(output_file, dpi=300, bbox_inches='tight')  # Save the output to a file
        plt.close(fig)  # Close the figure to prevent displaying it in the notebook
    else:
        plt.show()

TOP_GRID=(10, 10)
for detection_class in ["animal", "person", "vehicle"]:
    print(detection_class)
    TOP_N = TOP_GRID[0] * TOP_GRID[1]
    top_detections = df_detections[df_detections['class'] == detection_class].sort_values('confidence', ascending=False).head(TOP_N)
    display_images_grid(top_detections.to_dict(orient = "records"), TOP_GRID, width=640, output_file=f"../data/West Brook 0_01171100/top-{detection_class}-{TOP_N}.jpg")

# top_images
# top_detections

animal
person
vehicle
