# FPE Rank Model

In [3]:
import boto3
import time
import json
import os
import pandas as pd
import sagemaker
from io import StringIO

In [4]:
# AWS settings shared by every cell below.
AWS_PROFILE = "conte-prod"
AWS_REGION = "us-west-2"
JOB_ROLE_ARN = "arn:aws:iam::694155575325:role/fpe-prod-sagemaker-execution-role"

# Session built from the local named profile; `s3` is the client used for the
# uploads later in the notebook.
session = boto3.Session(profile_name=AWS_PROFILE)
s3 = session.client("s3")

In [5]:
def timestamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)

def get_batch_creds(session, role_arn):
    """Assume ``role_arn`` through STS and return the temporary credentials.

    Args:
        session: object exposing ``client("sts")`` (a boto3 session here).
        role_arn: ARN of the IAM role to assume.

    Returns:
        The ``'Credentials'`` entry of the ``assume_role`` response.
    """
    sts_client = session.client("sts")
    # Session name carries a timestamp so each call gets a distinct name.
    session_name = f"fpe-sagemaker-session--{timestamp()}"
    assumed = sts_client.assume_role(
        RoleArn=role_arn,
        RoleSessionName=session_name,
    )
    return assumed['Credentials']

# Temporary credentials obtained by assuming the job role.
creds = get_batch_creds(session, JOB_ROLE_ARN)

# boto3 session that carries those temporary credentials, pinned to AWS_REGION.
sm_boto_session = boto3.Session(
    region_name=AWS_REGION,
    aws_access_key_id=creds['AccessKeyId'],
    aws_secret_access_key=creds['SecretAccessKey'],
    aws_session_token=creds['SessionToken'],
)

# SageMaker SDK session used by the estimator/model cells below.
sm_session = sagemaker.Session(boto_session=sm_boto_session)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/jdwalker/.config/sagemaker/config.yaml


In [15]:
# --- Local layout -----------------------------------------------------------
# {fpe_dir}/{station_name}/{variable}/{dataset_version}/models/{model_version}
fpe_dir = "/mnt/d/fpe/rank"
station_name = "West Brook 0_01171100"
station_alias = "WESTB0"

variable = "FLOW_CFS"
dataset_version = "20240327"
model_version = "20240328"
dataset_dir = f"{fpe_dir}/{station_name}/{variable}/{dataset_version}"
model_dir = f"{dataset_dir}/models/{model_version}"
# NOTE: job_name embeds the current time, so re-running this cell produces a
# new name that no longer matches a training job started earlier.
job_name = f"fpe-rank-{station_alias}-{timestamp()}"

# Fail early (before any S3 traffic) when the local model directory is missing.
if not os.path.exists(model_dir):
    raise FileNotFoundError(f"model_dir not found ({model_dir})")

# --- S3 layout --------------------------------------------------------------
model_bucket = "usgs-chs-conte-prod-fpe-models"
storage_bucket = "usgs-chs-conte-prod-fpe-storage"

# Keys are keyed by the station alias rather than the full station name.
station_key = f"rank/{station_alias}"
data_key = f"{station_key}/{variable}/{dataset_version}"
model_key = f"{data_key}/models/{model_version}"
input_key = f"{model_key}/input"
jobs_key = f"{model_key}/jobs"
checkpoints_key = f"{model_key}/checkpoints"
transform_key = f"{model_key}/transform"

# Full s3:// URIs for the same prefixes, all inside model_bucket.
input_path = f"s3://{model_bucket}/{input_key}"
output_path = f"s3://{model_bucket}/{jobs_key}"
checkpoint_path = f"s3://{model_bucket}/{checkpoints_key}"
transform_path = f"s3://{model_bucket}/{transform_key}"
(job_name, input_path, output_path, checkpoint_path, transform_path)

('fpe-rank-WESTB0-20240328-215259',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/input',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/jobs',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/checkpoints',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/transform')

In [7]:
# upload input files to s3

# Every file found under the local input directory is uploaded beneath
# `input_key` using only its bare file name (sub-directory parts are not
# included in the key).
for current_dir, _subdirs, filenames in os.walk(f"{model_dir}/input"):
    for filename in filenames:
        local_path = os.path.join(current_dir, filename)
        s3_key = f"{input_key}/{filename}"
        print(f"uploading: {filename} -> {s3_key}")
        s3.upload_file(Filename=local_path, Bucket=model_bucket, Key=s3_key)

uploading: annotations.csv -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/annotations.csv
uploading: images-months.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images-months.png
uploading: images-ts.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images-ts.png
uploading: images-values-cfd.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images-values-cfd.png
uploading: images-values-hist.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images-values-hist.png
uploading: images-values-ts.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images-values-ts.png
uploading: images.csv -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/images.csv
uploading: manifest.json -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/manifest.json
uploading: pairs-timestamps.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/pairs-timestamps.png
uploading: pairs-values-delta.png -> rank/WESTB0/FLOW_CFS/20240327/models/20240328/inp

In [8]:
# def timestamp():
#     return time.strftime("%Y%m%d-%H%M%S")

# sts = session.client("sts")
# response = sts.assume_role(
#     RoleArn="arn:aws:iam::694155575325:role/fpe-prod-batch-job-role",
#     RoleSessionName=f"fpe-batch-session--{timestamp()}"
# )
# creds = response['Credentials']
# batch_boto_session = boto3.Session(
#     aws_access_key_id=creds['AccessKeyId'],
#     aws_secret_access_key=creds['SecretAccessKey'],
#     aws_session_token=creds['SessionToken'],
#     region_name=AWS_REGION
# )
# batch = batch_boto_session.client('batch')

# batch.terminate_job(jobId="aaf02bed-6135-4bb3-bbe7-54f58237a7a0", reason="running indefinitely")

## Training

In [9]:
from sagemaker.pytorch import PyTorch

# Training job definition for the rank model.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="src",
    py_version="py39",
    framework_version="1.13.1",
    # Reuse the notebook-level constant instead of repeating the ARN literal,
    # so the execution role is configured in exactly one place.
    role=JOB_ROLE_ARN,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    volume_size=100,
    hyperparameters={
        "epochs": 20
    },
    output_path=output_path,
    checkpoint_s3_uri=checkpoint_path,
    # Same prefix as output_path, so code and job output share one S3 location.
    code_location=output_path,
    disable_output_compression=False,
    sagemaker_session=sm_session
)

In [10]:
from sagemaker.inputs import TrainingInput

# Training channel described by the manifest.json stored under input_path.
manifest_uri = f"{input_path}/manifest.json"
input_images = TrainingInput(
    s3_data=manifest_uri,
    s3_data_type="ManifestFile",
    input_mode="File",
)
(input_images, manifest_uri, input_path)

(<sagemaker.inputs.TrainingInput at 0x7f556e5ad1c0>,
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/input/manifest.json',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/input')

In [16]:
# Submit the training job without blocking the notebook (wait=False).
training_channels = {"images": input_images, "values": input_path}
estimator.fit(training_channels, job_name=job_name, wait=False)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fpe-rank-WESTB0-20240328-215259


In [13]:
# sm_session.stop_training_job("pytorch-training-2024-03-29-01-49-35-117")

### Download Output

In [18]:
# Station metadata saved with the dataset; `station["timezone"]` is used later.
station_file = f"{dataset_dir}/dataset/station.json"
with open(station_file, "r") as station_fp:
    station = json.load(station_fp)
station

{'id': 29,
 'user_id': '0626d282-0267-40b0-8f17-214c8f72e551',
 'name': 'West Brook 0_01171100',
 'description': '50 feet downstream river left of pantry rd bridge.',
 'latitude': 42.4143,
 'longitude': -72.6293,
 'timezone': 'US/Eastern',
 'created_at': '2022-02-10 15:05:42',
 'updated_at': '2023-01-20 14:41:21',
 'private': False,
 'nwis_id': '01171100',
 'waterbody_type': 'ST',
 'status': 'ACTIVE'}

In [19]:
def download_output(session, job_name, jobs_key, model_dir, bucket=None):
    """Download a training job's ``output.tar.gz`` from S3 and unpack it locally.

    Args:
        session: object exposing ``client("s3")`` (a boto3 session in this notebook).
        job_name: name of the training job whose output is fetched.
        jobs_key: S3 key prefix under which per-job folders live.
        model_dir: local model directory; files are written to ``{model_dir}/output``.
        bucket: S3 bucket to read from. When omitted, the notebook-level
            ``model_bucket`` is used.

    Returns:
        Local path of the downloaded ``output.tar.gz``.

    Raises:
        Whatever the S3 client raises when the object is missing (e.g. the job
        has not finished yet), plus ``tarfile`` errors for a corrupt archive.
    """
    import tarfile  # local import: keeps this cell independent of the imports cell

    if bucket is None:
        bucket = model_bucket
    s3 = session.client("s3")
    output_dir = f"{model_dir}/output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = f"{output_dir}/output.tar.gz"
    print(f"job: {job_name}")
    output_key = f"{jobs_key}/{job_name}/output/output.tar.gz"
    print(f"downloading: s3://{bucket}/{output_key} -> {output_file}")
    s3.download_file(Bucket=bucket, Key=output_key, Filename=output_file)
    print(f"extracting: {output_file} -> {output_dir}")
    # Extract with tarfile rather than `!tar ...`: the shell form interpolated
    # the path unquoted (breaks on directories containing spaces, such as the
    # station name) and only works inside IPython.
    # NOTE(review): on Python >= 3.12 consider passing filter="data" to extractall.
    with tarfile.open(output_file, "r:gz") as archive:
        for member in archive.getmembers():
            print(member.name)
        archive.extractall(output_dir)
    # The original ended with a bare `output_file` expression, which returned None.
    return output_file

# Fetch and unpack the output of the training job named by `job_name`.
download_output(
    session=session,
    job_name=job_name,
    jobs_key=jobs_key,
    model_dir=model_dir,
)

job: fpe-rank-WESTB0-20240328-215259
downloading: s3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/FLOW_CFS/20240327/models/20240328/jobs/fpe-rank-WESTB0-20240328-215259/output/output.tar.gz -> /mnt/d/fpe/rank/West Brook 0_01171100/FLOW_CFS/20240327/models/20240328/output/output.tar.gz


ClientError: An error occurred (404) when calling the HeadObject operation: Not Found

## Batch Transform

NameError: name 'station_dir' is not defined

job: fpe-rank-2023-10-26-19-59-36-250
downloading: s3://usgs-chs-conte-prod-fpe-models/rank/01651770/20231026/jobs/fpe-rank-2023-10-26-19-59-36-250/output/output.tar.gz -> D:/fpe/sites/01651770/models/20231026/output/output.tar.gz
extracting: D:/fpe/sites/01651770/models/20231026/output/output.tar.gz -> D:/fpe/sites/01651770/models/20231026/output


x args.json
x metrics.csv


In [60]:
# Load the image table and express its timestamps in the station's timezone.
transform_images_file = f"{model_dir}/input/images.csv"
transform_images = pd.read_csv(transform_images_file)
parsed_timestamps = pd.to_datetime(transform_images["timestamp"])
transform_images["timestamp"] = parsed_timestamps.dt.tz_convert(station["timezone"])
print(f"rows: {len(transform_images)}")

rows: 16363


In [61]:
# Tag each image with the hour of its timestamp, then count images per hour.
hour_of_day = transform_images["timestamp"].dt.hour
transform_images["hour"] = hour_of_day
hour_sizes = transform_images.groupby("hour").size()
hour_sizes.reset_index(name="count")

Unnamed: 0,hour,count
0,7,1346
1,8,1365
2,9,1364
3,10,1364
4,11,1365
5,12,1368
6,13,1368
7,14,1369
8,15,1370
9,16,1370


In [62]:
# Tag each image with the month of its timestamp, then count images per month.
month_of_year = transform_images["timestamp"].dt.month
transform_images["month"] = month_of_year
month_sizes = transform_images.groupby("month").size()
month_sizes.reset_index(name="count")

Unnamed: 0,month,count
0,1,1858
1,2,1680
2,3,1860
3,4,1799
4,5,1860
5,6,1800
6,7,1860
7,8,240
8,10,112
9,11,1436


In [63]:
# Keep images whose timestamp hour is 7..18 inclusive (between() includes both ends).
is_daytime = transform_images["timestamp"].dt.hour.between(7, 18)
# .copy() gives an independent frame, so the column assignment below does not
# write into a slice of the original (avoids pandas' SettingWithCopyWarning).
transform_images = transform_images[is_daytime].copy()
transform_images["timestamp"] = transform_images["timestamp"].dt.tz_convert("UTC")
# NOTE: this overwrites the input images.csv in place, including any extra
# columns (e.g. hour/month) added to the frame by earlier cells.
transform_images.to_csv(f"{model_dir}/input/images.csv", index=False)

In [64]:
# Copy the local images.csv to the transform prefix in the model bucket.
s3_key = f"{transform_key}/images.csv"
print(f"uploading: {transform_images_file} -> {s3_key}")
s3.upload_file(
    Bucket=model_bucket,
    Key=s3_key,
    Filename=transform_images_file,
)

uploading: D:/fpe/sites/01651770/models/20231026/input/images.csv -> rank/01651770/20231026/transform/images.csv


In [65]:
# Show the current transform_images frame as this cell's output.
transform_images

Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036972,2022-10-26 18:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,14,10
1,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036973,2022-10-26 19:00:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,15,10
2,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036974,2022-10-26 19:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,15,10
3,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036975,2022-10-26 20:00:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.16,16,10
4,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036976,2022-10-26 20:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,16,10
...,...,...,...,...,...,...,...,...,...,...
16358,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793662,2023-08-04 22:00:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16359,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793664,2023-08-04 22:12:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16360,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793666,2023-08-04 22:24:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16361,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793669,2023-08-04 22:36:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.26,18,8


In [66]:
# Manifest layout: the first entry is a {"prefix": ...} dict pointing at the
# storage bucket, followed by one entry per image `filename`
# (presumably object keys relative to that prefix -- confirm against the data).
image_keys = transform_images['filename'].to_list()
manifest = [{"prefix": f"s3://{storage_bucket}/"}, *image_keys]

manifest_key = f"{transform_key}/manifest.json"
body = json.dumps(manifest)
# n excludes the leading prefix entry.
print(f"uploading transform manifest: {manifest_key} (n = {len(manifest) - 1})")
s3.put_object(Bucket=model_bucket, Key=manifest_key, Body=body)

uploading transform manifest: rank/01651770/20231026/transform/manifest.json (n = 16363)


{'ResponseMetadata': {'RequestId': 'NXHVJP1HD7C4QE69',
  'HostId': 'o3suoY98LakWiRjfHKLnf1gUPpkC2Na3/Kq341K5NdZVWXrEa1dDydcefAm8KxCRf1PFF5hrv4DZkePzXXLx1Q==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'o3suoY98LakWiRjfHKLnf1gUPpkC2Na3/Kq341K5NdZVWXrEa1dDydcefAm8KxCRf1PFF5hrv4DZkePzXXLx1Q==',
   'x-amz-request-id': 'NXHVJP1HD7C4QE69',
   'date': 'Thu, 26 Oct 2023 22:58:12 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"94b537f0a7d1275f65ca68b4bffdcd29"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"94b537f0a7d1275f65ca68b4bffdcd29"',
 'ServerSideEncryption': 'AES256'}

In [67]:
# The training job name is read from the first line of {model_dir}/job.txt.
# NOTE(review): no cell visible in this notebook writes job.txt -- confirm
# where it is produced.
job_file = f"{model_dir}/job.txt"
with open(job_file, "r") as job_fp:
    job_name = job_fp.readline().strip()
model_path = f"{output_path}/{job_name}/output/model.tar.gz"
model_path

's3://usgs-chs-conte-prod-fpe-models/rank/01651770/20231026/jobs/fpe-rank-2023-10-26-19-59-36-250/output/model.tar.gz'

In [68]:
from sagemaker.pytorch.model import PyTorchModel

# Inference model wrapping the trained artifact at `model_path`.
# NOTE(review): py_version/framework_version here (py38 / 1.12) differ from the
# training estimator's (py39 / 1.13.1) -- confirm this is intentional.
pytorch_model = PyTorchModel(
    model_data=model_path,
    # Reuse the notebook-level constant instead of repeating the ARN literal.
    role=JOB_ROLE_ARN,
    py_version="py38",
    framework_version="1.12",
    source_dir="src/",
    entry_point="transform.py",
    sagemaker_session = sm_session
)
pytorch_model

<sagemaker.pytorch.model.PyTorchModel at 0x1a341109280>

In [69]:
# Batch transformer for the model; results are written under transform_path.
transformer = pytorch_model.transformer(
    output_path=transform_path,
    instance_type="ml.c5.xlarge",
    instance_count=1,
)
(transformer, transform_path)

INFO:sagemaker:Repacking model artifact (s3://usgs-chs-conte-prod-fpe-models/rank/01651770/20231026/jobs/fpe-rank-2023-10-26-19-59-36-250/output/model.tar.gz), script artifact (src/), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-west-2-694155575325/pytorch-inference-2023-10-26-22-58-16-275/model.tar.gz. This may take some time depending on model size...


INFO:sagemaker:Creating model with name: pytorch-inference-2023-10-26-22-58-52-069


(<sagemaker.transformer.Transformer at 0x1a341ff6a00>,
 's3://usgs-chs-conte-prod-fpe-models/rank/01651770/20231026/transform')

In [70]:
# Start the batch transform over the images listed in the transform manifest,
# without blocking the notebook (wait=False).
transform_manifest_uri = f"{transform_path}/manifest.json"
transformer.transform(
    data=transform_manifest_uri,
    data_type="ManifestFile",
    content_type="image/jpg",
    wait=False,
)

INFO:sagemaker:Creating transform job with name: pytorch-inference-2023-10-26-22-58-59-941


In [132]:
# Stop a specific transform job by name.
transform_job_to_stop = "pytorch-inference-2023-09-29-00-44-57-671"
sm_session.stop_transform_job(transform_job_to_stop)

INFO:sagemaker:Stopping transform job: pytorch-inference-2023-09-29-00-44-57-671


## Process Transform Output

In [71]:
def process_transform_output(session, site, model_id, job_size = 5000, root_dir = "D:/fpe/sites", bucket = None):
    """Fan out ``process_transform_output`` Lambda invocations in fixed-size chunks.

    Reads the local ``images.csv`` for ``site``/``model_id`` only to learn the
    number of rows, then invokes the ``fpe-prod-lambda-models`` function once
    per chunk of ``job_size`` rows with ``InvocationType="Event"``.

    Args:
        session: object exposing ``client("lambda")`` (a boto3 session here).
        site: site identifier used in both the local path and the S3 prefix.
        model_id: model identifier used in both the local path and the S3 prefix.
        job_size: number of rows handled per invocation; must be a positive
            integer (same requirement as ``combine_predictions``).
        root_dir: local root containing ``{site}/models/{model_id}/input/images.csv``.
        bucket: value sent as ``bucket_name`` in the payload. When omitted, the
            notebook-level ``model_bucket`` is used.

    Returns:
        The DataFrame read from the local ``images.csv``.

    Raises:
        ValueError: if ``job_size`` is not positive. The original ``while``
            loop would never terminate (and keep invoking Lambda) in that case.
    """
    if job_size <= 0:
        raise ValueError(f"job_size must be a positive integer (got {job_size})")
    if bucket is None:
        bucket = model_bucket
    lambda_client = session.client("lambda")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    df = pd.read_csv(f"{root_dir}/{site}/models/{model_id}/input/images.csv")
    for skip in range(0, len(df), job_size):
        payload = {
            "action": "process_transform_output",
            "bucket_name": bucket,
            "data_file": f"{transform_prefix}/images.csv",
            "data_prefix": transform_prefix,
            "output_prefix": transform_prefix,
            "n": job_size,
            "skip": skip
        }
        # The printed upper bound is the requested range; the last chunk may
        # extend past the number of rows actually present.
        print(f"invoke: skip={skip}, n={job_size} ({skip} to {skip + job_size - 1})")
        lambda_client.invoke(
            FunctionName="fpe-prod-lambda-models",
            InvocationType="Event",
            Payload=json.dumps(payload)
        )
    return df

In [75]:
# Queue transform post-processing for site 01651770, model 20231026.
process_transform_output(session, site="01651770", model_id="20231026")

invoke: skip=0, n=5000 (0 to 4999)
invoke: skip=5000, n=5000 (5000 to 9999)
invoke: skip=10000, n=5000 (10000 to 14999)
invoke: skip=15000, n=5000 (15000 to 19999)


Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036972,2022-10-26 18:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,14,10
1,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036973,2022-10-26 19:00:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,15,10
2,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036974,2022-10-26 19:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,15,10
3,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036975,2022-10-26 20:00:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.16,16,10
4,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,610,1036976,2022-10-26 20:30:00+00:00,imagesets/2ae2fe0a-8d71-45a7-b3fe-902455392131...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.14,16,10
...,...,...,...,...,...,...,...,...,...,...
16358,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793662,2023-08-04 22:00:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16359,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793664,2023-08-04 22:12:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16360,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793666,2023-08-04 22:24:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.28,18,8
16361,01651770 HICKEY RUN AT NATIONAL ARBORETUM,79,1991,2793669,2023-08-04 22:36:00+00:00,imagesets/5a130b49-b468-4e7d-a36b-5b6ee4d90452...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,0.26,18,8


In [76]:
def combine_predictions(session, site, model_id, job_size = 5000, root_dir = "D:/fpe/sites", bucket = None):
    """Merge the per-chunk prediction CSVs of one site/model into a single file.

    Chunk keys are derived from the row count of the local ``images.csv`` and
    ``job_size``: ``{prefix}/predictions-{skip:05d}-{skip + job_size - 1:05d}.csv``
    with ``prefix = rank/{site}/{model_id}/transform``. The combined table is
    uploaded to ``{prefix}/predictions.csv`` and also written locally to
    ``{root_dir}/{site}/models/{model_id}/transform/predictions.csv``.

    Args:
        session: object exposing ``client("s3")`` (a boto3 session here).
        site: site identifier used in both the local path and the S3 prefix.
        model_id: model identifier used in both the local path and the S3 prefix.
        job_size: chunk size that was used when the chunk files were produced.
        root_dir: local root containing ``{site}/models/{model_id}/input/images.csv``.
        bucket: S3 bucket to read from and write to. When omitted, the
            notebook-level ``model_bucket`` is used.

    Returns:
        The combined predictions DataFrame.

    Raises:
        ValueError: if there are no chunks to combine (e.g. empty ``images.csv``).
    """
    if bucket is None:
        bucket = model_bucket
    s3 = session.client("s3")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    images = pd.read_csv(f"{root_dir}/{site}/models/{model_id}/input/images.csv")
    keys = [
        f"{transform_prefix}/predictions-{skip:05d}-{(skip + job_size - 1):05d}.csv"
        for skip in range(0, len(images), job_size)
    ]
    if not keys:
        # pd.concat([]) would raise a ValueError anyway; say why.
        raise ValueError(f"no prediction chunks to combine for {site}/{model_id}")

    dfs = []
    for key in keys:
        print(key)
        csv_obj = s3.get_object(Bucket=bucket, Key=key)
        csv_data = csv_obj['Body'].read().decode('utf-8')
        dfs.append(pd.read_csv(StringIO(csv_data)))

    df = pd.concat(dfs, ignore_index=True)

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)
    # Upload next to the chunks of THIS site/model. The original used the
    # notebook-global `transform_key`, which belongs to whichever station the
    # config cell currently points at.
    output_key = f"{transform_prefix}/predictions.csv"
    s3.put_object(Body=csv_buffer.getvalue(), Bucket=bucket, Key=output_key)
    local_dir = f"{root_dir}/{site}/models/{model_id}/transform"
    os.makedirs(local_dir, exist_ok=True)
    df.to_csv(f"{local_dir}/predictions.csv", index=False)
    # The original ended with a bare `df` expression, which returned None.
    return df


In [79]:
# Merge the prediction chunks for site 01651770, model 20231026.
combine_predictions(session, site="01651770", model_id="20231026")

rank/01651770/20231026/transform/predictions-00000-04999.csv
rank/01651770/20231026/transform/predictions-05000-09999.csv
rank/01651770/20231026/transform/predictions-10000-14999.csv
rank/01651770/20231026/transform/predictions-15000-19999.csv
