# FPE Rank Model

In [34]:
import boto3
import time
import json
import os
import pandas as pd
import sagemaker
from io import StringIO

In [2]:
# AWS settings: local credentials profile, region, and the role that is assumed
# further down to obtain SageMaker credentials.
AWS_PROFILE = "conte-prod"
AWS_REGION = "us-west-2"
JOB_ROLE_ARN = "arn:aws:iam::694155575325:role/fpe-prod-sagemaker-execution-role"

# Base session built from the named profile; the S3 client shares its credentials.
session = boto3.Session(profile_name=AWS_PROFILE)
s3 = session.client(service_name="s3")

In [3]:
# Identify the model run and its local working directory.
model_id = "20230922"
site = "PATRC"
site_dir = f"D:/fpe/sites/{site}"
model_dir = f"{site_dir}/models/{model_id}"

# exist_ok=True replaces the separate existence check, so the cell is
# idempotent and free of the check-then-create race.
os.makedirs(model_dir, exist_ok=True)

model_bucket = "usgs-chs-conte-prod-fpe-models"
storage_bucket = "usgs-chs-conte-prod-fpe-storage"

# S3 key layout: rank/<site>/<model_id>/{input,jobs,checkpoints,transform}
site_key = f"rank/{site}"
data_key = f"{site_key}/data"
model_key = f"{site_key}/{model_id}"
input_key = f"{model_key}/input"
output_key = f"{model_key}/jobs"
checkpoint_key = f"{model_key}/checkpoints"
transform_key = f"{model_key}/transform"

# Full s3:// URIs for the same locations in the model bucket.
input_path = f"s3://{model_bucket}/{input_key}"
output_path = f"s3://{model_bucket}/{output_key}"
checkpoint_path = f"s3://{model_bucket}/{checkpoint_key}"
transform_path = f"s3://{model_bucket}/{transform_key}"
(input_path, output_path, checkpoint_path, transform_path)

('s3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/input',
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/jobs',
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/checkpoints',
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/transform')

In [100]:
# Push every file found under the local input directory to the model's S3 input prefix.
# Keys are built from the bare file name, so the local directory layout is not mirrored.
local_input_dir = f"{model_dir}/input"
for dirpath, _dirnames, filenames in os.walk(local_input_dir):
    for filename in filenames:
        local_path = os.path.join(dirpath, filename)
        s3_key = f"{input_key}/{filename}"
        print(f"uploading: {filename} -> {s3_key}")
        s3.upload_file(Filename=local_path, Bucket=model_bucket, Key=s3_key)

uploading: args.json -> rank/LANESV/20230922/input/args.json
uploading: flow-images.csv -> rank/LANESV/20230922/input/flow-images.csv
uploading: manifest.json -> rank/LANESV/20230922/input/manifest.json
uploading: pairs-test.csv -> rank/LANESV/20230922/input/pairs-test.csv
uploading: pairs-train.csv -> rank/LANESV/20230922/input/pairs-train.csv
uploading: pairs-val.csv -> rank/LANESV/20230922/input/pairs-val.csv


In [83]:
def timestamp():
    """Return the current local time formatted as YYYYMMDD-HHMMSS."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)

def get_batch_creds(session, role_arn):
    """Assume `role_arn` through STS and return the temporary credentials.

    Args:
        session: object exposing `client("sts")` (a boto3 session in this notebook).
        role_arn: ARN of the role to assume.

    Returns:
        The `Credentials` entry of the `assume_role` response.
    """
    session_name = f"fpe-sagemaker-session--{timestamp()}"
    assumed = session.client("sts").assume_role(
        RoleArn=role_arn,
        RoleSessionName=session_name,
    )
    return assumed["Credentials"]

# Swap the profile credentials for temporary credentials of the SageMaker
# execution role, then wrap them in a boto3 session and a SageMaker session.
# NOTE(review): these credentials are temporary; a later cell's recorded output
# shows an ExpiredTokenException, so re-run this cell if that happens.
creds = get_batch_creds(session, JOB_ROLE_ARN)
sm_boto_session = boto3.Session(
    region_name=AWS_REGION,
    aws_access_key_id=creds["AccessKeyId"],
    aws_secret_access_key=creds["SecretAccessKey"],
    aws_session_token=creds["SessionToken"],
)

sm_session = sagemaker.Session(boto_session=sm_boto_session)

In [3]:
# def timestamp():
#     return time.strftime("%Y%m%d-%H%M%S")

# sts = session.client("sts")
# response = sts.assume_role(
#     RoleArn="arn:aws:iam::694155575325:role/fpe-prod-batch-job-role",
#     RoleSessionName=f"fpe-batch-session--{timestamp()}"
# )
# creds = response['Credentials']
# batch_boto_session = boto3.Session(
#     aws_access_key_id=creds['AccessKeyId'],
#     aws_secret_access_key=creds['SecretAccessKey'],
#     aws_session_token=creds['SessionToken'],
#     region_name=AWS_REGION
# )
# batch = batch_boto_session.client('batch')

# batch.terminate_job(jobId="aaf02bed-6135-4bb3-bbe7-54f58237a7a0", reason="running indefinitely")

## Training

In [76]:
from sagemaker.pytorch import PyTorch

# Training job definition. The execution role comes from JOB_ROLE_ARN (same value
# as the literal previously repeated here) so the ARN is maintained in one place.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="src",
    py_version="py38",
    framework_version="1.12",
    role=JOB_ROLE_ARN,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    volume_size=100,
    hyperparameters={
        "epochs": 15
    },
    base_job_name="fpe-rank",
    # job output, checkpoints and the uploaded source code all live under the model's S3 prefix
    output_path=output_path,
    checkpoint_s3_uri=checkpoint_path,
    code_location=output_path,
    disable_output_compression=False,
    sagemaker_session=sm_session
)

In [77]:
from sagemaker.inputs import TrainingInput

# Image channel: resolved through the manifest.json stored under the input prefix.
manifest_uri = f"{input_path}/manifest.json"
input_images = TrainingInput(
    s3_data=manifest_uri,
    s3_data_type="ManifestFile",
    input_mode="File",
)
(input_images, manifest_uri, input_path)

(<sagemaker.inputs.TrainingInput at 0x15a2f27a0d0>,
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/input/manifest.json',
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/input')

In [78]:
# Two channels: "images" (manifest-driven) and "values" (the whole input prefix).
# wait=False submits the job without blocking the notebook.
training_channels = {"images": input_images, "values": input_path}
estimator.fit(training_channels, wait=False)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fpe-rank-2023-09-23-01-48-47-142


In [79]:
# Manually stop a training job by name.
# NOTE(review): the job name is hard-coded and is not the one reported by the
# fit cell's recorded output; set it to the job you intend to stop before running.
sm_session.stop_training_job("fpe-rank-2023-09-23-01-46-19-458")

## Batch Transform

In [140]:
# Load the image table (flow-images.csv) stored alongside the training inputs.
transform_data_key = f"{input_key}/flow-images.csv"

# Fetch the object from S3 and parse it from memory.
print(f"downloading transform data file: s3://{model_bucket}/{transform_data_key}")
response = s3.get_object(Bucket=model_bucket, Key=transform_data_key)
csv_text = response["Body"].read().decode("utf-8")
transform_data = pd.read_csv(StringIO(csv_text))
print(f"rows: {len(transform_data)}")

downloading transform data file: s3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/input/flow-images.csv
rows: 31573


In [141]:
# Express timestamps in America/New_York so the hour filter works on local clock time.
transform_data["timestamp"] = pd.to_datetime(transform_data["timestamp"]).dt.tz_convert("America/New_York")

#(transform_data["timestamp"][100], transform_data["filename"][100])

# filter out nighttime: keep rows whose local hour is 7 through 18 (between() is inclusive).
# .copy() makes the result an independent frame, so columns added to it later
# do not trigger pandas' SettingWithCopyWarning.
is_daytime = transform_data["timestamp"].dt.hour.between(7, 18)
transform_data = transform_data[is_daytime].copy()
print(f"filtered rows: {len(transform_data)}")

filtered rows: 15781


In [142]:
# Derive hour-of-day and month columns. assign() builds a new frame instead of
# writing columns into a filtered slice.
transform_data = transform_data.assign(
    hour=transform_data["timestamp"].dt.hour,
    month=transform_data["timestamp"].dt.month,
)

# Only a cell's last expression is rendered automatically, so the hourly table
# has to be displayed explicitly; previously it was computed and discarded.
display(transform_data.groupby("hour").size().reset_index(name="count"))
transform_data.groupby("month").size().reset_index(name="count")

Unnamed: 0,month,count
0,1,1488
1,2,1342
2,3,1488
3,4,1438
4,5,1488
5,6,1440
6,7,1488
7,8,1487
8,9,271
9,10,923


In [153]:
# Save the filtered table locally. to_csv does not create missing folders, so
# make sure the transform directory exists first.
transform_dir = f"{model_dir}/transform"
os.makedirs(transform_dir, exist_ok=True)
file = f"{transform_dir}/flow-images.csv"
transform_data.to_csv(file, index=False)

In [144]:
# Upload the locally saved table to the model's transform prefix.
s3_key = "/".join((transform_key, "flow-images.csv"))
print(f"uploading: {file} -> {s3_key}")
s3.upload_file(Bucket=model_bucket, Key=s3_key, Filename=file)

uploading: D:/fpe/sites/PATRC/models/20230922/transform/flow-images.csv -> rank/PATRC/20230922/transform/flow-images.csv


In [145]:
transform_data

Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,01359135_Patroon Creek,80,587,984697,2022-10-12 16:15:00-04:00,imagesets/526f3f68-6a19-47ef-afa1-3663cbba51ed...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,12.7,16,10
1,01359135_Patroon Creek,80,587,984698,2022-10-12 16:30:00-04:00,imagesets/526f3f68-6a19-47ef-afa1-3663cbba51ed...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,12.7,16,10
2,01359135_Patroon Creek,80,587,984699,2022-10-12 16:45:00-04:00,imagesets/526f3f68-6a19-47ef-afa1-3663cbba51ed...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,12.7,16,10
3,01359135_Patroon Creek,80,587,984700,2022-10-12 17:00:00-04:00,imagesets/526f3f68-6a19-47ef-afa1-3663cbba51ed...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,12.7,17,10
4,01359135_Patroon Creek,80,587,984701,2022-10-12 17:15:00-04:00,imagesets/526f3f68-6a19-47ef-afa1-3663cbba51ed...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,12.7,17,10
...,...,...,...,...,...,...,...,...,...,...
31568,01359135_Patroon Creek,80,2073,2951330,2023-09-06 13:30:00-04:00,imagesets/44c56acf-60e1-4148-be1d-167713d0e98a...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,16.1,13,9
31569,01359135_Patroon Creek,80,2073,2951332,2023-09-06 13:45:00-04:00,imagesets/44c56acf-60e1-4148-be1d-167713d0e98a...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,16.1,13,9
31570,01359135_Patroon Creek,80,2073,2951334,2023-09-06 14:00:00-04:00,imagesets/44c56acf-60e1-4148-be1d-167713d0e98a...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,16.1,14,9
31571,01359135_Patroon Creek,80,2073,2951337,2023-09-06 14:15:00-04:00,imagesets/44c56acf-60e1-4148-be1d-167713d0e98a...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,16.1,14,9


In [146]:
# Manifest layout: a {"prefix": ...} object first, then one image key per table row.
prefix_entry = {"prefix": f"s3://{storage_bucket}/"}
manifest = [prefix_entry] + transform_data["filename"].to_list()

manifest_key = f"{transform_key}/manifest.json"
body = json.dumps(manifest)
# n excludes the leading prefix entry
print(f"uploading transform manifest: {manifest_key} (n = {len(manifest) - 1})")
s3.put_object(Bucket=model_bucket, Key=manifest_key, Body=body)

uploading transform manifest: rank/PATRC/20230922/transform/manifest.json (n = 15781)


{'ResponseMetadata': {'RequestId': '0BYE5GQZ40JV8DDK',
  'HostId': 'zu/cleWe5qyTtC+EqDTYVvZ+2SwzzFLoQUqnYx+FEw3rGq28zsrQU5BXbEkYyZM0UZ7IIhw0Ab4=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'zu/cleWe5qyTtC+EqDTYVvZ+2SwzzFLoQUqnYx+FEw3rGq28zsrQU5BXbEkYyZM0UZ7IIhw0Ab4=',
   'x-amz-request-id': '0BYE5GQZ40JV8DDK',
   'date': 'Sat, 23 Sep 2023 13:07:58 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"7f17d68af9020daf5a68632521092268"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"7f17d68af9020daf5a68632521092268"',
 'ServerSideEncryption': 'AES256'}

In [147]:
# S3 location of the model archive under the named training job's output folder.
job_name = "fpe-rank-2023-09-23-01-48-47-142"
model_path = "/".join([output_path, job_name, "output", "model.tar.gz"])
model_path

's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/jobs/fpe-rank-2023-09-23-01-48-47-142/output/model.tar.gz'

In [148]:
from sagemaker.pytorch.model import PyTorchModel

# Wrap the trained artifact for inference with transform.py as the entry point.
# The execution role comes from JOB_ROLE_ARN (same value as the literal
# previously repeated here) so the ARN is maintained in one place.
pytorch_model = PyTorchModel(
    model_data=model_path,
    role=JOB_ROLE_ARN,
    py_version="py38",
    framework_version="1.12",
    source_dir="src/",
    entry_point="transform.py",
    sagemaker_session=sm_session
)
pytorch_model

<sagemaker.pytorch.model.PyTorchModel at 0x15a35557100>

In [149]:
# Build a batch transformer for the model; its results are written under transform_path.
transformer = pytorch_model.transformer(
    output_path=transform_path,
    instance_type="ml.c5.xlarge",
    instance_count=1,
)
(transformer, transform_path)

INFO:sagemaker:Repacking model artifact (s3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/jobs/fpe-rank-2023-09-23-01-48-47-142/output/model.tar.gz), script artifact (src/), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-west-2-694155575325/pytorch-inference-2023-09-23-13-08-06-046/model.tar.gz. This may take some time depending on model size...


INFO:sagemaker:Creating model with name: pytorch-inference-2023-09-23-13-08-42-098


(<sagemaker.transformer.Transformer at 0x15a34661820>,
 's3://usgs-chs-conte-prod-fpe-models/rank/PATRC/20230922/transform')

In [150]:
# Start the batch transform over the images listed in the transform manifest.
# wait=False submits the job without blocking the notebook.
transform_manifest_uri = f"{transform_path}/manifest.json"
transformer.transform(
    transform_manifest_uri,
    data_type="ManifestFile",
    content_type="image/jpg",
    wait=False,
)

INFO:sagemaker:Creating transform job with name: pytorch-inference-2023-09-23-13-08-45-957


In [9]:
# Manually stop a batch transform job by name.
# NOTE(review): the job name is hard-coded; set it to the job you intend to stop before running.
sm_session.stop_transform_job("pytorch-inference-2023-09-20-13-46-27-309")

INFO:sagemaker:Stopping transform job: pytorch-inference-2023-09-04-16-09-22-922


In [17]:
# Delete the SageMaker model created for the transformer.
# NOTE(review): the recorded run failed with ExpiredTokenException; the session uses
# temporary assume-role credentials, so re-create sm_session and the model object before retrying.
pytorch_model.delete_model()

INFO:sagemaker:Deleting model with name: pytorch-inference-2023-09-21-16-13-31-012


ClientError: An error occurred (ExpiredTokenException) when calling the DeleteModel operation: The security token included in the request is expired

## Process Transform Output

In [4]:
def process_transform_output(session, site, model_id, job_size=5000, root_dir="D:/fpe/sites"):
    """Queue Lambda invocations that post-process the batch transform output.

    The local copy of flow-images.csv is read only to learn the row count. The
    rows are then covered in consecutive windows of `job_size`, with one
    `InvocationType="Event"` Lambda invocation per window.

    Args:
        session: object exposing `client("lambda")` (a boto3 session in this notebook).
        site: site code used in the S3 prefix and in the local path.
        model_id: model identifier used in the S3 prefix and in the local path.
        job_size: number of rows per invocation; must be a positive integer.
        root_dir: local directory containing
            `<site>/models/<model_id>/transform/flow-images.csv`.

    Returns:
        The DataFrame read from the local flow-images.csv.

    Raises:
        ValueError: if `job_size` is not positive. Without this check a zero or
            negative value would invoke the Lambda function in an endless loop.
    """
    if job_size <= 0:
        raise ValueError(f"job_size must be a positive integer, got {job_size!r}")

    lambda_client = session.client("lambda")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    df = pd.read_csv(f"{root_dir}/{site}/models/{model_id}/transform/flow-images.csv")

    # Same windowing as combine_predictions: 0, job_size, 2 * job_size, ...
    for skip in range(0, len(df), job_size):
        payload = {
            "action": "process_transform_output",
            "bucket_name": model_bucket,
            "data_file": f"{transform_prefix}/flow-images.csv",
            "data_prefix": transform_prefix,
            "output_prefix": transform_prefix,
            "n": job_size,
            "skip": skip
        }
        # The printed upper bound is the nominal window end; the last window may hold fewer rows.
        print(f"invoke: skip={skip}, n={job_size} ({skip} to {skip + job_size - 1})")
        lambda_client.invoke(
            FunctionName="fpe-prod-lambda-models",
            InvocationType="Event",
            Payload=json.dumps(payload)
        )
    return df

In [9]:
# Queue post-processing for site AVERYBB, model 20230922. The site is passed
# explicitly here and is independent of the `site` variable set near the top.
process_transform_output(session, "AVERYBB", "20230922")

invoke: skip=0, n=5000 (0 to 4999)
invoke: skip=5000, n=5000 (5000 to 9999)
invoke: skip=10000, n=5000 (10000 to 14999)
invoke: skip=15000, n=5000 (15000 to 19999)
invoke: skip=20000, n=5000 (20000 to 24999)
invoke: skip=25000, n=5000 (25000 to 29999)
invoke: skip=30000, n=5000 (30000 to 34999)


Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,Avery Brook_Bridge_01171000,12,95,153582,2021-03-10 11:01:17-05:00,imagesets/fec63b82-d9fa-4844-ab9b-cda8999122b0...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.390000,11,3
1,Avery Brook_Bridge_01171000,12,95,153583,2021-03-10 11:02:24-05:00,imagesets/fec63b82-d9fa-4844-ab9b-cda8999122b0...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.390000,11,3
2,Avery Brook_Bridge_01171000,12,95,153584,2021-03-10 11:16:18-05:00,imagesets/fec63b82-d9fa-4844-ab9b-cda8999122b0...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.390000,11,3
3,Avery Brook_Bridge_01171000,12,95,153585,2021-03-10 11:31:18-05:00,imagesets/fec63b82-d9fa-4844-ab9b-cda8999122b0...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.390000,11,3
4,Avery Brook_Bridge_01171000,12,95,153586,2021-03-10 11:46:18-05:00,imagesets/fec63b82-d9fa-4844-ab9b-cda8999122b0...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.390867,11,3
...,...,...,...,...,...,...,...,...,...,...
31204,Avery Brook_Bridge_01171000,12,2079,2962371,2023-09-06 09:45:00-04:00,imagesets/7e360ec6-8bb5-4882-9e18-19898844da69...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,1.780000,9,9
31205,Avery Brook_Bridge_01171000,12,2079,2962372,2023-09-06 10:00:00-04:00,imagesets/7e360ec6-8bb5-4882-9e18-19898844da69...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,1.780000,10,9
31206,Avery Brook_Bridge_01171000,12,2079,2962373,2023-09-06 10:15:00-04:00,imagesets/7e360ec6-8bb5-4882-9e18-19898844da69...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,1.780000,10,9
31207,Avery Brook_Bridge_01171000,12,2079,2962374,2023-09-06 10:30:00-04:00,imagesets/7e360ec6-8bb5-4882-9e18-19898844da69...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,1.780000,10,9


In [12]:
def combine_predictions(session, site, model_id, job_size=5000, root_dir="D:/fpe/sites"):
    """Merge the per-window prediction files into a single predictions.csv.

    The local flow-images.csv is read only for its row count, which determines
    the expected window files `predictions-<skip>-<skip + job_size - 1>.csv`
    under the site's transform prefix. The files are downloaded, concatenated
    in order, and written both to S3 and to the local transform folder.

    Args:
        session: object exposing `client("s3")` (a boto3 session in this notebook).
        site: site code used in the S3 prefix and in the local path.
        model_id: model identifier used in the S3 prefix and in the local path.
        job_size: window size; must match the value used when the windows were produced.
        root_dir: local directory containing `<site>/models/<model_id>/transform/`.

    Returns:
        The combined predictions DataFrame.
    """
    s3 = session.client("s3")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    transform_dir = f"{root_dir}/{site}/models/{model_id}/transform"
    n_rows = len(pd.read_csv(f"{transform_dir}/flow-images.csv"))
    keys = [
        f"{transform_prefix}/predictions-{skip:05d}-{(skip + job_size - 1):05d}.csv"
        for skip in range(0, n_rows, job_size)
    ]

    parts = []
    for key in keys:
        print(key)
        csv_obj = s3.get_object(Bucket=model_bucket, Key=key)
        csv_data = csv_obj['Body'].read().decode('utf-8')
        parts.append(pd.read_csv(StringIO(csv_data)))

    df = pd.concat(parts, ignore_index=True)

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)
    # Write under the prefix of the requested site/model. The notebook-level
    # transform_key belongs to whatever site the setup cell configured, so using
    # it would send this site's predictions to another site's folder.
    output_key = f"{transform_prefix}/predictions.csv"
    s3.put_object(Body=csv_buffer.getvalue(), Bucket=model_bucket, Key=output_key)
    df.to_csv(f"{transform_dir}/predictions.csv", index=False)
    return df


In [33]:
# Merge the prediction windows for site AVERYBB, model 20230922, using the
# default job_size (it must match the value used when the windows were queued).
combine_predictions(session, "AVERYBB", "20230922")

rank/AVERYBB/20230922/transform/predictions-00000-04999.csv
rank/AVERYBB/20230922/transform/predictions-05000-09999.csv
rank/AVERYBB/20230922/transform/predictions-10000-14999.csv
rank/AVERYBB/20230922/transform/predictions-15000-19999.csv
rank/AVERYBB/20230922/transform/predictions-20000-24999.csv
rank/AVERYBB/20230922/transform/predictions-25000-29999.csv
rank/AVERYBB/20230922/transform/predictions-30000-34999.csv


In [29]:
def download_output(session, site, model_id, job_id, root_dir="D:/fpe/sites"):
    """Download a training job's output.tar.gz and unpack it locally.

    The archive is stored as `<root_dir>/<site>/models/<model_id>/output/output.tar.gz`
    and extracted into that same folder.

    Args:
        session: object exposing `client("s3")` (a boto3 session in this notebook).
        site: site code used in the S3 key and in the local path.
        model_id: model identifier used in the S3 key and in the local path.
        job_id: training job name, i.e. the folder under `jobs/` in S3.
        root_dir: local directory that holds the per-site folders.

    Returns:
        Local path of the downloaded archive.
    """
    # Function-scope import: the notebook's import cell does not provide tarfile.
    import tarfile

    s3 = session.client("s3")
    output_dir = f"{root_dir}/{site}/models/{model_id}/output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = f"{output_dir}/output.tar.gz"
    output_key = f"rank/{site}/{model_id}/jobs/{job_id}/output/output.tar.gz"
    s3.download_file(Bucket=model_bucket, Key=output_key, Filename=output_file)

    # Pure-Python extraction replaces the `!tar` shell escape: it needs no IPython
    # or external tar binary and is unaffected by spaces in the paths.
    with tarfile.open(output_file, "r:gz") as archive:
        for member_name in archive.getnames():
            print(f"x {member_name}")  # same listing the verbose tar run printed
        if hasattr(tarfile, "data_filter"):
            # Newer Python versions: refuse members that would escape output_dir.
            archive.extractall(path=output_dir, filter="data")
        else:
            archive.extractall(path=output_dir)
    return output_file

In [32]:
# Fetch and unpack the output archive of the named training job for site AVERYBB, model 20230922.
download_output(session, "AVERYBB", "20230922", "fpe-rank-2023-09-23-01-48-02-464")

x metrics.csv
x args.json
