# FPE Rank Model

In [116]:
import boto3
import time
import json
import os
import pandas as pd
import sagemaker
from io import StringIO

In [117]:
# AWS settings shared by every cell below.
AWS_PROFILE = "conte-prod"
AWS_REGION = "us-west-2"
JOB_ROLE_ARN = "arn:aws:iam::694155575325:role/fpe-prod-sagemaker-execution-role"

# Base session built from the named local profile; the S3 client is derived from it.
session = boto3.Session(profile_name=AWS_PROFILE)
s3 = session.client("s3")

In [118]:
def timestamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)

def get_batch_creds(session, role_arn):
    """Assume ``role_arn`` through STS and return the response's ``Credentials`` entry.

    Args:
        session: object exposing ``client("sts")`` (a boto3 session in this notebook).
        role_arn: ARN of the role to assume.

    Returns:
        The ``'Credentials'`` value of the ``assume_role`` response.
    """
    sts_client = session.client("sts")
    # Session name carries a timestamp suffix so each call gets its own name.
    session_name = f"fpe-sagemaker-session--{timestamp()}"
    assumed = sts_client.assume_role(RoleArn=role_arn, RoleSessionName=session_name)
    return assumed['Credentials']

# Assume the job role, then wrap the temporary credentials in a boto3 session
# pinned to AWS_REGION and hand that session to the SageMaker SDK.
creds = get_batch_creds(session, JOB_ROLE_ARN)
sm_boto_session = boto3.Session(
    region_name=AWS_REGION,
    aws_access_key_id=creds['AccessKeyId'],
    aws_secret_access_key=creds['SecretAccessKey'],
    aws_session_token=creds['SessionToken'],
)
sm_session = sagemaker.Session(boto_session=sm_boto_session)

In [119]:
# Model run and site being processed, plus the local working directories.
model_id = "20230922-50"
site = "WESTB0"
site_dir = f"D:/fpe/sites/{site}"
model_dir = f"{site_dir}/models/{model_id}"

# exist_ok replaces the check-then-create pair: no race between the existence
# test and the creation, and the cell stays safe to re-run.
os.makedirs(model_dir, exist_ok=True)

# Buckets: one holds model inputs/outputs, the other is used as the image manifest prefix.
model_bucket = "usgs-chs-conte-prod-fpe-models"
storage_bucket = "usgs-chs-conte-prod-fpe-storage"

# S3 key layout under the model bucket: rank/<site>/<model_id>/<stage>.
site_key = f"rank/{site}"
data_key = f"{site_key}/data"
model_key = f"{site_key}/{model_id}"
input_key = f"{model_key}/input"
output_key = f"{model_key}/jobs"
checkpoint_key = f"{model_key}/checkpoints"
transform_key = f"{model_key}/transform"

# Full s3:// URIs for the same keys.
input_path = f"s3://{model_bucket}/{input_key}"
output_path = f"s3://{model_bucket}/{output_key}"
checkpoint_path = f"s3://{model_bucket}/{checkpoint_key}"
transform_path = f"s3://{model_bucket}/{transform_key}"
(input_path, output_path, checkpoint_path, transform_path)

('s3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-50/input',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-50/jobs',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-50/checkpoints',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-50/transform')

In [98]:
# Upload every file found under the local input directory to the model's S3 input prefix.
# The key is built from the bare file name, so files in nested folders land
# directly under `input_key`.
local_input_dir = f"{model_dir}/input"
for folder, _subfolders, filenames in os.walk(local_input_dir):
    for filename in filenames:
        local_path = os.path.join(folder, filename)
        s3_key = f"{input_key}/{filename}"
        print(f"uploading: {filename} -> {s3_key}")
        s3.upload_file(Filename=local_path, Bucket=model_bucket, Key=s3_key)

uploading: args.json -> rank/WESTB0/20230922-75/input/args.json
uploading: images.csv -> rank/WESTB0/20230922-75/input/images.csv
uploading: manifest.json -> rank/WESTB0/20230922-75/input/manifest.json
uploading: pairs-test.csv -> rank/WESTB0/20230922-75/input/pairs-test.csv
uploading: pairs-train.csv -> rank/WESTB0/20230922-75/input/pairs-train.csv
uploading: pairs-val.csv -> rank/WESTB0/20230922-75/input/pairs-val.csv


In [35]:
# def timestamp():
#     return time.strftime("%Y%m%d-%H%M%S")

# sts = session.client("sts")
# response = sts.assume_role(
#     RoleArn="arn:aws:iam::694155575325:role/fpe-prod-batch-job-role",
#     RoleSessionName=f"fpe-batch-session--{timestamp()}"
# )
# creds = response['Credentials']
# batch_boto_session = boto3.Session(
#     aws_access_key_id=creds['AccessKeyId'],
#     aws_secret_access_key=creds['SecretAccessKey'],
#     aws_session_token=creds['SessionToken'],
#     region_name=AWS_REGION
# )
# batch = batch_boto_session.client('batch')

# batch.terminate_job(jobId="aaf02bed-6135-4bb3-bbe7-54f58237a7a0", reason="running indefinitely")

## Training

In [6]:
from sagemaker.pytorch import PyTorch

# Training job definition: runs src/train.py on a single ml.p3.2xlarge instance.
# Job output and the uploaded source code both go under `output_path`;
# checkpoints go under `checkpoint_path`.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="src",
    py_version="py38",
    framework_version="1.12",
    # Reuse the notebook-level constant rather than repeating the ARN literal,
    # so the role assumed for `sm_session` and the job role cannot drift apart.
    role=JOB_ROLE_ARN,
    instance_count=1,
    instance_type="ml.p3.2xlarge",
    volume_size=100,
    hyperparameters={
        "epochs": 15
    },
    base_job_name="fpe-rank",
    output_path=output_path,
    checkpoint_s3_uri=checkpoint_path,
    code_location=output_path,
    disable_output_compression=False,
    sagemaker_session=sm_session
)

In [7]:
from sagemaker.inputs import TrainingInput

# The image channel is described by the manifest file stored under the input prefix.
train_manifest_uri = f"{input_path}/manifest.json"
input_images = TrainingInput(
    s3_data=train_manifest_uri,
    s3_data_type="ManifestFile",
    input_mode="File",
)
(input_images, train_manifest_uri, input_path)

(<sagemaker.inputs.TrainingInput at 0x1fb540a06d0>,
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-25/input/manifest.json',
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-25/input')

In [8]:
# Submit the training job with two channels: "images" (the manifest-backed input)
# and "values" (the whole input prefix). wait=False is passed, so the cell is not
# expected to block until training finishes — check the job status separately.
estimator.fit({ "images": input_images, "values": input_path }, wait=False)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fpe-rank-2023-10-05-18-55-03-245


In [79]:
# Manual escape hatch: stop a training job by name.
# NOTE(review): the job name is a hard-coded literal — confirm it is the job you
# intend to stop before running this cell (skip it on a normal top-to-bottom run).
sm_session.stop_training_job("fpe-rank-2023-09-23-01-46-19-458")

## Batch Transform

In [99]:
# Load the station metadata for the site; its "timezone" entry is used below to
# interpret image timestamps in local time.
# JSON is UTF-8 by specification, so decode explicitly instead of relying on the
# platform's locale encoding (which is not UTF-8 on many Windows setups).
with open(f"{site_dir}/data/station.json", "r", encoding="utf-8") as f:
    station = json.load(f)
station

{'id': 29,
 'user_id': '0626d282-0267-40b0-8f17-214c8f72e551',
 'name': 'West Brook 0_01171100',
 'description': '50 feet downstream river left of pantry rd bridge.',
 'latitude': 42.4143,
 'longitude': -72.6293,
 'timezone': 'US/Eastern',
 'created_at': '2022-02-10 15:05:42',
 'updated_at': '2023-01-20 14:41:21',
 'private': False,
 'nwis_id': '01171100'}

In [100]:
def download_output(session, site, model_id, root_dir = "D:/fpe/sites"):
    """Download and unpack the training job's ``output.tar.gz`` for a model run.

    The job name is read from the first line of
    ``{root_dir}/{site}/models/{model_id}/job.txt``; the archive is fetched from
    ``rank/{site}/{model_id}/jobs/{job}/output/output.tar.gz`` in ``model_bucket``
    and extracted into ``{root_dir}/{site}/models/{model_id}/output``.

    Args:
        session: object exposing ``client("s3")`` (a boto3 session in this notebook).
        site: site identifier used in the local path and the S3 key.
        model_id: model run identifier used in the local path and the S3 key.
        root_dir: local directory that contains one folder per site.

    Returns:
        Local path of the downloaded archive.

    Raises:
        ValueError: if the first line of ``job.txt`` is empty.
    """
    import tarfile  # local import: only this function needs it

    s3 = session.client("s3")
    local_model_dir = f"{root_dir}/{site}/models/{model_id}"
    output_dir = f"{local_model_dir}/output"
    os.makedirs(output_dir, exist_ok=True)
    output_file = f"{output_dir}/output.tar.gz"

    with open(f"{local_model_dir}/job.txt", "r") as f:
        job_id = f.readline().strip()
    if not job_id:
        # Fail early with a clear message instead of requesting a malformed key.
        raise ValueError(f"no job name found in {local_model_dir}/job.txt")
    print(f"job: {job_id}")

    output_key = f"rank/{site}/{model_id}/jobs/{job_id}/output/output.tar.gz"
    print(f"downloading: s3://{model_bucket}/{output_key} -> {output_file}")
    s3.download_file(Bucket=model_bucket, Key=output_key, Filename=output_file)

    print(f"extracting: {output_file} -> {output_dir}")
    # Pure-Python extraction: works outside IPython, handles paths containing
    # spaces, and does not depend on which `tar` binary is on PATH.
    # Use the "data" extraction filter where this Python version provides it.
    extract_kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
    with tarfile.open(output_file, "r:gz") as archive:
        for member in archive.getmembers():
            print(f"x {member.name}")
        archive.extractall(output_dir, **extract_kwargs)
    return output_file

In [101]:
# Fetch and unpack the training output for the site/model set in the configuration cell.
download_output(session, site, model_id)

job: fpe-rank-2023-10-05-18-51-18-351
downloading: s3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-75/jobs/fpe-rank-2023-10-05-18-51-18-351/output/output.tar.gz -> D:/fpe/sites/WESTB0/models/20230922-75/output/output.tar.gz
extracting: D:/fpe/sites/WESTB0/models/20230922-75/output/output.tar.gz -> D:/fpe/sites/WESTB0/models/20230922-75/output


x args.json
x metrics.csv


In [102]:
# Read the image list and express its timestamps in the station's timezone.
transform_images_file = f"{model_dir}/input/images.csv"
transform_images = pd.read_csv(transform_images_file).assign(
    timestamp=lambda frame: pd.to_datetime(frame["timestamp"]).dt.tz_convert(station["timezone"])
)
print(f"rows: {len(transform_images)}")

rows: 43060


In [103]:
# Add an hour-of-day column (taken from the current timestamp column) and count images per hour.
transform_images["hour"] = transform_images["timestamp"].dt.hour
hour_counts = transform_images.groupby("hour").size()
hour_counts.reset_index(name="count")

Unnamed: 0,hour,count
0,0,1343
1,1,1830
2,2,1817
3,3,1844
4,4,1859
5,5,1816
6,6,1845
7,7,1867
8,8,1814
9,9,1853


In [104]:
# Add a month column (taken from the current timestamp column) and count images per month.
transform_images["month"] = transform_images["timestamp"].dt.month
month_counts = transform_images.groupby("month").size()
month_counts.reset_index(name="count")

Unnamed: 0,month,count
0,1,2864
1,2,1338
2,3,5382
3,4,5509
4,5,2667
5,6,4975
6,7,5891
7,8,5890
8,9,3351
9,10,240


In [105]:
# Keep only images whose station-local hour is between 7 and 18 inclusive
# (07:00 through 18:59 local time), then store timestamps as UTC.
#
# The hour is computed after an explicit conversion to the station timezone, so
# the filter gives the same rows whether the timestamp column currently holds
# local or UTC times — re-running this cell no longer filters on UTC hours.
local_hour = transform_images["timestamp"].dt.tz_convert(station["timezone"]).dt.hour
# .copy() detaches the filtered rows from the original frame, so the column
# assignment below does not trigger SettingWithCopyWarning.
transform_images = transform_images[local_hour.between(7, 18)].copy()
transform_images["timestamp"] = transform_images["timestamp"].dt.tz_convert("UTC")
# NOTE: this overwrites the input image list in place; the functions further
# down read this same file to decide how many chunks to process.
transform_images.to_csv(f"{model_dir}/input/images.csv", index=False)

In [106]:
# Publish the image list under the transform prefix of the model bucket.
s3_key = f"{transform_key}/images.csv"
print(f"uploading: {transform_images_file} -> {s3_key}")
s3.upload_file(
    Bucket=model_bucket,
    Key=s3_key,
    Filename=transform_images_file,
)

uploading: D:/fpe/sites/WESTB0/models/20230922-75/input/images.csv -> rank/WESTB0/20230922-75/transform/images.csv


In [107]:
# Display the frame as it currently stands (pandas truncates the rendering for long frames).
transform_images

Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,West Brook 0_01171100,29,289,441411,2022-02-01 18:00:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
1,West Brook 0_01171100,29,289,441412,2022-02-01 18:15:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
2,West Brook 0_01171100,29,289,441413,2022-02-01 18:30:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
3,West Brook 0_01171100,29,289,441414,2022-02-01 18:45:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
4,West Brook 0_01171100,29,289,441415,2022-02-01 18:59:59+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
...,...,...,...,...,...,...,...,...,...,...
43055,West Brook 0_01171100,29,2099,3019951,2023-09-06 14:45:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,10,9
43056,West Brook 0_01171100,29,2099,3019952,2023-09-06 15:00:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.27,11,9
43057,West Brook 0_01171100,29,2099,3019953,2023-09-06 15:15:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,11,9
43058,West Brook 0_01171100,29,2099,3019954,2023-09-06 15:30:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,11,9


In [108]:
# Build the transform manifest: a leading {"prefix": ...} entry pointing at the
# storage bucket, followed by each image's `filename` value.
image_keys = transform_images['filename'].to_list()
manifest = [{"prefix": f"s3://{storage_bucket}/"}, *image_keys]

manifest_key = f"{transform_key}/manifest.json"
body = json.dumps(manifest)
print(f"uploading transform manifest: {manifest_key} (n = {len(image_keys)})")
s3.put_object(Bucket=model_bucket, Key=manifest_key, Body=body)

uploading transform manifest: rank/WESTB0/20230922-75/transform/manifest.json (n = 21781)


{'ResponseMetadata': {'RequestId': 'R0VE78RW479TPSHB',
  'HostId': 'jnbtR3fIBjLs2qeiC97eRORyMonAO1udvulukfTT2JunzHN39rzQDpAmdLnWRtXgT3taOQy5lbI=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'jnbtR3fIBjLs2qeiC97eRORyMonAO1udvulukfTT2JunzHN39rzQDpAmdLnWRtXgT3taOQy5lbI=',
   'x-amz-request-id': 'R0VE78RW479TPSHB',
   'date': 'Fri, 06 Oct 2023 01:48:03 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"9a0948352f1b84e944f1e6c117343d0e"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"9a0948352f1b84e944f1e6c117343d0e"',
 'ServerSideEncryption': 'AES256'}

In [109]:
# The training job name is stored on the first line of job.txt in the model
# directory; the model artifact URI is derived from it.
job_file = f"{model_dir}/job.txt"
with open(job_file, "r") as job_fh:
    job_name = job_fh.readline().strip()
model_path = "/".join([output_path, job_name, "output", "model.tar.gz"])
model_path

's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-75/jobs/fpe-rank-2023-10-05-18-51-18-351/output/model.tar.gz'

In [110]:
from sagemaker.pytorch.model import PyTorchModel

# Inference model built from the trained artifact, with src/transform.py as its entry point.
pytorch_model = PyTorchModel(
    model_data=model_path,
    # Reuse the notebook-level constant rather than repeating the ARN literal,
    # so every SageMaker resource in this notebook uses the same role.
    role=JOB_ROLE_ARN,
    py_version="py38",
    framework_version="1.12",
    source_dir="src/",
    entry_point="transform.py",
    sagemaker_session = sm_session
)
pytorch_model

<sagemaker.pytorch.model.PyTorchModel at 0x1fb5e80b9a0>

In [111]:
# Batch transformer for the model: one ml.c5.xlarge instance, with
# output_path set to the model's transform prefix.
transformer = pytorch_model.transformer(
    output_path=transform_path,
    instance_type="ml.c5.xlarge",
    instance_count=1,
)
(transformer, transform_path)

INFO:sagemaker:Repacking model artifact (s3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-75/jobs/fpe-rank-2023-10-05-18-51-18-351/output/model.tar.gz), script artifact (src/), and dependencies ([]) into single tar.gz file located at s3://sagemaker-us-west-2-694155575325/pytorch-inference-2023-10-06-01-48-09-249/model.tar.gz. This may take some time depending on model size...


INFO:sagemaker:Creating model with name: pytorch-inference-2023-10-06-01-48-49-890


(<sagemaker.transformer.Transformer at 0x1fb59366880>,
 's3://usgs-chs-conte-prod-fpe-models/rank/WESTB0/20230922-75/transform')

In [112]:
# Launch the batch transform over the images listed in manifest.json under
# `transform_path`. wait=False is passed, so monitor the job separately.
# NOTE(review): content_type is "image/jpg"; the registered MIME type is
# "image/jpeg" — confirm what src/transform.py expects before changing it.
transformer.transform(
    data=f"{transform_path}/manifest.json",
    data_type="ManifestFile",
    content_type="image/jpg",
    wait=False,
)

INFO:sagemaker:Creating transform job with name: pytorch-inference-2023-10-06-01-48-52-719


In [132]:
# Manual escape hatch: stop a transform job by name.
# NOTE(review): the job name is a hard-coded literal — confirm it is the job you
# intend to stop before running this cell (skip it on a normal top-to-bottom run).
sm_session.stop_transform_job("pytorch-inference-2023-09-29-00-44-57-671")

INFO:sagemaker:Stopping transform job: pytorch-inference-2023-09-29-00-44-57-671


## Process Transform Output

In [120]:
def process_transform_output (session, site, model_id, job_size = 5000, root_dir = "D:/fpe/sites"):
    """Invoke the post-processing Lambda once per chunk of the image list.

    The local ``input/images.csv`` for the model is read only to learn how many
    rows there are. One ``Event``-type invocation of ``fpe-prod-lambda-models``
    is then sent for every ``job_size`` rows, each carrying its ``skip`` offset
    and ``n = job_size``.

    Args:
        session: object exposing ``client("lambda")`` (a boto3 session in this notebook).
        site: site identifier used in the local path and the S3 prefix.
        model_id: model run identifier used in the local path and the S3 prefix.
        job_size: number of rows each invocation is asked to handle; must be > 0.
        root_dir: local directory that contains one folder per site.

    Returns:
        The DataFrame read from the local ``images.csv``.

    Raises:
        ValueError: if ``job_size`` is not positive.
    """
    # A non-positive job_size never advances `skip`; the previous while-loop
    # would then invoke the Lambda forever.
    if job_size <= 0:
        raise ValueError(f"job_size must be a positive integer, got {job_size!r}")

    lambda_client = session.client("lambda")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    df = pd.read_csv(f"{root_dir}/{site}/models/{model_id}/input/images.csv")

    for skip in range(0, len(df), job_size):
        payload = {
            "action": "process_transform_output",
            "bucket_name": model_bucket,
            "data_file": f"{transform_prefix}/images.csv",
            "data_prefix": transform_prefix,
            "output_prefix": transform_prefix,
            "n": job_size,
            "skip": skip
        }
        # The printed upper bound is nominal: the last chunk may extend past the final row.
        print(f"invoke: skip={skip}, n={job_size} ({skip} to {skip + job_size - 1})")
        lambda_client.invoke(
            FunctionName="fpe-prod-lambda-models",
            InvocationType="Event",
            Payload=json.dumps(payload)
        )
    return df

In [123]:
# Queue the Lambda post-processing jobs; the returned DataFrame is displayed below.
# NOTE(review): site and model id are passed as literals here rather than the
# `site` / `model_id` variables — confirm they match the configuration cell.
process_transform_output(session, "WESTB0", "20230922-75")

invoke: skip=0, n=5000 (0 to 4999)
invoke: skip=5000, n=5000 (5000 to 9999)
invoke: skip=10000, n=5000 (10000 to 14999)
invoke: skip=15000, n=5000 (15000 to 19999)
invoke: skip=20000, n=5000 (20000 to 24999)


Unnamed: 0,station_name,station_id,imageset_id,image_id,timestamp,filename,url,flow_cfs,hour,month
0,West Brook 0_01171100,29,289,441411,2022-02-01 18:00:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
1,West Brook 0_01171100,29,289,441412,2022-02-01 18:15:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
2,West Brook 0_01171100,29,289,441413,2022-02-01 18:30:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
3,West Brook 0_01171100,29,289,441414,2022-02-01 18:45:00+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
4,West Brook 0_01171100,29,289,441415,2022-02-01 18:59:59+00:00,imagesets/e8d465f6-5784-4231-967f-9000428e9748...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,4.71,13,2
...,...,...,...,...,...,...,...,...,...,...
21776,West Brook 0_01171100,29,2099,3019951,2023-09-06 14:45:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,10,9
21777,West Brook 0_01171100,29,2099,3019952,2023-09-06 15:00:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.27,11,9
21778,West Brook 0_01171100,29,2099,3019953,2023-09-06 15:15:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,11,9
21779,West Brook 0_01171100,29,2099,3019954,2023-09-06 15:30:00+00:00,imagesets/f9470ac6-e435-4786-85ea-523d5b2dcb14...,https://usgs-chs-conte-prod-fpe-storage.s3.ama...,3.45,11,9


In [113]:
def combine_predictions (session, site, model_id, job_size = 5000, root_dir = "D:/fpe/sites"):
    """Merge the per-chunk prediction CSVs of a model run into one file.

    The local ``input/images.csv`` is read only for its row count, which fixes
    the chunk keys ``predictions-<start>-<end>.csv`` (five-digit, zero-padded)
    under ``rank/{site}/{model_id}/transform`` in ``model_bucket``. The chunks
    are downloaded, concatenated in order, uploaded as ``predictions.csv`` under
    that same prefix, and also written to
    ``{root_dir}/{site}/models/{model_id}/transform/predictions.csv``.

    Args:
        session: object exposing ``client("s3")`` (a boto3 session in this notebook).
        site: site identifier used in the local path and the S3 prefix.
        model_id: model run identifier used in the local path and the S3 prefix.
        job_size: chunk size that was used when the chunks were produced; must be > 0.
        root_dir: local directory that contains one folder per site.

    Returns:
        The combined predictions DataFrame.

    Raises:
        ValueError: if ``job_size`` is not positive or there are no chunks to combine.
    """
    if job_size <= 0:
        raise ValueError(f"job_size must be a positive integer, got {job_size!r}")

    s3 = session.client("s3")
    transform_prefix = f"rank/{site}/{model_id}/transform"
    images = pd.read_csv(f"{root_dir}/{site}/models/{model_id}/input/images.csv")
    keys = [
        f"{transform_prefix}/predictions-{skip:05d}-{(skip + job_size - 1):05d}.csv"
        for skip in range(0, len(images), job_size)
    ]
    if not keys:
        raise ValueError("images.csv has no rows, so there are no prediction chunks to combine")

    chunks = []
    for key in keys:
        print(key)
        csv_obj = s3.get_object(Bucket=model_bucket, Key=key)
        csv_data = csv_obj['Body'].read().decode('utf-8')
        chunks.append(pd.read_csv(StringIO(csv_data)))

    df = pd.concat(chunks, ignore_index=True)

    csv_buffer = StringIO()
    df.to_csv(csv_buffer, index=False)
    # Build the key from this call's own site/model_id. The notebook-level
    # `transform_key` may point at a different model run than the arguments.
    output_key = f"{transform_prefix}/predictions.csv"
    s3.put_object(Body=csv_buffer.getvalue(), Bucket=model_bucket, Key=output_key)

    local_transform_dir = f"{root_dir}/{site}/models/{model_id}/transform"
    os.makedirs(local_transform_dir, exist_ok=True)
    df.to_csv(f"{local_transform_dir}/predictions.csv", index=False)
    return df


In [125]:
# Merge the per-chunk prediction files into a single predictions.csv.
# NOTE(review): site and model id are passed as literals here rather than the
# `site` / `model_id` variables — confirm they match the configuration cell.
combine_predictions(session, "WESTB0", "20230922-75")

rank/WESTB0/20230922-75/transform/predictions-00000-04999.csv
rank/WESTB0/20230922-75/transform/predictions-05000-09999.csv
rank/WESTB0/20230922-75/transform/predictions-10000-14999.csv
rank/WESTB0/20230922-75/transform/predictions-15000-19999.csv
rank/WESTB0/20230922-75/transform/predictions-20000-24999.csv
