# FPE Rank Model

In [9]:
import boto3
import os
import pandas as pd
from io import StringIO, BytesIO
from urllib.parse import urlparse
import json

def get_url_path(url):
    """Return the path component of ``url`` without its leading slash.

    For ``https://host/a/b.jpg`` this yields ``a/b.jpg``. Percent-escapes in
    the path are returned as-is (no decoding is performed).

    Args:
        url: URL string to parse.

    Returns:
        The URL path with a single leading "/" removed when one is present.
    """
    path = urlparse(url).path
    # Only strip when there actually is a leading slash; slicing blindly
    # would eat the first real character of a value that has no scheme/host
    # (e.g. an already-relative key such as "a/b.jpg").
    return path[1:] if path.startswith("/") else path

def create_manifest(bucket, data_key, filename = "images.manifest"):
    """Build an image manifest from a CSV stored in S3 and upload it beside the CSV.

    The CSV at ``s3://{bucket}/{data_key}`` is downloaded and its ``url``
    column is converted to URL paths via ``get_url_path``. The manifest is a
    JSON array whose first element is ``{"prefix": "s3://{bucket}/"}`` followed
    by one path per CSV row. It is written to the same key prefix
    ("directory") as ``data_key`` under the name ``filename``.

    Args:
        bucket: Name of the S3 bucket holding the CSV; also used for the
            manifest prefix and as the upload destination.
        data_key: Object key of the CSV file inside ``bucket``.
        filename: Object name for the uploaded manifest.

    Returns:
        The ``s3://`` URI of the uploaded manifest.

    Raises:
        KeyError: If the CSV has no ``url`` column.
        Errors raised by the S3 client calls are not caught here.
    """
    s3 = boto3.client('s3')

    # Read CSV file from S3
    print(f"downloading data: {data_key}")
    csv_obj = s3.get_object(Bucket=bucket, Key=data_key)
    csv_data = csv_obj['Body'].read().decode('utf-8')
    data = pd.read_csv(StringIO(csv_data))
    print(f"length: {len(data)}")

    # Turn every value of the 'url' column into its URL path, then put the
    # shared bucket prefix entry at the front of the list.
    manifest = data['url'].apply(get_url_path).to_list()
    manifest.insert(0, {"prefix": f"s3://{bucket}/"})

    # Upload JSON to S3, next to the CSV. A CSV at the bucket root has an
    # empty dirname; joining that with "/" would produce a key that starts
    # with a slash, so fall back to the bare filename in that case.
    prefix = os.path.dirname(data_key)
    manifest_key = f"{prefix}/{filename}" if prefix else filename
    body = json.dumps(manifest)
    print(f"uploading manifest: {manifest_key}")
    s3.put_object(Bucket=bucket, Key=manifest_key, Body=body)
    return f"s3://{bucket}/{manifest_key}"

In [10]:
# Build and upload the image manifest for the AVERYBB subset CSV; the
# returned manifest URI is the cell's displayed value.
create_manifest(
    bucket="walkerenvres-fpe-models",
    data_key="models/AVERYBB/data/flow-images-subset.csv",
    filename="images-subset.manifest",
)

downloading data: models/AVERYBB/data/flow-images-subset.csv
length: 4298
uploading manifest: models/AVERYBB/data/images-subset.manifest


's3://walkerenvres-fpe-models/models/AVERYBB/data/images-subset.manifest'

In [11]:
# Build and upload the image manifest for the WESTB0 subset CSV; the
# returned manifest URI is the cell's displayed value.
create_manifest(
    bucket="walkerenvres-fpe-models",
    data_key="models/WESTB0/data/flow-images-subset.csv",
    filename="images-subset.manifest",
)

downloading data: models/WESTB0/data/flow-images-subset.csv
length: 2484
uploading manifest: models/WESTB0/data/images-subset.manifest


's3://walkerenvres-fpe-models/models/WESTB0/data/images-subset.manifest'

In [44]:
from sagemaker.pytorch import PyTorch

# Site whose model is being trained. Every site-specific value below (S3
# locations, the "site" hyperparameter, the job name) is derived from this
# one constant so they cannot drift apart when switching sites.
SITE = "AVERYBB"
MODEL_ROOT = f"s3://walkerenvres-fpe-models/models/{SITE}"

output_path = f"{MODEL_ROOT}/jobs/"
checkpoint_path = f"{MODEL_ROOT}/checkpoints/"

# NOTE(review): the execution role ARN is hard-coded; confirm it is valid for
# whichever account/environment runs this notebook.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="src",
    py_version="py38",
    framework_version="1.12",
    role="arn:aws:iam::474916309046:role/service-role/AmazonSageMaker-ExecutionRole-20201211T145559",
    instance_count=1,
    # instance_type="ml.m5.large",
    instance_type="ml.p3.2xlarge",
    volume_size=100,
    # Passed through to train.py as command-line style hyperparameters.
    hyperparameters={
        "site": SITE,
        "data-file": "flow-images-subset.csv",
        "num-image-stats": 1000,
        "epochs": 15,
    },
    base_job_name=f"fpe-rank-{SITE}",
    output_path=output_path,
    checkpoint_s3_uri=checkpoint_path,
    # Same location as output_path, but without the trailing slash.
    code_location=output_path.rstrip("/"),
    disable_output_compression=True,
)

In [45]:
from sagemaker.inputs import TrainingInput

# Training inputs: `images` is a manifest-driven channel (File mode) that
# points at the uploaded manifest, and `values` is the plain S3 URI of the CSV.
data_dir = "s3://walkerenvres-fpe-models/models/AVERYBB/data"
images = TrainingInput(
    s3_data=data_dir + "/images-subset.manifest",
    s3_data_type="ManifestFile",
    input_mode="File",
)
values = data_dir + "/flow-images-subset.csv"
(images, values)

(<sagemaker.inputs.TrainingInput at 0x7f78903ecd00>,
 's3://walkerenvres-fpe-models/models/AVERYBB/data/flow-images-subset.csv')

In [None]:
# Launch the training job. Each dictionary key names an input channel:
# "images" is the manifest-backed TrainingInput, "values" is the CSV S3 URI.
estimator.fit(
    inputs={
        "images": images,
        "values": values,
    }
)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: fpe-rank-AVERYBB-2023-08-02-17-12-22-727


2023-08-02 17:12:23 Starting - Starting the training job...
2023-08-02 17:12:53 Starting - Preparing the instances for training.........
2023-08-02 17:14:16 Downloading - Downloading input data.........
2023-08-02 17:15:31 Training - Downloading the training image............
2023-08-02 17:18:02 Training - Training image download completed. Training in progress....[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-08-02 17:18:26,468 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-08-02 17:18:26,487 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2023-08-02 17:18:26,500 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-08-02 17:18:26,503 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-08-02 17:18:26,