### Set up

#### 1. Set up account and role

In [None]:
import sagemaker
import boto3
import os
import sys
sys.path.append('./src')


# Region configured for the default boto3 session, and the id of the AWS
# account that owns the credentials in use (looked up through STS).
region = boto3.session.Session().region_name
account_id = boto3.client('sts').get_caller_identity().get('Account')

# SageMaker SDK session; later cells ask it for the default bucket.
sagemaker_session = sagemaker.Session()

# The execution role ARN is built explicitly from the account id rather than
# discovered with sagemaker.get_execution_role().
role = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(
    account_id
)


#### 2. Set up image and instance type

In [None]:
# Instance type requested for the training job.
instance_type = "ml.p3.2xlarge"

# Repository name and tag of the custom PyTorch training image.
pytorch_custom_image_name = "image-embedding:gpu-1.0.0-201908270722"

In [None]:
# Full ECR image URI: <account>.dkr.ecr.<region>.amazonaws.com/<repository:tag>
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id,
    region,
    pytorch_custom_image_name,
)

#### 3. Configure train/test and validation datasets

In [None]:
# Bucket that holds the raw, unsplit images.
raw_bucket = "aegovansagemaker"

# Default SageMaker bucket of the session; the split data and job output go here.
bucket = sagemaker_session.default_bucket()

In [None]:
# S3 location of the raw training images.
# NOTE(review): the prefix is spelled "merket1501" while the other paths in this
# notebook use "market1501" - confirm this matches the actual key in raw_bucket.
s3_train_raw = "s3://{}/merket1501/bounding_box_train/".format(
    raw_bucket
)

In [None]:
# Destinations in the session's default bucket: the train split, the
# validation split, and the training job output.
s3_train = "s3://{}/market1501/train/".format(bucket)
s3_val = "s3://{}/market1501/val/".format(bucket)
s3_output_path = "s3://{}/market1501_output/".format(bucket)

## Split into train and validation sets

In [None]:
# Local scratch area with one sub-directory each for raw train and raw validation data.
temp_dir = "/tmp/imageebedding"
train_raw_dir, val_raw_dir = (
    os.path.join(temp_dir, subdir) for subdir in ("train_raw", "val_raw")
)



In [None]:
!rm -rf $temp_dir 
!mkdir  -p $temp_dir 
!mkdir -p  $train_raw_dir
!mkdir -p  $val_raw
!aws s3 sync $s3_train_raw $train_raw_dir --quiet

In [None]:
# Project-local dataset class; presumably resolved through the './src' entry
# added to sys.path at the top of the notebook - TODO confirm.
from datasets.Market1501Dataset import Market1501Dataset

# Dataset object constructed over the locally synced raw training directory.
dataset = Market1501Dataset(train_raw_dir)

In [None]:
# os.listdir returns entries in arbitrary order; sort them so that the label
# indices and the seeded train/validation split are reproducible across runs.
files = [os.path.join(train_raw_dir, f) for f in sorted(os.listdir(train_raw_dir))]

# The market 1501 dataset files have the naming convention target_camerasite_..., e.g. 1038_c2s2_131202_03.jpeg
target_raw_labels = [os.path.basename(f).split("_")[0] for f in files]

# Map each raw label to a zero-based index, numbered in order of first appearance.
zero_indexed_labels_dict = {}
for rc in target_raw_labels:
    zero_indexed_labels_dict.setdefault(rc, len(zero_indexed_labels_dict))

target_zero_indexed_labels = [zero_indexed_labels_dict[label] for label in target_raw_labels]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the files (with their labels) for validation, using a fixed seed.
datatrain_x, dataval_x, datatrain_y, dataval_y = train_test_split(
    files,
    target_zero_indexed_labels,
    test_size=0.20,
    random_state=42,
)

In [None]:
import boto3
import os
s3_client = boto3.client('s3')

def upload_files(files, s3_dest):
    """Upload local files to an S3 destination.

    Each file is stored under the destination prefix using its base name.

    Args:
        files: Iterable of local file paths.
        s3_dest: Destination URI of the form ``s3://bucket/optional/prefix/``.
    """
    # The destination is the same for every file, so split it into bucket
    # and key prefix once instead of on every iteration.
    bucket_d, _, prefix = s3_dest.split("//", 1)[1].partition("/")
    prefix = prefix.strip("/")

    for f in files:
        fname = os.path.basename(f)
        # Without a prefix the key is just the file name; this avoids a
        # key that starts with "/".
        key = "{}/{}".format(prefix, fname) if prefix else fname
        s3_client.upload_file(f, bucket_d, key)
    

In [None]:
%%time

# Push the validation split to its S3 prefix.
upload_files(files=dataval_x, s3_dest=s3_val)

In [None]:
%%time

# Push the training split to its S3 prefix.
upload_files(files=datatrain_x, s3_dest=s3_train)

### Start training

In [None]:
# Input channels for the training job, keyed by channel name. The values are
# the S3 prefixes the train/validation splits were uploaded to; the names
# `train` and `val` used previously are not defined in this notebook.
inputs = {
    "train": s3_train,
    "val": s3_val,
}

In [None]:
# Hyperparameters handed to the estimator; every value is given as a string.
# "log-level" is not a valid keyword name, so it is added by subscript.
hyperparameters = dict(batchsize="32", epochs="1000")
hyperparameters["log-level"] = "INFO"

In [None]:
# Metric name / regex pairs passed to the estimator; capture group 1 holds the
# numeric value. Raw strings keep `\d` as a regex token instead of an invalid
# string escape sequence.
metric_definitions = [
    {"Name": "TrainLoss",
     "Regex": r"###score: train_loss### (\d*[.]?\d*)"},
    {"Name": "ValidationLoss",
     "Regex": r"###score: val_loss### (\d*[.]?\d*)"},
    {"Name": "TrainAccuracy",
     "Regex": r"###score: train_accuracy### (\d*[.]?\d*)"},
    {"Name": "ValidationAccuracy",
     "Regex": r"###score: val_accuracy### (\d*[.]?\d*)"},
]

In [None]:
# Git repository and branch given to the estimator as the source location.
# A fixed revision can be selected by also passing
# commit='a8be7cb98ec93150b5156447a307afb02f3f5fb5'.
git_config = dict(
    repo='https://github.com/elangovana/image-embedding.git',
    branch='master',
)

In [None]:
from sagemaker.pytorch import PyTorch

# Training job definition: source location, role and image first, then the
# compute settings, and finally the training configuration and output.
estimator = PyTorch(
    entry_point='experiment_train.py',
    source_dir='src',
    dependencies=['src'],
    git_config=git_config,
    role=role,
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version='py3',
    train_instance_type=instance_type,
    train_instance_count=1,
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name="image-embedding",
    # train_use_spot_instances=True
)

In [None]:
# Launch the training job with the configured input channels.
estimator.fit(inputs=inputs)