# **Evaluation**
* Container: conda_pytorch_p310

## AutoReload

In [2]:
%load_ext autoreload
%autoreload 2

## 1. parameter store 설정

In [3]:
import boto3
from utils.ssm import parameter_store

In [4]:
# Resolve the region from the default boto3 session and open the project's
# parameter store in that region.
boto_session = boto3.Session()
region = boto_session.region_name
pm = parameter_store(region)

# Every project-specific key read later in this notebook is built as
# "<prefix>-<NAME>", so the prefix is fetched once here.
prefix = pm.get_params(key="PREFIX")

## 2. Evaluation-job

In [5]:
import os
import sagemaker
from sagemaker.pytorch.estimator import PyTorch
from sagemaker.workflow.execution_variables import ExecutionVariables
from sagemaker.processing import ProcessingInput, ProcessingOutput, FrameworkProcessor



* Params for evaluation job

In [9]:
# Switch between a local dry run (SageMaker local mode) and a managed job.
local_mode = False


def _project_param(name, **kwargs):
    """Return the parameter-store value saved under the key '<prefix>-<name>'.

    Extra keyword arguments (for example ``enc=True``) are forwarded
    unchanged to ``pm.get_params``.
    """
    return pm.get_params(key="-".join([prefix, name]), **kwargs)


# The trained model artifact is read from the same place in both modes.
model_path = _project_param("MODEL-PATH")

if local_mode:
    instance_type = 'local'

    import os
    from sagemaker.local import LocalSession

    sagemaker_session = LocalSession()
    # Local mode reads the test split from the working directory.
    data_path = os.path.join(os.getcwd(), "data", "preprocessing", "test")
else:
    instance_type = "ml.m5.2xlarge"
    sagemaker_session = sagemaker.Session()
    # S3 URIs always use "/" as separator, so the URI is assembled explicitly
    # instead of with the OS-dependent os.path.join.
    data_path = f'{_project_param("PREP-DATA-PATH").rstrip("/")}/test'

# Repository holding the evaluation code; the credentials are read with
# enc=True from the parameter store.
git_config = {
    'repo': f'https://{_project_param("CODE_REPO")}',
    'branch': 'main',
    'username': _project_param("CODECOMMIT-USERNAME", enc=True),
    'password': _project_param("CODECOMMIT-PWD", enc=True)
}

role = _project_param("SAGEMAKER-ROLE-ARN")
bucket_name = _project_param("BUCKET-NAME")

# Cell outputs are saved with the notebook, so credentials must never be
# echoed: print a copy of git_config with the secret fields masked.
masked_git_config = {
    field: ('********' if field in ('username', 'password') else value)
    for field, value in git_config.items()
}

print(f'instance-type: {instance_type}')
print(f'role: {role}')
print(f'bucket: {bucket_name}')
print(f'model-path: {model_path}')
print(f'dataset-path: {data_path}')
print(f'sagemaker_session: {sagemaker_session}')
print(f'git_config: {masked_git_config}')

instance-type: ml.m5.2xlarge
role: AmazonSageMaker-ExecutionRole-20221004T162466
bucket: sm-bert-ramp
model-path: s3://sm-bert-ramp/ramp-mlops/training/model-output/finetune-distilbert-base-uncased-0807-05461691387181/output/model.tar.gz
dataset-path: s3://sm-bert-ramp/ramp-mlops/preprocessing/data/test
sagemaker_session: <sagemaker.session.Session object at 0x7fc71eb34fd0>
git_config: {'repo': 'https://git-codecommit.ap-northeast-2.amazonaws.com/v1/repos/bert-code', 'branch': 'main', 'username': '********', 'password': '********'}


* Define processing job

In [10]:
# Processor that runs the evaluation script with the PyTorch framework image
# (framework 2.0.0 / py310) on a single instance.
dataset_processor = FrameworkProcessor(
    estimator_cls=PyTorch,
    framework_version="2.0.0",
    py_version='py310',
    instance_type=instance_type,
    instance_count=1,
    role=role,
    # Name shown in the bucket; when the job is wrapped in a pipeline, the
    # name defined in the pipeline is what appears in the bucket instead.
    base_job_name="evaluation",
    sagemaker_session=sagemaker_session
)

# Base path under which the run call places the job's input, model and
# output directories.
proc_prefix = "/opt/ml/processing"

# S3 URIs always use "/" as separator, so the URI is joined explicitly rather
# than with os.path.join, whose separator depends on the client OS.
output_path = "/".join(
    [
        f"s3://{bucket_name}",
        prefix,
        "evaluation",
        "results",
    ]
)

INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.


In [11]:
# Launch the evaluation processing job.
#
# Note on caching: a fixed job name (e.g. job_name="preprocessing") must be
# passed for step caching to work; otherwise a date/time suffix is appended
# to the processor's base_job_name and the cache never hits.
#
# Paths inside the processing container are always POSIX, so they are built
# with "/" explicitly instead of the client-OS-dependent os.path.join.
dataset_processor.run(
    code='evaluation.py',  # entry script, path relative to source_dir
    # With git_config set, source_dir is resolved inside the cloned repository
    # (the job log shows <clone dir>/./code being uploaded). It holds the
    # entry script together with requirements.txt.
    source_dir="./code",
    git_config=git_config,
    inputs=[
        # Test split to evaluate on.
        ProcessingInput(
            input_name="input-data",
            source=data_path,
            destination=f"{proc_prefix}/input"
        ),
        # Trained model artifact.
        ProcessingInput(
            input_name="model-data",
            source=model_path,
            destination=f"{proc_prefix}/model"
        ),
    ],
    outputs=[
        # Everything written under <proc_prefix>/output is uploaded to output_path.
        ProcessingOutput(
            output_name="output-data",
            source=f"{proc_prefix}/output",
            destination=output_path
        ),
    ],
    # The script receives the container base path via --proc_prefix.
    arguments=[
        "--proc_prefix", proc_prefix,
    ]
)

Cloning into '/tmp/tmpwj14xkva'...
remote: Counting objects: 9, done.        
Already on 'main'
INFO:sagemaker.processing:Uploaded /tmp/tmpwj14xkva/./code to s3://sagemaker-ap-northeast-2-419974056037/evaluation-2023-08-07-05-57-06-074/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sagemaker-ap-northeast-2-419974056037/evaluation-2023-08-07-05-57-06-074/source/runproc.sh


Your branch is up to date with 'origin/main'.
Using provided s3_resource


INFO:sagemaker:Creating processing-job with name evaluation-2023-08-07-05-57-06-074


[34mCollecting accelerate>=0.20.3 (from -r requirements.txt (line 1))
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 244.2/244.2 kB 10.8 MB/s eta 0:00:00[0m
[34mCollecting transformers==4.31.0 (from -r requirements.txt (line 2))
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.4/7.4 MB 19.1 MB/s eta 0:00:00[0m
[34mCollecting datasets[s3]==1.18.4 (from -r requirements.txt (line 3))
  Downloading datasets-1.18.4-py3-none-any.whl (312 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 312.1/312.1 kB 39.2 MB/s eta 0:00:00[0m
[34mCollecting huggingface-hub<1.0,>=0.14.1 (from transformers==4.31.0->-r requirements.txt (line 2))
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 268.8/268.8 kB 34.2 MB/s eta 0:00:00[0m
[34mCollecting regex!=2019.12.17 (from transformers==4.31.0->-r requirements.txt (line 2))
  Downloading

## 3. parameter store에 Evaluation output 추가

In [12]:
# Publish the location of the evaluation report in the parameter store.
# NOTE(review): assumes evaluation.py writes "evaluation.json" into the job's
# output directory; confirm against the script.
eval_output_uri = f"{output_path}/evaluation.json"
pm.put_params(
    key="-".join([prefix, "EVAL-OUTPUT-PATH"]),
    value=eval_output_uri,
    overwrite=True,
)

'Store suceess'

In [13]:
# Read the stored value back to confirm the write.
eval_output_key = "-".join([prefix, "EVAL-OUTPUT-PATH"])
pm.get_params(key=eval_output_key)

's3://sm-bert-ramp/ramp-mlops/evaluation/results/evaluation.json'