# <B> Evaluation </B>
* Container: codna_pytorch_py39

## AutoReload

In [1]:
%load_ext autoreload
%autoreload 2

## 1. parameter store 셋팅

In [2]:
import boto3
from utils.ssm import parameter_store

In [3]:
strRegionName=boto3.Session().region_name
pm = parameter_store(strRegionName)
prefix = pm.get_params(key="PREFIX")

## 2.Processing-job for evaluation

In [4]:
import os
import sagemaker
from sagemaker.pytorch.estimator import PyTorch
from sagemaker.workflow.execution_variables import ExecutionVariables
from sagemaker.processing import ProcessingInput, ProcessingOutput, FrameworkProcessor

* **Set Up SageMaker Experiment**
    - Create or load [SageMaker Experiment](https://docs.aws.amazon.com/sagemaker/latest/dg/experiments.html) for the example training job. This will create an experiment trial object in SageMaker.

* params for processing job

In [24]:
from time import strftime
from smexperiments.trial import Trial
from smexperiments.experiment import Experiment

In [25]:
def create_experiment(experiment_name):
    try:
        sm_experiment = Experiment.load(experiment_name)
    except:
        sm_experiment = Experiment.create(experiment_name=experiment_name)

In [26]:
def create_trial(experiment_name):
    create_date = strftime("%m%d-%H%M%s")
    sm_trial = Trial.create(trial_name=f'{experiment_name}-{create_date}',
                            experiment_name=experiment_name)

    job_name = f'{sm_trial.trial_name}'
    return job_name

In [71]:
local_mode = False

if local_mode: 
    instance_type = 'local'
    
    import os
    from sagemaker.local import LocalSession
    from nemo.utils.notebook_utils import download_an4
    
    sagemaker_session = LocalSession()
    sagemaker_session.config = {'local': {'local_code': True}}

    data_path = os.getcwd() + "/data-tmp"
    download_an4(
        data_dir=data_path,
        train_mount_dir="/opt/ml/input/data/training/",
        test_mount_dir="/opt/ml/input/data/testing/",
    )
    
else:
    instance_type = "ml.g4dn.xlarge"
    sagemaker_session = sagemaker.Session()
    data_path = pm.get_params(key=prefix + 'S3-DATA-PATH')
    
model_artifact_s3_uri = pm.get_params(key=prefix + 'MODEL-PATH').rsplit("/", 1)[0]
test_manifest_s3_path = os.path.join(pm.get_params(key=prefix + 'PREP-DATA-PATH'), "an4", "test_manifest.json")
wav_s3_uri = os.path.join(pm.get_params(key=prefix + 'PREP-DATA-PATH'), "an4", "wav")
eval_output_s3_uri = os.path.join("s3://", pm.get_params(key=prefix + 'BUCKET'), "evaluation", "output", "eval_output")

code_location= os.path.join(
    "s3://{}".format(pm.get_params(key=prefix + "BUCKET")),
    "evaluation",
    "backup_codes"
)
    
print (f"instance-type: {instance_type}")
print (f"image-uri: {pm.get_params(key=''.join([prefix, 'IMAGE-URI']))}")
print (f"role: {pm.get_params(key=prefix + 'SAGEMAKER-ROLE-ARN')}")
print (f"bucket: {pm.get_params(key=prefix + 'BUCKET')}")
print (f"model-path: {pm.get_params(key=prefix + 'MODEL-PATH')}")
print (f"preprocessing-data-path: {pm.get_params(key=prefix + 'PREP-DATA-PATH')}")

print (f"model_artifact_s3_uri: {model_artifact_s3_uri}")
print (f"test_manifest_s3_path: {test_manifest_s3_path}")
print (f"wav_s3_uri: {wav_s3_uri}")
print (f"eval_output_s3_uri: {eval_output_s3_uri}")

print (f"data_dir: {data_path}")

print (f"sagemaker_session: {sagemaker_session}")

instance-type: ml.g4dn.xlarge
image-uri: 419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image:latest
role: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436
bucket: sm-nemo-bucket
model-path: s3://sm-nemo-bucket/training/model-output/SM-NeMo-nemo-exp1-0316-14541678978443/model.tar.gz
preprocessing-data-path: s3://sm-nemo-bucket/preprocessing/data
model_artifact_s3_uri: s3://sm-nemo-bucket/training/model-output/SM-NeMo-nemo-exp1-0316-14541678978443
test_manifest_s3_path: s3://sm-nemo-bucket/preprocessing/data/an4/test_manifest.json
wav_s3_uri: s3://sm-nemo-bucket/preprocessing/data/an4/wav
eval_output_s3_uri: s3://sm-nemo-bucket/evaluation/output/eval_output
data_dir: s3://sm-nemo-bucket/data
sagemaker_session: <sagemaker.session.Session object at 0x7f35689f0250>


* Define processing job

In [72]:
eval_processor = FrameworkProcessor(
    estimator_cls=PyTorch,
    framework_version="1.13.1",
    role=pm.get_params(key=prefix + "SAGEMAKER-ROLE-ARN"), 
    image_uri=pm.get_params(key=''.join([prefix, "IMAGE-URI"])),
    instance_count=1,
    instance_type=instance_type,
    code_location=code_location,
    env={
        'TEST_MANIFEST_PATH': '/opt/ml/input/data/testing/an4/wav', 
        'WAV_PATH' : '/opt/ml/processing/input/wav'
        }
    )

In [74]:
experiment_name = pm.get_params(key=prefix + "EXPERI-NAME")
create_experiment(experiment_name)
job_name = create_trial(experiment_name)

eval_processor.run(
    code="evaluate.py",
    source_dir="./an4_nemo_sagemaker/code/evaluation/",
    inputs=[
        ProcessingInput(
            source=model_artifact_s3_uri,
            input_name="model_artifact",
            destination="/opt/ml/processing/model"
        ),
        ProcessingInput(
            source=test_manifest_s3_path,
            input_name="test_manifest_file",
            destination="/opt/ml/processing/input/manifest"
        ),
        ProcessingInput(
            source=wav_s3_uri,
            input_name="wav_dataset",
            destination="/opt/ml/processing/input/wav"
        ),
    ],
    outputs=[
        ProcessingOutput(
            output_name="eval-output-data",
            source="/opt/ml/processing/evaluation",
            destination=eval_output_s3_uri
            
        ),
    ],
    job_name=job_name,
    experiment_config={
      'TrialName': job_name,
      'TrialComponentDisplayName': job_name,
    },
    wait=True
)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:sagemaker.processing:Uploaded ./an4_nemo_sagemaker/code/evaluation/ to s3://sm-nemo-bucket/evaluation/backup_codes/SM-NeMo-nemo-exp1-0316-15351678980930/source/sourcedir.tar.gz
INFO:sagemaker.processing:runproc.sh uploaded to s3://sm-nemo-bucket/evaluation/backup_codes/SM-NeMo-nemo-exp1-0316-15351678980930/source/runproc.sh
INFO:sagemaker:Creating processing-job with name SM-NeMo-nemo-exp1-0316-15351678980930


.....................................[34m[NeMo W 2023-03-16 15:41:38 optimizers:66] Could not import distributed_fused_adam optimizer from Apex[0m
[34m[NeMo W 2023-03-16 15:41:40 experimental:27] Module <class 'nemo.collections.asr.modules.audio_modules.SpectrogramToMultichannelFeatures'> is experimental, not ready for production and is not fully supported. Use at your own risk.[0m
[34mcheckpoint_path : trained_model/Conformer-CTC-Char1/2023-03-16_14-54-47/checkpoints/Conformer-CTC-Char1--val_wer=1.0000-epoch=2-last.ckpt[0m
[34m[NeMo W 2023-03-16 15:41:45 modelPT:161] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    manifest_filepath: /opt/ml/input/data/training/an4/train_manifest.json
    labels:
    - ' '
    - a
    - b
    - c
    - d
    - e
    - f
    - g
    - h
    - i
    - j
    - k
    - l
    - m
    - 'n'
    - o
    - p
  

In [None]:
eval_processor

In [75]:
pm.put_params(key="".join([prefix, "EVAL-OUTPUT-PATH"]), value=eval_output_s3_uri+"evaluation.json", overwrite=True)

'Store suceess'

In [77]:
pm.get_params(key="".join([prefix, "EVAL-OUTPUT-PATH"]))

's3://sm-nemo-bucket/evaluation/output/eval_outputevaluation.json'