# <B> Setup for Nvidai NeMo with SageMaker </B>
* Container: codna_python3

## AutoReload

In [1]:
%load_ext autoreload
%autoreload 2

## 0. Install packages

In [2]:
install_needed = True  # should only be True once
# install_needed = False

In [3]:
%%bash
#!/bin/bash

DAEMON_PATH="/etc/docker"
MEMORY_SIZE=10G

FLAG=$(cat $DAEMON_PATH/daemon.json | jq 'has("data-root")')
# echo $FLAG

if [ "$FLAG" == true ]; then
    echo "Already revised"
else
    echo "Add data-root and default-shm-size=$MEMORY_SIZE"
    sudo cp $DAEMON_PATH/daemon.json $DAEMON_PATH/daemon.json.bak
    sudo cat $DAEMON_PATH/daemon.json.bak | jq '. += {"data-root":"/home/ec2-user/SageMaker/.container/docker","default-shm-size":"'$MEMORY_SIZE'"}' | sudo tee $DAEMON_PATH/daemon.json > /dev/null
    sudo service docker restart
    echo "Docker Restart"
fi

Already revised


In [None]:
import sys
import IPython

if install_needed:
    print("installing deps and restarting kernel")
    !{sys.executable} -m pip install -U pip
    !{sys.executable} -m pip install -U smdebug sagemaker-experiments
    !{sys.executable} -m pip install -U sagemaker
    !{sys.executable} -m pip install -U datasets transformers
    !{sys.executable} -m pip install -U wget omegaconf text-unidecode sox
    
    ## Install NeMo
    !sudo yum install sox -y
    !sudo yum install libsndfile
    !pip install --upgrade --force-reinstall llvmlite
    BRANCH = 'main'
    !{sys.executable} -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]
    
    IPython.Application.instance().kernel.do_shutdown(True)

installing deps and restarting kernel
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting pip
  Downloading pip-23.0.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m82.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 22.3.1
    Uninstalling pip-22.3.1:
      Successfully uninstalled pip-22.3.1
Successfully installed pip-23.0.1
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting smdebug
  Downloading smdebug-1.0.12-py2.py3-none-any.whl (270 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.1/270.1 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sagemaker-experiments
  Downloading sagemaker_experiments-0.1.43-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m

## 1. Set roles

In [4]:
from sagemaker import get_execution_role

In [5]:
strSageMakerRoleName = get_execution_role().rsplit('/', 1)[-1]
print (f"SageMaker Execution Role Name: {strSageMakerRoleName}")

SageMaker Execution Role Name: AmazonSageMaker-ExecutionRole-20221206T163436


## 1.1 Attach IAM polich to sagemaker execution role (<b>with console</b>)
> **SSM**: "arn:aws:iam::aws:policy/AmazonSSMFullAccess"<BR>
> **EC2ContainerRegistry**: "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess" <BR>
> **S3**: "arn:aws:iam::aws:policy/AmazonS3FullAccess"

## 2. Set default parameters

In [6]:
import boto3
import sagemaker
from utils.s3 import s3_handler
from utils.ssm import parameter_store

In [7]:
strRegionName=boto3.Session().region_name
pm = parameter_store(strRegionName)
s3 = s3_handler(strRegionName)

This is a S3 handler with [ap-northeast-2] region.


In [8]:
strPrefix = "SM-NeMo-"

* Bucket 생성
    * bucket 명은 소문자만 가능

In [10]:
strBucketName = ''.join([strPrefix, "BUCKET"]).lower()
s3.create_bucket(bucket_name=strBucketName)

In [11]:
pm.put_params(key="PREFIX", value=strPrefix, overwrite=True)
pm.put_params(key="".join([strPrefix, "REGION"]), value=strRegionName, overwrite=True)
pm.put_params(key="".join([strPrefix, "BUCKET"]), value=strBucketName, overwrite=True)
pm.put_params(key="".join([strPrefix, "SAGEMAKER-ROLE-ARN"]), value=get_execution_role(), overwrite=True)
pm.put_params(key="".join([strPrefix, "ACCOUNT-ID"]), value=boto3.client("sts").get_caller_identity().get("Account"), overwrite=True)

'Store suceess'

In [26]:
pm.put_params(key="PREFIX", value=strPrefix, overwrite=True, enc=False)

'Store suceess'

## 3. Create custom docker image for preprocessing

* docker build

In [12]:
from utils.ecr import ecr_handler

In [13]:
ecr = ecr_handler()

In [14]:
!pygmentize custom-docker/Dockerfile

[37m## docker build -f Dockerfile -t 322537213286.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.13.1-gpu-py39-nemo-main .[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m# From 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.13.1-gpu-py39[39;49;00m[37m[39;49;00m
[37m[39;49;00m
[37m# RUN pip install --no-cache-dir --upgrade pip \[39;49;00m[37m[39;49;00m
[37m#  && pip install --no-cache-dir -U omegaconf hydra-core librosa sentencepiece youtokentome inflect \[39;49;00m[37m[39;49;00m
[37m#  && pip install --no-cache-dir -U braceexpand webdataset editdistance jiwer \[39;49;00m[37m[39;49;00m
[37m#  && pip install --no-cache-dir -U pytorch-lightning \ [39;49;00m[37m[39;49;00m
[37m#  && pip install --no-cache-dir -qq https://github.com/pyannote/pyannote-audio/archive/develop.zip \ [39;49;00m[37m[39;49;00m
[37m#  && pip install --no-cache-dir git+https://github.com/huggingface/transformers \ [39;49;00m[37m[39;49;00m
[37m#  && pip install --no

In [15]:
strRepositoryName = "NeMo-Image"
strRepositoryName = strRepositoryName.lower()
strDockerDir = "./custom-docker/"
strTag = ":latest"

* Base 이미지의 region, account-id 확인 후 아래 파라미터 입력

In [16]:
ecr.build_docker(strDockerDir, strRepositoryName, strRegionName="us-west-2", strAccountId="763104351884")

/home/ec2-user/SageMaker/nemo-on-sagemaker
/home/ec2-user/SageMaker/nemo-on-sagemaker/custom-docker
aws ecr get-login --region 'us-west-2' --registry-ids '763104351884' --no-include-email


https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded

Sending build context to Docker daemon   12.8kB

Step 1/5 : FROM 763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-training:1.13.1-gpu-py39
 ---> f0cd3f7ded0e
Step 2/5 : RUN apt -y update && apt -y install sox
 ---> Using cache
 ---> 03ea9e49dca9
Step 3/5 : RUN apt-get -y install libsox-fmt-all
 ---> Using cache
 ---> e10716b824bd
Step 4/5 : RUN pip install --no-cache-dir --upgrade pip  && pip install --no-cache-dir -U omegaconf hydra-core librosa sentencepiece youtokentome inflect sox  && pip install --no-cache-dir -U braceexpand webdataset editdistance jiwer  && pip install --no-cache-dir -U pytorch-lightning  && pip install --no-cache-dir -qq https://github.com/pyannote/pyannote-audio/archive/develop.zip  && pip install --no-cache-dir git+https://github.com/huggingface/transformers  && pip install --no-cache-dir git+https://github.com/NVIDIA/NeMo.git@main
 ---> Running in 50e1789ca22a
Collecting pip
  Downloading pip-23.0.1-py3-none-any.whl (2.1 MB)
     ━━━━━━━━

* Push the image to ECR

In [17]:
strRegionName=boto3.Session().region_name
strAccountId = boto3.client("sts").get_caller_identity().get("Account")
strEcrRepositoryUri = ecr.register_image_to_ecr(strRegionName, strAccountId, strRepositoryName, strTag)

== REGISTER AN IMAGE TO ECR ==
  processing_repository_uri: 419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image:latest
aws ecr get-login --region 'ap-northeast-2' --registry-ids '419974056037' --no-include-email


https://docs.docker.com/engine/reference/commandline/login/#credentials-store



Login Succeeded

aws ecr create-repository --repository-name 'nemo-image'
docker tag 'nemo-image:latest' '419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image:latest'
docker push '419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image:latest'
== REGISTER AN IMAGE TO ECR ==


In [18]:
#strEcrRepositoryUri = "419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image"

* Save image-uri to parameter store

In [18]:
pm.put_params(key="".join([strPrefix, "IMAGE-URI"]), value=strEcrRepositoryUri, overwrite=True)

'Store suceess'

## 4. Download & Upload dataset

In [19]:
import os
import wget

In [20]:
data_dir = "./data"

In [21]:
print("******")
os.makedirs(data_dir, exist_ok=True)
if not os.path.exists(data_dir + '/an4_sphere.tar.gz'):
    an4_url = 'https://dldata-public.s3.us-east-2.amazonaws.com/an4_sphere.tar.gz'
    an4_path = wget.download(an4_url, data_dir)
    print(f"Dataset downloaded at: {an4_path}")
else:
    print("Tarfile already exists.")
    an4_path = data_dir + '/an4_sphere.tar.gz'

******
Tarfile already exists.


* updoad data to s3

In [22]:
from utils.s3 import s3_handler

In [23]:
s3 = s3_handler()

This is a S3 handler with [None] region.


In [24]:
source_dir, target_bucket, target_dir = data_dir, pm.get_params(key=strPrefix+"BUCKET"), "data"
s3.upload_dir(source_dir, target_bucket, target_dir)

Upload:[./data] was uploaded to [s3://sm-nemo-bucket/data]successfully


In [37]:
pm.put_params(key="".join([strPrefix, "S3-DATA-PATH"]), value=f"s3://{target_bucket}/{target_dir}", overwrite=True)

'Store suceess'

## 5. Check parameters

In [19]:
print (f'PREFIX: {pm.get_params(key="PREFIX")}')
print (f'REGION: {pm.get_params(key="".join([strPrefix, "REGION"]))}')
print (f'BUCKET: {pm.get_params(key="".join([strPrefix, "BUCKET"]))}')
print (f'AGEMAKER-ROLE-ARN: {pm.get_params(key="".join([strPrefix, "SAGEMAKER-ROLE-ARN"]))}')
print (f'ACCOUNT-ID: {pm.get_params(key="".join([strPrefix, "ACCOUNT-ID"]))}')
print (f'IMAGE-URI: {pm.get_params(key="".join([strPrefix, "IMAGE-URI"]))}')
print (f'S3-DATA-PATH: {pm.get_params(key="".join([strPrefix, "S3-DATA-PATH"]))}')

PREFIX: SM-NeMo-
REGION: ap-northeast-2
BUCKET: sm-nemo-bucket
AGEMAKER-ROLE-ARN: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436
ACCOUNT-ID: 419974056037
IMAGE-URI: 419974056037.dkr.ecr.ap-northeast-2.amazonaws.com/nemo-image
S3-DATA-PATH: s3://sm-nemo-bucket/data
