In [132]:
%%writefile script/train.py

from DeepPurpose import utils, CompoundPred
from tdc.single_pred import ADME
import warnings
import argparse
import os
warnings.filterwarnings("ignore")

if __name__ == "__main__":
    
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_dir', type=str, default=os.environ['SM_MODEL_DIR'])
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    args = parser.parse_args()
    print ("*****training file*****")
    
    files = []
    # r=root, d=directories, f = files
    for r, d, f in os.walk(args.train):
        for file in f:
            files.append(os.path.join(r, file))

    for f in files:
        print(f)
        
    X, y = ADME(name = 'HIA_Hou').get_data(format = 'DeepPurpose')
    drug_encoding = 'MPNN'
    train, val, test = utils.data_process(X_drug = X, 
                                      y = y, 
                                      drug_encoding = drug_encoding,
                                      random_seed = 'TDC')
    config = utils.generate_config(drug_encoding = drug_encoding, 
                         train_epoch = 3, 
                         LR = 0.001, 
                         batch_size = 128,
                         mpnn_hidden_size = 32,
                         mpnn_depth = 2
                        )
    model = CompoundPred.model_initialize(**config)
    model.train(train, val, test)
    
    model.save_model(args.model_dir)

Overwriting script/train.py


In [133]:
%%writefile Dockerfile

# Part of the implementation of this container is based on the Amazon SageMaker Apache MXNet container.
# https://github.com/aws/sagemaker-mxnet-container

FROM ubuntu:16.04

LABEL maintainer="Amazon AI"

# Defining some variables used at build time to install Python3
ARG PYTHON=python3
ARG PYTHON_PIP=python3-pip
ARG PIP=pip3
ARG PYTHON_VERSION=3.6.6

# Install some handful libraries like curl, wget, git, build-essential, zlib
RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common && \
    add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        wget \
        git \
        libopencv-dev \
        openssh-client \
        openssh-server \
        vim \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

# Installing Python3
RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \
        tar -xvf Python-$PYTHON_VERSION.tgz && cd Python-$PYTHON_VERSION && \
        ./configure && make && make install && \
        apt-get update && apt-get install -y --no-install-recommends libreadline-gplv2-dev libncursesw5-dev libssl-dev libsqlite3-dev tk-dev libgdbm-dev libc6-dev libbz2-dev && \
        make && make install && rm -rf ../Python-$PYTHON_VERSION* && \
        ln -s /usr/local/bin/pip3 /usr/bin/pip

# Upgrading pip and creating symbolic link for python3
RUN ${PIP} --no-cache-dir install --upgrade pip
RUN ln -s $(which ${PYTHON}) /usr/local/bin/python

WORKDIR /

# Installing numpy, pandas, scikit-learn, scipy
RUN ${PIP} install --no-cache --upgrade \
        numpy==1.14.5 \
        pandas==0.24.1 \
        scikit-learn==0.20.3 \
        requests==2.21.0 \
        scipy==1.2.2

# Setting some environment variables.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/lib" \
    PYTHONIOENCODING=UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

RUN ${PIP} install --no-cache --upgrade \
    sagemaker-training


RUN pip install rdkit-pypi
RUN pip install PyTDC
RUN pip install pandas-flavor
RUN pip install git+https://github.com/bp-kelley/descriptastorus
RUN pip install DeepPurpose

# Copies code under /opt/ml/code where sagemaker-containers expects to find the script to run
COPY script/train.py /opt/ml/code/train.py

# Defines train.py as script entry point
ENV SAGEMAKER_PROGRAM train.py


Overwriting Dockerfile


In [134]:
%%sh

docker_name=sagemaker-deeppurpose
docker build -t $docker_name -f Dockerfile .

account=$(aws sts get-caller-identity --query Account --output text)
echo $account
region=$(aws configure get region)

fullname="${account}.dkr.ecr.${region}.amazonaws.com/${docker_name}:latest"
# If the repository doesn't exist in ECR, create it.
aws ecr describe-repositories --repository-names "${docker_name}" > /dev/null 2>&1
if [ $? -ne 0 ]
then
    aws ecr create-repository --repository-name "${docker_name}" > /dev/null
fi

# Get the login command from ECR and execute it directly
$(aws ecr get-login --region ${region} --no-include-email)

docker tag ${docker_name} ${fullname}
docker push ${fullname}


Step 1/21 : FROM ubuntu:16.04
 ---> b6f507652425
Step 2/21 : LABEL maintainer="Amazon AI"
 ---> Using cache
 ---> 7d3810176a2e
Step 3/21 : ARG PYTHON=python3
 ---> Using cache
 ---> 683c419be179
Step 4/21 : ARG PYTHON_PIP=python3-pip
 ---> Using cache
 ---> b8624329a0e4
Step 5/21 : ARG PIP=pip3
 ---> Using cache
 ---> a42942582dae
Step 6/21 : ARG PYTHON_VERSION=3.6.6
 ---> Using cache
 ---> 1a0cabeefc40
Step 7/21 : RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common &&     add-apt-repository ppa:deadsnakes/ppa -y &&     apt-get update && apt-get install -y --no-install-recommends         build-essential         ca-certificates         curl         wget         git         libopencv-dev         openssh-client         openssh-server         vim         zlib1g-dev &&     rm -rf /var/lib/apt/lists/*
 ---> Using cache
 ---> 0ea7cc8cd8de
Step 8/21 : RUN wget https://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz &&         tar -

https://docs.docker.com/engine/reference/commandline/login/#credentials-store



In [135]:
import sagemaker
from sagemaker import get_execution_role
from sagemaker.session import Session

# Setup session
sess = sagemaker.Session()

# S3 bucket for saving code and model artifacts.
# Feel free to specify a different bucket here if you wish.
bucket = sess.default_bucket()

# # Location to put your custom code.
# custom_code_upload_location = "deeppurpose/customcode"

# IAM execution role that gives Amazon SageMaker access to resources in your AWS account.
# You can use the Amazon SageMaker Python SDK to get the role from the notebook environment.
role = get_execution_role()

In [136]:
import boto3

from sagemaker.local import LocalSession
sagemaker_session = LocalSession()

# Set target dgl-docker name
docker_name = "sagemaker-deeppurpose"

CODE_PATH = "script/train.py"
code_location = sess.upload_data(CODE_PATH, bucket=bucket, key_prefix=custom_code_upload_location)

account = sess.boto_session.client("sts").get_caller_identity()["Account"]
region = sess.boto_session.region_name
image = "{}.dkr.ecr.{}.amazonaws.com/{}:latest".format(account, region, docker_name)
print(image)
task_tags = [{"Key": "ML Task", "Value": "deeppurpose"}]
estimator = sagemaker.estimator.Estimator(
    image,
    role,
    instance_count=1,
    instance_type="local",
    tags=task_tags,
    sagemaker_session=sagemaker_session,
    #hyperparameters={"sagemaker_program": "train.py", "sagemaker_submit_directory": code_location}
)

485822383573.dkr.ecr.us-east-1.amazonaws.com/sagemaker-deeppurpose:latest


In [137]:
train_prefix='deeppurpose/data'
train_data=sess.upload_data('DeepPurpose-master/toy_data/AID1706.txt', bucket=bucket, key_prefix='deeppurpose/data')
print(train_data)


# train_config=sagemaker.session.s3_input(
#     train_data, content_type="text/csv"
# )
estimator.fit({"train":train_data})

s3://sagemaker-us-east-1-485822383573/deeppurpose/data/AID1706.txt
Creating dzj1jounl0-algo-1-jzngf ... 
Creating dzj1jounl0-algo-1-jzngf ... done
Attaching to dzj1jounl0-algo-1-jzngf
[36mdzj1jounl0-algo-1-jzngf |[0m   from cryptography.hazmat.backends import default_backend
[36mdzj1jounl0-algo-1-jzngf |[0m 2022-07-21 00:33:25,787 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mdzj1jounl0-algo-1-jzngf |[0m 2022-07-21 00:33:25,800 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
[36mdzj1jounl0-algo-1-jzngf |[0m 2022-07-21 00:33:25,805 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36mdzj1jounl0-algo-1-jzngf |[0m 2022-07-21 00:33:25,818 sagemaker-training-toolkit INFO     instance_groups entry not present in resource_config
[36mdzj1jounl0-algo-1-jzngf |[0m 2022-07-21 00:33:25,821 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
