In [1]:
import sys
sys.path.append("../..")

import os
import shutil

from utils_nlp.azureml.azureml_utils import get_or_create_workspace


In [2]:

    # Workspace coordinates. Each value can be overridden through an environment
    # variable so the notebook can target another subscription or workspace
    # without editing this cell; the fallbacks are the values that were
    # originally hard-coded here.
    ws = get_or_create_workspace(
        subscription_id=os.environ.get(
            "AZUREML_SUBSCRIPTION_ID", "15ae9cb6-95c1-483d-a0e3-b1a1a3b06324"
        ),
        resource_group=os.environ.get("AZUREML_RESOURCE_GROUP", "nlprg"),
        workspace_name=os.environ.get("AZUREML_WORKSPACE_NAME", "MAIDAIPBERT-eastus"),
        workspace_region=os.environ.get("AZUREML_WORKSPACE_REGION", "East US"),
    )


In [None]:
# Disabled duplicate of the workspace-lookup cell: the call is wrapped in a
# triple-quoted string, so evaluating this cell only produces that string and
# never calls get_or_create_workspace.
'''
    ws = get_or_create_workspace(
    subscription_id="15ae9cb6-95c1-483d-a0e3-b1a1a3b06324",
    resource_group="nlprg",
    workspace_name="MAIDAIPBERT-eastus",
    workspace_region="East US",
)
'''


In [3]:
# Echo the identifying fields of the workspace handle, one per line.
name_line = "Workspace name: {}".format(ws.name)
group_line = "Resource group: {}".format(ws.resource_group)
print(name_line, group_line, sep="\n")

Workspace name: MAIDAIPBERT-eastus
Resource group: nlprg


In [4]:
# Name of the Azure ML compute cluster that a later cell looks up in the
# workspace (and creates when the lookup fails).
cluster_name = "bertncrs24"

In [5]:
from azureml.core.compute import ComputeTarget

In [6]:
# `AmlCompute` and `ComputeTargetException` are used by the fallback branch
# below; without these imports a missing cluster surfaces as a NameError
# instead of triggering cluster creation.
from azureml.core.compute import AmlCompute
from azureml.core.compute_target import ComputeTargetException

try:
    # Reuse the cluster when it already exists in the workspace.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found compute target: {}".format(cluster_name))
except ComputeTargetException:
    # The lookup failed: provision a new cluster and wait until it is ready.
    print("Creating new compute target: {}".format(cluster_name))
    # NOTE(review): a cluster created here is a single STANDARD_NC6 node, while
    # the estimator cell further down requests node_count=2 with 4 processes
    # per node -- confirm the size before relying on this branch.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6", max_nodes=1
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
    compute_target.wait_for_completion(show_output=True)


# Show the cluster's current status as a plain dictionary.
print(compute_target.get_status().serialize())

Found compute target: bertncrs24
{'currentNodeCount': 1, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 1, 'preemptedNodeCount': 0}, 'allocationState': 'Resizing', 'allocationStateTransitionTime': '2019-07-24T20:58:25.663000+00:00', 'errors': None, 'creationTime': '2019-07-12T19:59:45.933132+00:00', 'modifiedTime': '2019-07-12T20:00:01.793458+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NC24RS_V3'}


In [7]:
DEBUG = True
project_dir = "./entail"

# In debug mode, discard any previously staged project folder so the copy
# below starts from an empty location.
if DEBUG:
    if os.path.exists(project_dir):
        shutil.rmtree(project_dir)

# Stage the shared `utils_nlp` package inside the project folder; the copied
# destination path is the cell's displayed result.
shutil.copytree("../../utils_nlp", os.path.join(project_dir, "utils_nlp"))

'./entail\\utils_nlp'

In [8]:
%%writefile $project_dir/train.py

import horovod.torch as hvd
import torch
import numpy as np
import argparse
from torch.utils.data import DataLoader, SequentialSampler
from utils_nlp.dataset.xnli_dataset import XnliDataset
from utils_nlp.bert.common import Language
from pytorch_pretrained_bert.optimization import BertAdam
from utils_nlp.bert.sequence_classification_distributed import BERTSequenceClassifier
from sklearn.metrics import classification_report

from azureml.core.run import Run
# Handle on the Azure ML run context this script executes in.
run = Run.get_context()

print("Torch version:", torch.__version__)

# --- Dataset / tokenizer settings --------------------------------------------
LANGUAGE_ENGLISH = "en"
CACHE_DIR = "./"
TRAIN_FILE_SPLIT, TEST_FILE_SPLIT = "train", "test"
TO_LOWERCASE = True
PRETRAINED_BERT_LNG = Language.ENGLISH

# --- Optimizer settings ------------------------------------------------------
LEARNING_RATE = 5e-5
WARMUP_PROPORTION = 0.1
BATCH_SIZE = 32

# Passed through to the classifier's fit/predict calls further down.
NUM_GPUS = 4

# Initialise Horovod before any of the rank/size queries made below.
hvd.init()

parser = argparse.ArgumentParser()
# Training settings
parser.add_argument('--seed', type=int, default=42, metavar='S',help='random seed (default: 42)')
parser.add_argument('--epochs', type=int, default=2, metavar='N', help='number of epochs to train (default: 2)')
parser.add_argument('--num_workers', type=int, default=2, metavar='N', help='number of workers to train (default: 2)')
parser.add_argument('--no-cuda', action='store_true', default=False,help='disables CUDA training')


args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()
print(args.cuda)

if args.cuda:
    torch.cuda.set_device(hvd.local_rank())
    torch.cuda.manual_seed(args.seed)

#kwargs = {}
kwargs = {'num_workers': 2, 'pin_memory': True} if args.cuda else {}

train_dataset = XnliDataset(file_split=TRAIN_FILE_SPLIT,
                            cache_dir=CACHE_DIR,
                            language=LANGUAGE_ENGLISH,
                            to_lowercase=TO_LOWERCASE,
                            tok_language=PRETRAINED_BERT_LNG)

# Shard the training set across all Horovod processes. The shard index has to
# be the global rank: local_rank() restarts at 0 on every node, so on a
# multi-node job several processes would read the same shard while the
# remaining shards would never be read.
train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_dataset, num_replicas=hvd.size(), rank=hvd.rank()
)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler, **kwargs)

# Keep the training label encoder; evaluation uses it to decode predictions.
label_encoder = train_dataset.label_encoder
num_labels = len(np.unique(train_dataset.labels))

print("============================= Data set size ========================")
classifier = BERTSequenceClassifier(language=PRETRAINED_BERT_LNG,
                                    num_labels=num_labels,
                                    cache_dir=CACHE_DIR,
                                    )

# optimizer configurations
num_samples = len(train_loader.dataset)
num_batches = num_samples // BATCH_SIZE
# NOTE(review): this counts batches over the whole dataset, although the
# DistributedSampler hands each process only a 1/hvd.size() share. The
# per-worker variant is kept in the trailing comment -- confirm which schedule
# length is intended.
num_train_optimization_steps = num_batches * args.epochs  # int(num_batches/hvd.size()) * args.epochs
optimizer_grouped_parameters = classifier.optimizer_params

print("================= num_train_optimization_steps ==============================")
print(num_train_optimization_steps)

# Scale the base learning rate by the number of Horovod processes.
lr = LEARNING_RATE * hvd.size()

# Build the optimizer exactly once, with or without the warm-up schedule.
if WARMUP_PROPORTION is None:
    print("================== Without Warmup proprtion ===========================")
    bert_optimizer = BertAdam(optimizer_grouped_parameters, lr=lr)
else:
    print("================== With Warmup proportion =============================")
    bert_optimizer = BertAdam(optimizer_grouped_parameters,
                              lr=lr,
                              t_total=num_train_optimization_steps,
                              warmup=WARMUP_PROPORTION,
                              )


# Wrap the optimizer with Horovod's distributed optimizer, then broadcast the
# model state from rank 0 to every other process.
bert_optimizer = hvd.DistributedOptimizer(
    bert_optimizer,
    classifier.model.named_parameters(),
)
hvd.broadcast_parameters(classifier.model.state_dict(), root_rank=0)

# Temporary diagnostic (marked "remove later" by the author): report whether
# this process is rank 0.
if hvd.rank() != 0:
    print("===== not master rank =================================")
else:
    print("===================== rank rank =======================", hvd.rank())

# Train for the requested number of epochs.
classifier.fit(train_loader, bert_optimizer, args.epochs, NUM_GPUS, hvd.rank())

# Evaluation: only the rank-0 process scores the test split.
if hvd.rank() == 0:
    kwargs = {}
    test_dataset = XnliDataset(file_split=TEST_FILE_SPLIT,
                               cache_dir=CACHE_DIR,
                               language=LANGUAGE_ENGLISH,
                               to_lowercase=TO_LOWERCASE,
                               tok_language=PRETRAINED_BERT_LNG
                               )
    # Sequential order keeps predictions aligned with the labels gathered below.
    test_sampler = SequentialSampler(test_dataset)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, sampler=test_sampler, **kwargs)

    predictions = classifier.predict(test_loader, NUM_GPUS, BATCH_SIZE, probabilities=False)

    print('=================== Predictions =====================')
    print(predictions)

    # Gather the gold labels from every batch. A single batch would provide
    # only BATCH_SIZE labels, which cannot be compared against predictions
    # made for the whole split.
    test_labels = []
    for test_batch in test_loader:
        test_labels.extend(np.asarray(test_batch['labels']).tolist())

    # NOTE(review): predictions are decoded with the training label encoder
    # while test_labels are used as yielded by the loader -- confirm both are
    # in the same label space before trusting the report.
    predictions = label_encoder.inverse_transform(predictions)
    print(classification_report(test_labels, predictions))

Writing ./entail/train.py


In [9]:
import torch
import azureml.core
from azureml.train.dnn import PyTorch
from azureml.core.runconfig import MpiConfiguration
from azureml.core import Experiment
from azureml.widgets import RunDetails

In [10]:
NODE_COUNT = 2

# MPI launch settings: four processes are started on every node (the same
# number as NUM_GPUS in train.py).
mpiConfig = MpiConfiguration()
mpiConfig.process_count_per_node = 4

# Estimator configured with `project_dir` as the source directory and train.py
# as the entry script, to run on the compute target selected earlier.
estimator_settings = dict(
    source_directory=project_dir,
    compute_target=compute_target,
    entry_script="train.py",
    node_count=NODE_COUNT,
    distributed_training=mpiConfig,
    use_gpu=True,
    framework_version="1.0",
    conda_packages=["scikit-learn=0.20.3", "numpy", "spacy", "nltk"],
    pip_packages=["pandas", "seqeval[gpu]", "pytorch-pretrained-bert"],
)
est = PyTorch(**estimator_settings)

In [None]:
# Report which Azure ML SDK version this notebook is running against.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version:", sdk_version)

In [11]:

# Bind the experiment name in the workspace and submit the estimator as a run.
experiment = Experiment(workspace=ws, name="nlp-entailment-bert")
run = experiment.submit(config=est)

In [12]:
# Display the monitoring widget for the submitted run.
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [None]:
# Cancels the submitted run.
# NOTE(review): in a top-to-bottom "Run All" this would presumably cancel the
# job shortly after it was submitted -- execute this cell manually only.
run.cancel()

In [None]:
# Call the method: the bare attribute `run.get_details` only displays the bound
# method object rather than the details of the run.
run.get_details()

In [None]:
# Register the run's 'outputs' path as a model named 'outputs', then print the
# registered model's name, id and version separated by tabs.
# NOTE(review): score.py resolves the model as 'bert_entailment' -- confirm
# which registration name is intended.
model = run.register_model(model_path='outputs', model_name='outputs')
registered_fields = (model.name, model.id, model.version)
print(*registered_fields, sep='\t')

The next statement downloads the registered model to the local machine so that it can be used to run predictions locally.

In [None]:
# Download the registered model's files to the local machine. exist_ok=True is
# passed so that re-running the cell is tolerated (presumably it overwrites an
# earlier download -- confirm against the SDK documentation).
model.download(exist_ok=True)

## Prediction 

In [None]:
# TODO: is this required in order to call files from the datastore for deployment?
DEBUG = True
project_dir = "./entailment_aml"

# In debug mode, discard any previously staged deployment folder so the copy
# below starts from an empty location.
if DEBUG:
    if os.path.exists(project_dir):
        shutil.rmtree(project_dir)

# Stage the shared `utils_nlp` package inside the deployment folder.
shutil.copytree("../../utils_nlp", os.path.join(project_dir, "utils_nlp"))

In [None]:
from utils_nlp.dataset.xnli import load_pandas_df

# Fraction of the English XNLI test split kept for the local prediction demo.
TEST_FRACTION_USED = 0.0025

test_df = load_pandas_df(local_cache_path="../../temp", file_split="test", language="en")

test_data_used_count = round(TEST_FRACTION_USED * test_df.shape[0])
# Positional slicing keeps exactly `test_data_used_count` rows. Label-based
# `.loc[:n]` includes the end label, so on a default integer index it would
# keep one row more than the computed count.
test_df = test_df.iloc[:test_data_used_count]

In [None]:
# Display the reduced test dataframe selected in the previous cell.
test_df

In [None]:
import torch

# Deserialize the locally saved checkpoint.
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
checkpoint_path = 'bert_entailment.model'
model = torch.load(checkpoint_path)


In [None]:
import sys

# Make the repository root importable. The membership check keeps the cell
# idempotent: re-running it does not add the same entry to sys.path again.
if "../.." not in sys.path:
    sys.path.append("../..")

In [None]:
%%writefile score.py

import torch
from sklearn.externals import joblib
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import SequentialSampler, DataLoader
from utils_nlp.dataset.xnli_dataset_test import XnliDatasetTest
from utils_nlp.bert.common import Language
from utils_nlp.bert.sequence_classification_distributed import BERTSequenceClassifier
from azureml.core.model import Model

def init():
    """Resolve and print the path of the registered 'bert_entailment' model.

    ``model`` is declared as a module-level name, but no model object is
    assigned to it here.
    """
    global model

    # NOTE(review): loading the weights from the resolved path is still to be
    # written; for now the path is only looked up and printed.
    registered_path = Model.get_model_path('bert_entailment')
    print('Model Path ::', registered_path)

def run():
    """Run the classifier over the XNLI test dataset and return the predictions.

    Loads the checkpoint stored at ``outputs/bert-large-uncased``, builds an
    English ``BERTSequenceClassifier`` with three labels, and calls its
    ``predict`` method on ``XnliDatasetTest`` in sequential batches of 32 with
    ``num_gpus=0``.

    Returns:
        The value returned by ``BERTSequenceClassifier.predict`` for the test
        loader, so that callers such as ``predictions = score.run()`` receive
        the result instead of ``None``.
    """
    LANGUAGE_ENGLISH = Language.ENGLISH
    CACHE_DIR = "../../temp"

    print("===============load model=============")

    model_path = 'outputs/bert-large-uncased'
    # The map_location callable returns each storage unchanged, i.e. tensors
    # stay where they were deserialized rather than being moved to a GPU.
    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    model = torch.load(model_path, map_location=lambda storage, loc: storage)
    bert_model = BERTSequenceClassifier(language=LANGUAGE_ENGLISH,
                                        num_labels=3,
                                        cache_dir=CACHE_DIR,
                                        )

    # NOTE(review): the loaded checkpoint `model` is never attached to
    # `bert_model`, so predictions presumably come from whatever weights
    # BERTSequenceClassifier sets up on its own -- confirm how the checkpoint
    # should be applied before trusting the output.

    num_gpus = 0
    batch_size = 32
    probabilities = False
    test_dataset = XnliDatasetTest()
    test_sampler = SequentialSampler(test_dataset)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, sampler=test_sampler)

    print("make predictions now please :(")
    predictions = bert_model.predict(test_loader, num_gpus, batch_size, probabilities)
    print('=================== Predictions =====================')
    print(predictions)
    return predictions
     

In [None]:
# Import the score.py module written by the %%writefile cell and invoke its
# run() function locally, keeping whatever it returns in `predictions`.
import score
predictions = score.run()

In [None]:
#run.cancel()