In [None]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt

import azureml
from azureml.core import Workspace, Dataset

# Show which Azure ML SDK version this kernel is running.
print("Azure ML SDK Version: ", azureml.core.VERSION)

# Inicjacja przestrzeni roboczej

In [None]:
# Connect to the workspace and print its identifying details, one per line.
ws = Workspace.from_config()

workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print(*workspace_summary, sep='\n')

# Utworzenie eksperymentu Azure ML

In [None]:
from azureml.core import Experiment

# Local folder holding the training script; it is used as the run's source directory.
script_folder = './scripts'
os.makedirs(script_folder, exist_ok=True)

# Local folder that receives a copy of the previously trained model.
# Created up front (like script_folder) so that a later copy lands inside it
# instead of producing a plain file called 'retrain'.
retrain_folder = './retrain'
os.makedirs(retrain_folder, exist_ok=True)

# Experiment under which the training runs are submitted.
exp = Experiment(workspace=ws, name='textgen')

In [4]:
# Reference the corpus snapshot stored in the workspace's default datastore.
datastore = ws.get_default_datastore()
dataset = Dataset.File.from_files(path=(datastore, 'Corpus/01-17-2021_104148_UTC/'))

# Mount the dataset only for as long as it takes to list its files. Entering
# the context manager starts the mount and leaving it (also on error) stops
# it, so no mount is left behind for the lifetime of the kernel.
# Note: dataset_mount_folder is only a valid path inside this block.
with dataset.mount() as mount_ctx:
    dataset_mount_folder = mount_ctx.mount_point
    print(dataset_mount_folder)

    files = os.listdir(dataset_mount_folder)
    print(files)

/tmp/tmpkfphc6ld
['corpus_text_12000000.txt', 'corpus_text_16000000.txt', 'corpus_text_20000000.txt', 'corpus_text_24000000.txt', 'corpus_text_28000000.txt', 'corpus_text_32000000.txt', 'corpus_text_36000000.txt', 'corpus_text_4000000.txt', 'corpus_text_40000000.txt', 'corpus_text_8000000.txt']


# Utworzenie środowiska obliczeniowego

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "ml-gpu-cl-lp"

# Reuse the cluster if the workspace already has one with this name;
# otherwise provision it.
# NOTE(review): the recorded status of the existing cluster reports
# 'vmPriority': 'LowPriority' and 'maxNodeCount': 1, while the configuration
# below sets no priority and allows 4 nodes — confirm the intended settings.
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning_config)
else:
    print('Found existing compute target')

# Wait (up to 20 minutes) for provisioning to finish. With min_node_count=None
# the cluster's own scale settings decide how many nodes to wait for.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# Print the detailed status of the cluster.
print(compute_target.get_status().serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 1, 'targetNodeCount': 1, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 1, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-01-21T23:04:56.776000+00:00', 'errors': None, 'creationTime': '2021-01-21T13:20:48.526839+00:00', 'modifiedTime': '2021-01-21T13:21:04.170071+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 1, 'nodeIdleTimeBeforeScaleDown': 'PT1200S'}, 'vmPriority': 'LowPriority', 'vmSize': 'STANDARD_NC6'}


In [6]:
# Print name, type and provisioning state of every compute target in the workspace.
compute_targets = ws.compute_targets
for target_name, target in compute_targets.items():
    print(target_name, target.type, target.provisioning_state)

ml-cpu ComputeInstance Succeeded
ml-gpu-cl AmlCompute Succeeded
ml-gpu-cl-lp AmlCompute Succeeded


# Kopiowanie skryptu treningowego i modelu do folderów roboczych

In [25]:
import shutil

# Stage the training script in the folder that is uploaded with the run.
shutil.copy('./script.py', script_folder)

# './model/model' is the path loaded with tf.saved_model.load further down,
# i.e. a SavedModel directory. shutil.copy only handles single files and
# fails on a directory, so a directory is copied as a tree instead.
model_source = './model/model'
if os.path.isdir(model_source):
    model_target = os.path.join(retrain_folder, os.path.basename(model_source))
    # copytree requires a non-existing destination; drop the copy left by a
    # previous execution of this cell so the cell can be re-run.
    if os.path.isdir(model_target):
        shutil.rmtree(model_target)
    shutil.copytree(model_source, model_target)
else:
    # Single-file model: make sure the target is a folder so the file is
    # copied into it rather than written as a file named after the folder.
    os.makedirs(retrain_folder, exist_ok=True)
    shutil.copy(model_source, retrain_folder)

'./scripts/script.py'

In [26]:
# Print the staged training script to confirm what will be submitted.
# The file name is joined without a leading './' so the resulting path is
# './scripts/script.py' rather than './scripts/./script.py'.
with open(os.path.join(script_folder, 'script.py'), 'r') as f:
    print(f.read())

import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from keras.callbacks import Callback

from azureml.core import Run

import numpy as np
import os
import time
import glob
import string
import matplotlib.pyplot as plt


parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder', default='data', help='data folder mounting point')
parser.add_argument('--model-folder', type=str, dest='model_folder', default=None, help='model to train')
parser.add_argument('--text-id', type=int, dest='text_id', default=1)
parser.add_argument('--text-size', type=int, dest='text_size', default=10000)
parser.add_argument('--epochs', type=int, dest='epochs', default=1)
args = parser.parse_args()

data_folder = args.data_folder
print('training dataset is stored here:', data_folder)
files = os.listdir(data_folder)

model = None
model

# Utworzenie środowiska

In [27]:
%%writefile conda_dependencies.yml

# Conda specification of the environment used by the remote training run.
channels:
- conda-forge
dependencies:
- python=3.6.2
# pip is listed explicitly: conda warns, and may pick the wrong pip, when an
# environment file has a pip section but no pip dependency.
- pip
- pip:
  - h5py<=2.10.0
  - azureml-defaults
  # NOTE(review): both tensorflow-gpu and tensorflow are requested, unpinned —
  # confirm that both are needed and which version is expected.
  - tensorflow-gpu
  - keras
  - tensorflow
  - matplotlib

Overwriting conda_dependencies.yml


In [31]:
from azureml.core import Environment

# Build the run environment from the conda specification file written by the
# %%writefile cell.
# NOTE(review): the environment name 'texgen' is spelled differently from the
# 'textgen' experiment name — confirm that this is intentional.
textgen_env = Environment.from_conda_specification(
    name='texgen',
    file_path='./conda_dependencies.yml',
)

# Run inside Docker, on top of a CUDA/cuDNN base image.
gpu_base_image = 'mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.0-cudnn7-ubuntu18.04'
textgen_env.docker.enabled = True
textgen_env.docker.base_image = gpu_base_image

# Alternative kept for reference: fetch a named environment from the workspace instead.
#textgen_env = Environment.get(ws, name='AzureML-TensorFlow-2.3-GPU')

# Konfiguracja treningu

In [32]:
import tensorflow as tf

# Load the local SavedModel and display the Python type it is restored as.
saved_model_dir = './model/model'
model = tf.saved_model.load(saved_model_dir)
type(model)

tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject

In [33]:
from azureml.core import ScriptRunConfig

# Command-line arguments for script.py. The dataset is passed as a mounted
# input named 'text'; its mount path is what the script receives as --data-folder.
mounted_corpus = dataset.as_named_input('text').as_mount()
args = [
    '--data-folder', mounted_corpus,
    '--text-id', 1,
    # '--text-size', 10000,
    '--epochs', 1,
]

# Bundle script, arguments, compute target and environment into one run configuration.
src = ScriptRunConfig(
    source_directory=script_folder,
    script='script.py',
    arguments=args,
    compute_target=compute_target,
    environment=textgen_env,
)

# Zatwierdzenie zadania do uruchomienia

In [34]:
# Submit the run configuration to the experiment; `run` is the handle used by the cells below.
run = exp.submit(src)

In [35]:
from azureml.widgets import RunDetails
# Show the notebook widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [36]:
# Display the run's summary table (experiment, id, type, status, links).
run

Experiment,Id,Type,Status,Details Page,Docs Page
textgen,textgen_1611271774_5faca64f,azureml.scriptrun,Queued,Link to Azure Machine Learning studio,Link to Documentation


In [37]:
# Block until the run finishes, streaming its logs into the cell output.
run.wait_for_completion(show_output=True)

RunId: textgen_1611271774_5faca64f
Web View: https://ml.azure.com/experiments/textgen/runs/textgen_1611271774_5faca64f?wsid=/subscriptions/fc814e44-1cd5-4ab5-944b-f2255f816d34/resourcegroups/text-generation/workspaces/deep-learning

Streaming azureml-logs/55_azureml-execution-tvmps_cff03917453a8001f928502ec2a6147c013beeb956d6b3167d39aa64a00686f6_p.txt

2021-01-21T23:29:44Z Starting output-watcher...
2021-01-21T23:29:44Z IsDedicatedCompute == False, starting polling for Low-Pri Preemption
2021-01-21T23:29:45Z Executing 'Copy ACR Details file' on 10.0.0.5
2021-01-21T23:29:45Z Copy ACR Details file succeeded on 10.0.0.5. Output: 
>>>   
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_9d4fa30783fc98f2c7c7f19c6a312f30
Digest: sha256:96a223a2d683aab4b4f91719ba3f705a79883c430ca39e73845fd2ba36704f14
Status: Image is up to date for viennaglobal.azurecr.io/azureml/azureml_9d4fa30783fc98f2c7c7f19c6a312f30:latest
viennaglobal.azurecr.io/azureml/azureml_9d4fa30

In [None]:
# Display the details recorded for the run.
run.get_details()

In [None]:
# Display the metrics recorded for the run.
run.get_metrics()

In [None]:
# List the names of the files stored with the run.
run.get_file_names()

In [None]:
# Create a model folder in the current directory.
os.makedirs('./model', exist_ok=True)

# Download every file the run stored under 'outputs/' into './model/'.
# Files below 'outputs/model/' are placed directly under './model/'; the
# prefix is stripped only when it is a leading prefix (str.replace would also
# remove it from the middle of a name). Other names are kept unchanged.
model_prefix = 'outputs/model/'
for f in run.get_file_names():
    if f.startswith('outputs/'):
        relative_name = f[len(model_prefix):] if f.startswith(model_prefix) else f
        print(relative_name)
        output_path = os.path.join('./model/', relative_name)
        # Make sure nested target folders exist, whether or not download_file creates them.
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        print('Downloading from {} to {} ...'.format(f, output_path))
        run.download_file(name=f, output_file_path=output_path)