In [1]:
%matplotlib inline
import numpy as np
import os
import matplotlib.pyplot as plt
from azureml.core.dataset import Dataset
from azureml.core.datastore import Datastore

import diff_classifier

In [2]:
import azureml
from azureml.core import Workspace

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.49.0


In [3]:
ws = Workspace.get(
    name='schimek_mpt_eastus2',
    subscription_id='9c49afdd-cccb-44e2-a4eb-5b2f941c774c',
    resource_group='schimek_cloud',
    location='eastus2')

In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "nlsschim-gpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2023-04-19T00:48:23.938000+00:00', 'errors': None, 'creationTime': '2023-03-30T22:33:31.869377+00:00', 'modifiedTime': '2023-03-30T22:33:35.839923+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 1, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_NV6'}


In [5]:
compute_targets = ws.compute_targets
for name, ct in compute_targets.items():
    print(name, ct.type, ct.provisioning_state)

nlsschim-gpu-cluster AmlCompute Succeeded


In [6]:
%%writefile conda_dependencies.yml

channels:
- conda-forge
dependencies:
- python=3.7
- pip=21.3.1
- pip:
  - h5py<=2.10.0
  - azureml-defaults
  - tensorflow-gpu==2.0.0
  - keras<=2.3.1
  - matplotlib
  - protobuf==3.20.1
  - stochastic
  - diff_classifier
  - scikit-learn

Overwriting conda_dependencies.yml


In [7]:
from azureml.core import Environment

keras_env = Environment.from_conda_specification(name = 'keras-2.3.1', file_path = './conda_dependencies.yml')

# Specify a GPU base image
keras_env.docker.enabled = True
keras_env.docker.base_image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04'

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


In [None]:
datastore = Datastore.get(ws, 'workspaceblobstore')
file_dataset = Dataset.File.from_files(path=(datastore, 'UI/2023-04-06_224259_UTC/'))

#dataset.to_path()

In [None]:
temp = Dataset.get_by_name(workspace = ws, name = 'msd_age_data', version='latest')

In [69]:
dataset = file_dataset.register(workspace = ws, name = 'age_data_trajectories', create_new_version=True)

In [70]:
file_dataset.to_path()

['/msd_P14_40nm_s1_v1.csv',
 '/msd_P14_40nm_s1_v2.csv',
 '/msd_P14_40nm_s1_v3.csv',
 '/msd_P14_40nm_s1_v4.csv',
 '/msd_P14_40nm_s1_v5.csv',
 '/msd_P14_40nm_s2_v1.csv',
 '/msd_P14_40nm_s2_v2.csv',
 '/msd_P14_40nm_s2_v3.csv',
 '/msd_P14_40nm_s2_v4.csv',
 '/msd_P14_40nm_s2_v5.csv',
 '/msd_P14_40nm_s3_v1.csv',
 '/msd_P14_40nm_s3_v2.csv',
 '/msd_P14_40nm_s3_v3.csv',
 '/msd_P14_40nm_s3_v4.csv',
 '/msd_P14_40nm_s3_v5.csv',
 '/msd_P28_40nm_s1_v1.csv',
 '/msd_P28_40nm_s1_v2.csv',
 '/msd_P28_40nm_s1_v3.csv',
 '/msd_P28_40nm_s1_v4.csv',
 '/msd_P28_40nm_s1_v5.csv',
 '/msd_P28_40nm_s2_v1.csv',
 '/msd_P28_40nm_s2_v2.csv',
 '/msd_P28_40nm_s2_v3.csv',
 '/msd_P28_40nm_s2_v4.csv',
 '/msd_P28_40nm_s2_v5.csv',
 '/msd_P28_40nm_s3_v1.csv',
 '/msd_P28_40nm_s3_v2.csv',
 '/msd_P28_40nm_s3_v4.csv',
 '/msd_P28_40nm_s3_v5.csv',
 '/msd_P35_brain_2_slice_1_vid_1.csv',
 '/msd_P35_brain_2_slice_1_vid_2.csv',
 '/msd_P35_brain_2_slice_1_vid_3.csv',
 '/msd_P35_brain_2_slice_1_vid_4.csv',
 '/msd_P35_brain_2_slice_1_vid_5

In [25]:
from azureml.core import ScriptRunConfig

#args = ['--input-data', dataset.as_named_input("trajectories")]#,
#         '--batch-size', 50,
#         '--first-layer-neurons', 300,
#         '--second-layer-neurons', 100,
#         '--learning-rate', 0.001]

src = ScriptRunConfig(source_directory='.',
                      script='Diffusion_coeff_net_training.py',
                      #arguments=args,
                      compute_target=compute_target,
                      environment=keras_env)

In [26]:
from azureml.core import Experiment
exp = Experiment(workspace=ws, name='anom_diff')

In [27]:
run = exp.submit(src)

In [28]:
from azureml.widgets import RunDetails

RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…