In [7]:
!pip install wandb
!pip install sagemaker -U
!pip install sagemaker-experiments -U

Collecting sagemaker
  Downloading sagemaker-2.45.0.tar.gz (414 kB)
[K     |████████████████████████████████| 414 kB 18.7 MB/s eta 0:00:01
Building wheels for collected packages: sagemaker
  Building wheel for sagemaker (setup.py) ... [?25ldone
[?25h  Created wheel for sagemaker: filename=sagemaker-2.45.0-py2.py3-none-any.whl size=582729 sha256=033087fa448d0784c29f1c850b3fe721c790cbdc5ddedad774564b32ebd59e64
  Stored in directory: /root/.cache/pip/wheels/a0/f4/09/f46003f23abdc37b204d1bb68654108132a575d5df3a29b104
Successfully built sagemaker
Installing collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.42.0
    Uninstalling sagemaker-2.42.0:
      Successfully uninstalled sagemaker-2.42.0
Successfully installed sagemaker-2.45.0
Collecting sagemaker-experiments
  Downloading sagemaker_experiments-0.1.32-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 118 kB/s  eta 0:00:01
Installing collected packages

In [15]:
# Display the SageMaker SDK version currently loaded in this kernel.
# NOTE: a package upgraded after it was already imported keeps reporting the
# old version until the kernel is restarted.
import sagemaker

sagemaker.__version__
# wandb SageMaker auth helper, left disabled:
#wandb.sagemaker_auth(path="code")

'2.42.0'

In [16]:
from smexperiments.experiment import Experiment
from smexperiments.trial import Trial
from smexperiments.trial_component import TrialComponent
from smexperiments.tracker import Tracker
import json
import boto3
import sagemaker
from sagemaker.pytorch import PyTorch
import wandb
import time
# boto3 session pinned to us-east-1, plus the low-level SageMaker client that
# the experiment / trial / tracker calls below receive as `sagemaker_boto_client`.
sess = boto3.Session(region_name="us-east-1")
sm = sess.client(service_name="sagemaker")

In [12]:
import os

# IAM execution role of this notebook; handed to the estimator as `role`.
# (Resolved once -- a second call whose result was discarded has been removed.)
role = sagemaker.get_execution_role()

## Create experiment

In [17]:
# Register a new SageMaker experiment. The epoch-seconds suffix makes the
# experiment name differ on every run of this cell.
trainer_experiment = Experiment.create(
    sagemaker_boto_client=sm,
    experiment_name="IEMOCAP-AUDIO-4classes-{}".format(int(time.time())),
    description="Classification of IEMOCAP dataset using 4 classes and audio",
)
print(trainer_experiment)


Experiment(sagemaker_boto_client=<botocore.client.SageMaker object at 0x7fcc3b528190>,experiment_name='IEMOCAP-AUDIO-4classes-1623118261',description='Classification of IEMOCAP dataset using 4 classes and audio',tags=None,experiment_arn='arn:aws:sagemaker:us-east-1:278741951345:experiment/iemocap-audio-4classes-1623118261',response_metadata={'RequestId': '194d2d0d-9bb9-4cef-a737-0890b0e84b54', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': '194d2d0d-9bb9-4cef-a737-0890b0e84b54', 'content-type': 'application/x-amz-json-1.1', 'content-length': '105', 'date': 'Tue, 08 Jun 2021 02:11:01 GMT'}, 'RetryAttempts': 0})


## Training

In [19]:
# --- Audio feature-extraction settings ---------------------------------------
sr = 16000
sample_rate = sr
preemph_coef = 0.97
num_mel_bins = 40
# Window size / stride are multiplied by the sample rate below to obtain
# lengths in samples (presumably both are expressed in seconds -- confirm).
window_size = 0.025
window_stride = 0.01

# FFT size is fixed; an earlier variant apparently derived it from
# sample_rate * window_size.
n_fft = 512
win_length = int(sample_rate * window_size)
hop_length = int(sample_rate * window_stride)

# --- S3 locations ------------------------------------------------------------
# The bucket containing our input data
data_root = 's3://XXXX/datasets/IEMOCAP/TRAINING_VIDEOS/'
audio_folder = 's3://XXXX/datasets/IEMOCAP/LOGMEL_DELTAS'
video_folder = 's3://XXXX/datasets/IEMOCAP/TRAINING_VIDEOS'
weights_root = 's3://XXXX/datasets/IEMOCAP/MODELS/Resnet3D'
checkpoint_s3 = 's3://XXXX/training_checkpoints/'

# --- Record the preprocessing step as a trial component ----------------------
# `tracker` stays bound after the `with` block; its trial component is
# attached to the training trial later on.
with Tracker.create(sagemaker_boto_client=sm, display_name="Preprocessing") as tracker:
    tracker.log_parameters(
        dict(
            num_frames=4,
            frame_length=1.0,
            output_length=100,
            win_length=win_length,
            hop_length=hop_length,
            n_fft=n_fft,
            num_mel_bins=num_mel_bins,
        )
    )
    # Log the S3 URI of the dataset as an input of this component.
    # NOTE(review): the input is named "iemocap-audio-frames" but `data_root`
    # points at the TRAINING_VIDEOS prefix -- confirm this is the intended URI.
    tracker.log_input(name="iemocap-audio-frames", media_type="s3/uri", value=data_root)


In [220]:
# Training options forwarded to train.py as hyperparameters.
train_dict = {
    "max_nb_epochs": 1000,
    "batch_size": 32,
    "accum_grad_batches": 1,
    "precision": 16,
    "fixed-data": 1,
    "sweep-name": "MultimodalMatchmap-IEMOCAP",
    "model-name": "MultimodalMatchmap",
    "early-stop-num": 5,
    "dataset": "iemocap",
    "fold": 1,
    "model_type": "matchmap",
    "model_load_from_checkpoint": 0,
    "use_mixed_iemocap": 1,
    "gradient_clip_val": 5.0,
}

# Optimizer-related hyperparameters, merged with `train_dict` for the estimator.
model_dict = {"learning_rate": 0.0001077, "weight_decay": 0.001409}

# Trial component produced by the preprocessing Tracker.
preprocessing_trial_component = tracker.trial_component

# Create the trial inside the experiment; the epoch-seconds suffix keeps the
# trial name distinct between runs.
trial_name = "{}-training-job-{}".format(train_dict["model-name"], int(time.time()))
exp_trial = Trial.create(
    experiment_name=trainer_experiment.experiment_name,
    trial_name=trial_name,
    sagemaker_boto_client=sm,
)

# Associate the preprocessing trial component with the current trial.
exp_trial.add_trial_component(preprocessing_trial_component)

estimator = PyTorch(
    # Runnable script containing the __main__ entry point.
    entry_point="train.py",
    # Folder holding the training code; it may also contain a requirements.txt
    # listing dependencies to install before the script runs.
    source_dir=".",
    role=role,
    py_version="py3",
    framework_version="1.8",
    # Alternatives that were tried: 'ml.p3.8xlarge', 'ml.g4dn.2xlarge'.
    instance_type="ml.p3.2xlarge",
    instance_count=1,
    volume_size=50,
    # Spot training is currently disabled
    # (use_spot_instances=True, max_wait=45*60*60).
    max_run=6 * 60 * 60,
    # Passed to the entry script as arguments; can be overridden when tuning.
    hyperparameters=dict(train_dict, **model_dict),
    # NOTE(review): every training job started from this estimator uses the
    # same checkpoint prefix -- confirm that is intended for parallel jobs.
    checkpoint_s3_uri=f"{checkpoint_s3}{trial_name}",
    enable_sagemaker_metrics=True,
    sagemaker_session=sagemaker.Session(sagemaker_client=sm),
    # NOTE(review): verify that the estimator actually consumes `boto_session`;
    # the Session above is built without it.
    boto_session=sess,
)

In [221]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner

# Search space for the tuner: only the `fold` hyperparameter is varied,
# over the integer range 1..10.
hyperparameter_ranges = {"fold": IntegerParameter(min_value=1, max_value=10)}


In [222]:
# Objective the tuner optimises: minimise the validation loss.
objective_type = "Minimize"
objective_metric_name = "val:loss"

# Metric name -> regex applied to the training log output. Every pattern
# captures the text after "<key>=" up to the next comma, except the last one,
# which captures up to the end of the line.
metric_definitions = [
    {"Name": metric, "Regex": pattern}
    for metric, pattern in (
        ("train:loss", "train_loss=(.*?),"),
        ("train:accuracy", "train_acc=(.*?),"),
        ("val:loss", "val_loss=(.*?),"),
        ("val:f1", "val_f1=(.*?),"),
        ("val:accuracy", "val_acc=(.*?)$"),
    )
]


In [223]:
# Base name for the tuning job: "<model-name>-hyperparam-training-job-<epoch>".
# (The previous concatenation dropped the separator after the model name and
# doubled the hyphen before the timestamp.)
# NOTE: SageMaker shortens the base name when building the final tuning-job
# name (the job log shows only a truncated prefix plus its own timestamp).
job_name = f"{train_dict['model-name']}-hyperparam-training-job-{int(time.time())}"

# Tuner that launches up to 10 training jobs from `estimator`, all in
# parallel, minimising `objective_metric_name` over `hyperparameter_ranges`.
# Early stopping of individual jobs is turned off.
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name=objective_metric_name,
    objective_type=objective_type,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    max_jobs=10,
    max_parallel_jobs=10,
    base_tuning_job_name=job_name,
    early_stopping_type='Off',
)

# S3 prefixes used as input channels for the tuning job.
# `video_folder` is reassigned here to the TRAINING_VIDEOS_2 prefix.
video_folder = 's3://XXXX/datasets/IEMOCAP/TRAINING_VIDEOS_2'
audio_folder = 's3://XXXX/datasets/IEMOCAP/LOGMEL_DELTAS'
# These must be plain strings: a trailing comma after the literal would turn
# each value into a 1-tuple, which is not a valid channel input.
audio_pretrained_folder = 's3://XXXX/datasets/PRETRAINED_MODELS/IEMOCAP/Audio/'
video_pretrained_folder = 's3://XXXX/datasets/PRETRAINED_MODELS/IEMOCAP/Video/'

In [224]:
# Launch the hyperparameter tuning job. Each key of `inputs` names an input
# channel and maps it to an S3 location.
# NOTE(review): confirm that `experiment_config` is honoured for tuning jobs;
# it may only apply to plain estimator training jobs.
tuner.fit(
    inputs=dict(
        audio_folder=audio_folder,
        video_folder=video_folder,
        audio_pretrained_folder=audio_pretrained_folder,
        video_pretrained_folder=video_pretrained_folder,
    ),
    experiment_config=dict(
        TrialName=exp_trial.trial_name,
        TrialComponentDisplayName="Training",
    ),
)

INFO:sagemaker.image_uris:Defaulting to the only supported framework/algorithm version: latest.
INFO:sagemaker.image_uris:Ignoring unnecessary instance type: None.
INFO:sagemaker:Creating hyperparameter tuning job with name: MultimodalMatchmaphy-210711-2035


........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................