# Notebook for training a ConvLSTM Deep Network

###### Specifically trains a Convolutional LSTM (ConvLSTM) model.
See `model_params` for reference.

In [8]:
# Define IAM role
import boto3
import re
import os
import numpy as np
import pandas as pd
import importlib
from sagemaker import get_execution_role
import sagemaker as sage
from time import gmtime, strftime
from sagemaker.pytorch import PyTorch
import time

# SageMaker execution context: the IAM role used for training jobs, and the session.
role = get_execution_role()
sess = sage.Session()

# S3 bucket that holds the data sets and receives the training output.
bucket = 'oosv-multilingual-bucket'

# Value supplied as the 'frames' hyperparameter in the parameter dicts below.
TOTAL_FRAMES = 150


# Input channels: where to find the training and validation data.
# When the instance launches it creates a folder /opt/ml/input/data/{channel}/
# and copies every file under the matching S3 prefix below into it.
# TODO: move debug data to a debug bucket; currently this downloads all data
# in the folders, which is bad.
train_args = dict(
    training=f's3://{bucket}/data/train/train_5',
    validation=f's3://{bucket}/data/test/test_5',
)
debug_args = dict(
    training=f's3://{bucket}/data/debug/train/',
    validation=f's3://{bucket}/data/debug/test',
)

In [16]:

# Hyperparameters of the best MixedLSTM configuration.
best_mixed_model_params = {
    'n_features': 39,
    'n_hidden': 1024,
    'languages': 5,
    'frames': TOTAL_FRAMES,
    'dropout': 0.0,
    'lstm_layers': 1,
    'linear_layers': 1,
    'bidirectional': False,
    'lr': 0.0001,
    'batch-size': 100,
    'epoch': 25,
    'backend': 'gloo',
    'test-batch-size': 1000,
    'model': 'MixedLSTM',
}

# No Pooling 3 Channels
# Best ConvLSTM configuration: identical to the MixedLSTM settings above except
# for the entries overridden or appended here. Overriding an existing key keeps
# its position, so the key order matches a fully spelled-out literal.
best_conv_model_params = {
    **best_mixed_model_params,
    'n_features': 13,
    'dropout': 0,
    'model': 'ConvLSTM',
    'kernel': (1, 7),
    'output_channels': 1,
}

def _best_model_estimator(output_folder, job_name, hyperparameters):
    """Build the single-instance PyTorch estimator used for a "best model" run.

    Args:
        output_folder: folder under the bucket that receives the job output.
        job_name: prefix used for the generated training-job name.
        hyperparameters: dict handed to the estimator for the train.py entry point.

    Returns:
        A configured PyTorch estimator; no job is started until .fit() is called.
    """
    return PyTorch(
        entry_point='train.py',
        role=role,
        train_instance_count=1,
        train_instance_type='ml.p3.16xlarge',
        train_volume_size=500,
        source_dir='deep_learning',
        output_path=f's3://{bucket}/{output_folder}',
        # The SDK documents framework_version as a string. A float only works
        # because 0.4 happens to format as '0.4', and it cannot express
        # versions such as '1.10' or '0.4.1'.
        framework_version='0.4',
        base_job_name=job_name,
        hyperparameters=hyperparameters,
    )


# NOTE(review): the names estimator1/estimator2 are bound again further down
# in the notebook by the ConvLSTM sweep cell, so cell execution order matters.
estimator1 = _best_model_estimator('MixedLSTM', 'MixedLSTM', best_mixed_model_params)
estimator2 = _best_model_estimator('ConvLSTM', 'ConvLSTM-Last', best_conv_model_params)

In [17]:
# Submit the estimator1 training job on the full train/validation channels.
# wait=False is passed so the call is not asked to wait for the job to finish.
# The estimator2 launch is kept below but disabled.
estimator1.fit(inputs=train_args, wait=False)
#estimator2.fit(train_args, wait = False)

INFO:sagemaker:Creating training-job with name: MixedLSTM-2018-12-13-18-47-32-536


In [9]:
# Hyperparameters for a short debugging run on a single instance.
model_params_debug = {
    # Model dimensions and layers.
    'n_features': 39,
    'n_hidden': 512,
    'languages': 3,
    'frames': TOTAL_FRAMES,
    'dropout': 0,
    'lstm_layers': 1,
    'linear_layers': 1,
    'bidirectional': False,
    # Training-loop settings (few epochs for debugging).
    'lr': 0.001,
    'batch-size': 100,
    'epoch': 5,
    'backend': 'gloo',  # presumably the distributed backend; confirm in train.py
    'test-batch-size': 1000,
    # Which model the entry point should build.
    # NOTE(review): unlike the other ConvLSTM configs in this notebook, no
    # 'kernel' / 'output_channels' are given here; confirm train.py defaults.
    'model': 'ConvLSTM',
}
# Estimator for the debugging run: one ml.p3.8xlarge instance with a 70-unit
# volume, writing its output under the bucket's 'output' folder.
estimator_debug = PyTorch(
    entry_point='train.py',
    role=role,
    train_instance_count=1,
    train_instance_type='ml.p3.8xlarge',
    train_volume_size=70,
    source_dir='deep_learning',
    output_path=f's3://{bucket}/output',
    # The SDK documents framework_version as a string. A float only works
    # because 0.4 happens to format as '0.4', and it cannot express versions
    # such as '1.10' or '0.4.1'.
    framework_version='0.4',
    base_job_name="sage-maker-debug-convlstm",
    hyperparameters=model_params_debug,
)

# Run against the debug channels with wait=True, i.e. the call is asked to
# wait for the job rather than return immediately.
estimator_debug.fit(debug_args, wait=True)

INFO:sagemaker:Creating training-job with name: sage-maker-debug-convlstm-2018-12-13-00-31-05-889


2018-12-13 00:31:06 Starting - Starting the training job.

KeyboardInterrupt: 

In [10]:
# ConvLSTM configurations for the runs defined below. All four share the
# settings in this base dict and differ only in 'kernel', 'output_channels'
# and the literal used for 'dropout' in the first one.
_convlstm_common = {
    'n_features': 39,
    'n_hidden': 512,
    'languages': 3,
    'frames': TOTAL_FRAMES,
    'dropout': 0,
    'lstm_layers': 1,
    'linear_layers': 1,
    'bidirectional': False,
    'lr': 0.0001,
    'batch-size': 100,
    'epoch': 20,
    'backend': 'gloo',
    'test-batch-size': 1000,
    'model': 'ConvLSTM',
}

# (1, 7) kernel, 1 output channel; dropout is written as the float 0.0 here.
model_params1 = {
    **_convlstm_common,
    'dropout': 0.0,
    'kernel': (1, 7),
    'output_channels': 1,
}

# (1, 7) kernel, 2 output channels.
model_params2 = {
    **_convlstm_common,
    'kernel': (1, 7),
    'output_channels': 2,
}

# (1, 7) kernel, 5 output channels.
model_params3 = {
    **_convlstm_common,
    'kernel': (1, 7),
    'output_channels': 5,
}

# (3, 3) kernel; no 'output_channels' entry is set for this configuration.
model_params4 = {
    **_convlstm_common,
    'kernel': (3, 3),
}

In [11]:
def _convlstm_estimator(hyperparameters):
    """Build one PyTorch estimator for the ConvLSTM hyperparameter runs.

    Every run uses the same instance type, volume size, source directory,
    output location and job-name prefix; only the hyperparameters differ.

    Args:
        hyperparameters: dict handed to the estimator for the train.py entry point.

    Returns:
        A configured PyTorch estimator; no job is started until .fit() is called.
    """
    return PyTorch(
        entry_point='train.py',
        role=role,
        train_instance_count=1,
        train_instance_type='ml.p3.16xlarge',
        train_volume_size=70,
        source_dir='deep_learning',
        output_path=f's3://{bucket}/ConvLSTM',
        # The SDK documents framework_version as a string. A float only works
        # because 0.4 happens to format as '0.4', and it cannot express
        # versions such as '1.10' or '0.4.1'.
        framework_version='0.4',
        base_job_name="ConvLSTM",
        hyperparameters=hyperparameters,
    )


# One estimator per ConvLSTM configuration.
# NOTE(review): estimator1/estimator2 are also assigned earlier in the notebook
# for the "best model" runs; this cell rebinds them.
estimator1 = _convlstm_estimator(model_params1)
estimator2 = _convlstm_estimator(model_params2)
estimator3 = _convlstm_estimator(model_params3)
estimator4 = _convlstm_estimator(model_params4)

In [98]:
# Submit the first three jobs back to back on the full train/validation
# channels, passing wait=False to each call. The estimator4 launch is kept
# below but disabled.
for pending_estimator in (estimator1, estimator2, estimator3):
    pending_estimator.fit(train_args, wait=False)
#estimator4.fit(train_args, wait = False)

INFO:sagemaker:Creating training-job with name: ConvLSTM-2018-10-22-23-09-43-774
INFO:sagemaker:Creating training-job with name: ConvLSTM-2018-10-22-23-09-44-786
INFO:sagemaker:Creating training-job with name: ConvLSTM-2018-10-22-23-09-46-103


### The section below is for testing best performance using Mixed LSTM and Conv LSTM models over 6 languages