# PyTorchでLSTMモデル運用

In [1]:
%%time
import os
import torch
import numpy as np
import sagemaker
from sklearn.externals import joblib
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math

CPU times: user 1.28 s, sys: 96.1 ms, total: 1.38 s
Wall time: 1.38 s


## PyTorchでのLSTMモデルサンプル

In [57]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam


class Predictor(nn.Module):
    """Single LSTM layer followed by a linear read-out of the last time step.

    Args:
        inputDim: Number of features per time step fed to the LSTM.
        hiddenDim: Size of the LSTM hidden state (and input width of the linear layer).
        outputDim: Number of values produced by the linear layer.
    """

    def __init__(self, inputDim, hiddenDim, outputDim):
        super().__init__()

        # batch_first=True: the LSTM indexes its input as (batch, time step, feature).
        self.rnn = nn.LSTM(inputDim, hiddenDim, batch_first=True)
        self.output_layer = nn.Linear(in_features=hiddenDim, out_features=outputDim)

    def forward(self, inputs, hidden0=None):
        """Run the LSTM over ``inputs`` and project its last time step.

        Args:
            inputs: Batch-first sequence tensor passed straight to the LSTM.
            hidden0: Optional initial LSTM state; ``None`` lets the LSTM use its default.

        Returns:
            Output of the linear layer applied to the LSTM output at the final time step.
        """
        sequence_out, _final_state = self.rnn(inputs, hidden0)  # LSTM layer
        last_step = sequence_out[:, -1, :]
        return self.output_layer(last_step)  # fully connected layer

In [58]:
def mkRandomBatch(train_x, train_t, batch_size=10):
    """Draw a random mini-batch (sampling with replacement) from the training set.

    Args:
        train_x: Indexable collection of input samples.
        train_t: Indexable collection of targets, aligned with ``train_x``.
        batch_size: Number of samples to draw.

    Returns:
        Tuple ``(batch_x, batch_t)`` of tensors built with ``torch.tensor``.

    Raises:
        ValueError: If ``train_x`` is empty.
    """
    n_samples = len(train_x)
    if n_samples == 0:
        raise ValueError("train_x must contain at least one sample")

    # np.random.randint excludes its upper bound, so the bound must be
    # n_samples itself: using n_samples - 1 never selects the last sample and
    # raises for a single-sample training set.
    indices = np.random.randint(0, n_samples, size=batch_size)
    batch_x = [train_x[i] for i in indices]
    batch_t = [train_t[i] for i in indices]

    return torch.tensor(batch_x), torch.tensor(batch_t)

In [59]:
epochs_num = 10
hidden_size = 100
batch_size = 256
accuracy_tolerance = 0.1  # a prediction within this absolute error counts as a hit

# Sample counts. The test count gets its own name so that it does not
# overwrite the `test_size` split fraction used by the dataset cells.
training_size = X_train.shape[0]
num_test_samples = X_test.shape[0]

# Nested Python lists are what mkRandomBatch / torch.tensor consume below.
train_x = X_train.tolist()
train_t = y_train.tolist()
test_x = X_test.tolist()
test_t = y_test.tolist()

# The test set is fixed, so build its tensors once instead of once per batch per epoch.
test_data = torch.tensor(test_x)
test_label = torch.tensor(test_t)

model = Predictor(1, hidden_size, 1)
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)

# Each epoch draws this many random batches; accuracy is normalised by the
# number of samples actually drawn, not by the full training-set size.
train_batches = training_size // batch_size
train_samples_seen = train_batches * batch_size

for epoch in range(epochs_num):
    # training
    model.train()
    running_loss = 0.0
    training_accuracy = 0.0
    for _ in range(train_batches):
        optimizer.zero_grad()

        data, label = mkRandomBatch(train_x, train_t, batch_size)

        output = model(data)

        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

        # .item() yields a plain float, so no tensor is accumulated across steps.
        running_loss += loss.item()
        training_accuracy += ((output.detach() - label).abs() < accuracy_tolerance).sum().item()

    # test
    model.eval()
    test_accuracy = 0.0
    with torch.no_grad():  # no gradients are needed for evaluation
        # Stepping by offset also covers the final partial batch, so every
        # test sample counted in the denominator is actually evaluated.
        for offset in range(0, num_test_samples, batch_size):
            data = test_data[offset:offset + batch_size]
            label = test_label[offset:offset + batch_size]
            output = model(data, None)

            test_accuracy += ((output - label).abs() < accuracy_tolerance).sum().item()

    # max(..., 1) guards against division by zero on tiny or empty datasets.
    training_accuracy /= max(train_samples_seen, 1)
    test_accuracy /= max(num_test_samples, 1)

    print('%d loss: %.3f, training_accuracy: %.5f, test_accuracy: %.5f' % (
        epoch + 1, running_loss, training_accuracy, test_accuracy))

1 loss: 3.777, training_accuracy: 0.32670, test_accuracy: 0.49015
2 loss: 0.828, training_accuracy: 0.52902, test_accuracy: 0.51157
3 loss: 0.756, training_accuracy: 0.57182, test_accuracy: 0.55013
4 loss: 0.695, training_accuracy: 0.60602, test_accuracy: 0.55698
5 loss: 0.591, training_accuracy: 0.65835, test_accuracy: 0.59640
6 loss: 0.446, training_accuracy: 0.72470, test_accuracy: 0.61611
7 loss: 0.290, training_accuracy: 0.80992, test_accuracy: 0.67266
8 loss: 0.291, training_accuracy: 0.81226, test_accuracy: 0.68723
9 loss: 0.262, training_accuracy: 0.83086, test_accuracy: 0.66495
10 loss: 0.269, training_accuracy: 0.82656, test_accuracy: 0.66153


## SageMakerにデプロイ

### データセットの準備

In [28]:
def _load_data(data, n_prev=50):
    docX, docY = [], []
    for i in range(len(data) - n_prev):
        if i == 0:
            continue
        docX.append(data.iloc[i - 1:i + n_prev - 1].values)
        docY.append(data.iloc[i + n_prev].values)
    alsX = np.array(docX)
    alsY = np.array(docY)

    return alsX, alsY


# Split into training and test portions; _load_data() is applied to each portion
def train_test_split(df, test_size=0.1, n_prev=50):
    """
    Cut ``df`` by row position into a leading training portion and a trailing
    test portion, then window each portion with ``_load_data``.

    Args:
        df: Frame to split; sliced with ``.iloc``.
        test_size: Fraction of rows assigned to the trailing test portion.
        n_prev: Window length forwarded to ``_load_data``.

    Returns:
        ``((X_train, y_train), (X_test, y_test))``.
    """
    split_at = int(round(len(df) * (1 - test_size)))
    train_part, test_part = df.iloc[:split_at], df.iloc[split_at:]

    train_pair = _load_data(train_part, n_prev)
    test_pair = _load_data(test_part, n_prev)

    return train_pair, test_pair


def inverse_original_scale(x):
    """Undo the scaling applied to the data: inverse scaler, then exponential.

    Uses the module-level ``scaler``. ``x`` is reshaped to a single row before
    being passed to ``scaler.inverse_transform``; the result of ``np.exp`` on
    that output is returned.
    """
    as_row = x.reshape(1, -1)
    unscaled = scaler.inverse_transform(as_row)
    return np.exp(unscaled)



df_code_date_hourly = pd.read_csv('./code_run_num_hourly.csv')


def split_date(x):
    """Return the part of ``x`` that precedes the first space."""
    return x.split(" ")[0]


df_code_date_hourly["date"] = [split_date(stamp) for stamp in df_code_date_hourly['timestamp'].values]

# Log-transform the demand column.
# NOTE(review): np.log yields -inf for zero demand — confirm the data has none.
df_code_date_hourly['code_num'] = np.log(df_code_date_hourly['demand'])

# Min-max scale the log values into [0, 1].
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics leak into the scaling — confirm this is acceptable.
scaler = MinMaxScaler(feature_range=(0, 1))
code_num_column = df_code_date_hourly['code_num'].values.reshape(-1, 1)
df_code_date_hourly['code_num'] = scaler.fit_transform(code_num_column)

# Window length (rows per input sequence) and fraction of rows held out for testing.
length_of_sequences = 24
test_size = 0.1

(X_train, y_train), (X_test, y_test) = train_test_split(
    df_code_date_hourly[['code_num']],
    test_size=test_size,
    n_prev=length_of_sequences,
)

# row_data = df_code_date_hourly['demand'].values

### データセットのアップロード

In [54]:
os.makedirs("./pytorch_data", exist_ok=True)

# Re-read the raw CSV under its own name: assigning it to `df_code_date_hourly`
# would silently discard the `date` / `code_num` columns added during dataset
# preparation and break any later cell that still needs them.
raw_code_date_hourly = pd.read_csv('./code_run_num_hourly.csv')
raw_code_date_hourly.to_csv('pytorch_data/train_data.csv')

# Persist the fitted scaler next to the training data so it is uploaded with it.
joblib.dump(scaler, './pytorch_data/scaler.save')

import sagemaker
sagemaker_session = sagemaker.Session()
bucket_name = sagemaker_session.default_bucket()
# Upload the whole ./pytorch_data directory under the dataset/pytorch prefix.
input_data = sagemaker_session.upload_data(path='./pytorch_data', bucket=bucket_name, key_prefix='dataset/pytorch')
print('Training data is uploaded to: {}'.format(input_data))

Training data is uploaded to: s3://sagemaker-ap-northeast-1-481470706855/dataset/pytorch


### トレーニングジョブの実行

In [77]:
from sagemaker.pytorch import PyTorch
from sagemaker import get_execution_role

# Hyperparameters passed to the estimator for the training script.
hyper_param = {'batch-size': 256, 'epochs': 15, 'num_gpus': 0}

role = get_execution_role()

# NOTE(review): `train_instance_count` / `train_instance_type` are the
# SageMaker Python SDK v1 parameter names — confirm the installed SDK version.
estimator_settings = dict(
    entry_point='./pytorch_data/training.py',
    role=role,
    framework_version='1.2.0',
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    hyperparameters=hyper_param,
)
estimator = PyTorch(**estimator_settings)

# The uploaded dataset location is provided to the job as the 'training' channel.
training_channels = {'training': input_data}
estimator.fit(training_channels)

2019-11-26 00:53:13 Starting - Starting the training job...
2019-11-26 00:53:14 Starting - Launching requested ML instances......
2019-11-26 00:54:23 Starting - Preparing the instances for training...
2019-11-26 00:55:02 Downloading - Downloading input data...
2019-11-26 00:55:33 Training - Downloading the training image...
2019-11-26 00:56:08 Training - Training image download completed. Training in progress..[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-11-26 00:56:09,863 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-11-26 00:56:09,866 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[31m2019-11-26 00:56:09,879 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-11-26 00:56:12,936 sagemaker_pytorch_container.training INFO     Invoking user training script.

[31m3 loss: 0.784, training_accuracy: 0.50226, test_accuracy: 0.59347[0m
[31m4 loss: 0.716, training_accuracy: 0.54517, test_accuracy: 0.58966[0m
[31m5 loss: 0.708, training_accuracy: 0.56000, test_accuracy: 0.61255[0m
[31m6 loss: 0.670, training_accuracy: 0.58513, test_accuracy: 0.64095[0m
[31m7 loss: 0.607, training_accuracy: 0.62751, test_accuracy: 0.67274[0m
[31m8 loss: 0.571, training_accuracy: 0.64928, test_accuracy: 0.69945[0m
[31m9 loss: 0.456, training_accuracy: 0.70733, test_accuracy: 0.69224[0m
[31m10 loss: 0.346, training_accuracy: 0.75823, test_accuracy: 0.74693[0m
[31m11 loss: 0.275, training_accuracy: 0.81407, test_accuracy: 0.77406[0m
[31m12 loss: 0.278, training_accuracy: 0.81544, test_accuracy: 0.77830[0m
[31m13 loss: 0.247, training_accuracy: 0.82890, test_accuracy: 0.77872[0m
[31m14 loss: 0.266, training_accuracy: 0.82659, test_accuracy: 0.78423[0m

2019-11-26 00:57:10 Uploading - Uploading generated training model
2019-11-26 00:57:10 Complet

### モデルのデプロイ

In [78]:
# Deploy the trained estimator to an endpoint backed by one ml.m4.xlarge instance.
predictor = estimator.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

--------------------------------------------------------------------------------------------------------------!

### エンドポイントの呼び出し

In [22]:
# Window the raw `demand` column with the same split settings and keep only
# the test-portion input windows (`exc`); the other results are discarded.
_, (exc, _) = train_test_split(
    df_code_date_hourly[['demand']],
    test_size=test_size,
    n_prev=length_of_sequences,
)

In [23]:
# Shape the first test window as one batch of one sequence with one feature.
# The sequence length comes from `length_of_sequences` (the value used to build
# `exc`) instead of a duplicated literal, so the two cannot drift apart.
dd = exc[0].reshape(1, length_of_sequences, 1).tolist()

In [79]:
import boto3
import json

client = boto3.client('sagemaker-runtime')

# Serialise the input window as the JSON request body.
inp = json.dumps(dd)

# NOTE(review): the endpoint name is hard-coded to one specific deployment;
# after a re-deploy it will differ — consider taking it from `predictor`.
request = {
    'EndpointName': 'pytorch-training-2019-11-26-00-53-12-477',
    'Body': inp,
    'ContentType': 'application/json',
    'Accept': 'application/json',
}
response = client.invoke_endpoint(**request)

# The response body is a stream; decode its JSON content and display it.
body = response['Body']
json.load(body)

{'prediction': 181}