In [1]:
%load_ext autoreload
import pandas as pd
import boto3
import sagemaker
from tqdm import tqdm
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [2]:
# SageMaker handles shared by the rest of the notebook:
#   sagemaker_session - used for the S3 uploads below
#   bucket            - the session's default S3 bucket (emptied in the cleanup cell)
#   role              - execution role passed to the estimator and the deployed model
sagemaker_session = sagemaker.Session()
bucket = sagemaker_session.default_bucket()
role = sagemaker.get_execution_role()

In [3]:
%%time
#upload data to s3
data_dir = 'data'
prefix = 'sagemaker/Arvato'
train_location = sagemaker_session.upload_data(os.path.join(data_dir, 'train.csv'), key_prefix=prefix)
val_location = sagemaker_session.upload_data(os.path.join(data_dir, 'val.csv'), key_prefix=prefix)
test_location = sagemaker_session.upload_data(os.path.join(data_dir, 'test.csv'), key_prefix=prefix)

CPU times: user 6.14 s, sys: 1.85 s, total: 8 s
Wall time: 17.1 s


In [4]:
# Show the S3 URI returned by the upload of train.csv as a sanity check.
train_location

's3://sagemaker-us-east-1-579131320339/sagemaker/Arvato/train.csv'

In [6]:
# Number of model inputs = number of columns in train.csv minus one.
# presumably the subtracted column is the label - TODO confirm against src/Model.py
# Only the column count is needed, so a single row is parsed, and the path is built
# from data_dir so it stays in sync with the upload cell.
input_size = pd.read_csv(os.path.join(data_dir, 'train.csv'), nrows=1,
                         header=None, index_col=None).shape[1] - 1

In [7]:
# Local smoke test of the training code before launching SageMaker jobs:
# build a small classifier (hidden sizes (5,5), one output) on GPU when available.
from src import Model

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Model.ArvatoClassifier(input_size, (5,5), 1)
model = model.to(device)

In [8]:
# Binary cross-entropy loss and an Adam optimizer left at the library defaults.
# NOTE(review): BCELoss takes probabilities, so the classifier presumably ends in a
# sigmoid - confirm in src/Model.py.
loss_fn = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters())

In [9]:
# Local data loaders for the smoke test, built from the CSVs in 'data'
# (train first, then val).
# presumably 512 is the batch size - TODO confirm against Model._get_data_loader
train_loader, val_loader = [
    Model._get_data_loader(512, 'data', csv_name)
    for csv_name in ('train.csv', 'val.csv')
]


Get train data loader from data.
Get val data loader from data.


In [10]:
# Smoke test: run Model.train locally (the recorded output shows 2 epochs and a
# final "best_val_loss: ..." line, which the tuning job's metric regex later parses).
# presumably 'artifacts' is the output directory and the trailing tuple is the model
# configuration stored next to the weights - TODO confirm against src/Model.py.
# NOTE(review): the tuple says hidden sizes (100,100,100), but `model` above was built
# with (5,5); if the tuple is persisted as model metadata it will not match the weights.
Model.train(model, train_loader, val_loader, 2, optimizer, loss_fn, device, 'artifacts', 
            (input_size, (100,100,100), 1, 0.3))


Epoch: 1, train loss: 0.6687925321933551, val loss: 0.5240947431412296, training done in 0m 4s
Epoch: 2, train loss: 0.5667037811034765, val loss: 0.508777620135874, training done in 0m 3s
best_val_loss: 0.508777620135874


In [19]:
# Estimator that runs src/Model.py as a SageMaker training job.
from sagemaker.pytorch import PyTorch

# Defaults handed to the training script; the tuning job overrides the hidden sizes and lr.
# NOTE(review): 'batch-size' is hyphenated while every other key uses underscores -
# confirm it matches the argument names parsed in src/Model.py.
training_hyperparameters = {
    'epochs': 100,
    'input_size': input_size,
    'output_size': 1,
    'hidden_dim1': 100,
    'hidden_dim2': 100,
    'hidden_dim3': 100,
    'lr' : 2e-3,
    'batch-size' : 128,
}

estimator = PyTorch(
    entry_point="Model.py",
    source_dir="src",
    role=role,
    framework_version='0.4.0',
    train_instance_count=1,
    # 'ml.p2.xlarge' was the previously tried alternative (left commented out in the original)
    train_instance_type='ml.m4.xlarge',
    hyperparameters=training_hyperparameters,
)


In [22]:
from sagemaker.tuner import IntegerParameter, ContinuousParameter, HyperparameterTuner
import time

# Hyperparameter tuning job: minimise the validation loss that the training script
# reports on its "best_val_loss: ..." log line.
objective_metric_name = 'val-loss'
metric_definitions = [
    {'Name': objective_metric_name, 'Regex': 'best_val_loss: (.*)'},
]

# Search space: the three hidden-layer widths and the learning rate.
search_space = {
    'hidden_dim1': IntegerParameter(50, 150),
    'hidden_dim2': IntegerParameter(50, 150),
    'hidden_dim3': IntegerParameter(50, 150),
    'lr': ContinuousParameter(8e-4, 3e-3),
}

hyperparameter_tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=search_space,
    metric_definitions=metric_definitions,
    objective_type='Minimize',
    max_jobs=50,
    max_parallel_jobs=5,
)

# Start the tuning job on the uploaded train/val channels.
input_channels = {'train': train_location, 'val': val_location}
hyperparameter_tuner.fit(input_channels)

In [23]:
%time
hyperparameter_tuner.wait()

CPU times: user 3 µs, sys: 1e+03 ns, total: 4 µs
Wall time: 7.39 µs
....................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [29]:
%%time
#get best result
best_estimator = PyTorch.attach(hyperparameter_tuner.best_training_job())

from sagemaker.pytorch import PyTorchModel

model = PyTorchModel(model_data=best_estimator.model_data,
                     role = role,
                     framework_version='1.1',
                     entry_point='Model.py',
                     source_dir='src')




2020-01-18 11:06:17 Starting - Preparing the instances for training
2020-01-18 11:06:17 Downloading - Downloading input data
2020-01-18 11:06:17 Training - Training image download completed. Training in progress.
2020-01-18 11:06:17 Uploading - Uploading generated training model
2020-01-18 11:06:17 Completed - Training job completed[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2020-01-18 10:59:12,010 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2020-01-18 10:59:12,011 sagemaker-containers INFO     Failed to parse hyperparameter _tuning_objective_metric value val-loss to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-18 10:59:12,012 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-18 10:59:12,026 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m20

In [30]:
%time
#deploy
predictor = model.deploy(initial_instance_count = 1,
                        instance_type = 'ml.t2.xlarge')

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.82 µs
--------------------------------------------------------------------------------------------------------------------------!

In [31]:
# Load the header-less test CSV and keep it as a plain NumPy array for batching.
test_frame = pd.read_csv('data/test.csv', header=None, index_col=None)
test_data = test_frame.values

In [33]:
import numpy as np

# Send the test set to the endpoint in chunks of at most 128 rows each and stitch
# the per-chunk predictions back together in the original row order.
# presumably the chunking keeps each request under the endpoint's payload limit - TODO confirm
n_chunks = test_data.shape[0] // 128 + 1
blks = np.array_split(test_data, n_chunks)
res = np.concatenate(
    [predictor.predict(chunk.astype('float32')) for chunk in tqdm(blks)],
    axis=0,
)

100%|██████████| 335/335 [00:06<00:00, 52.03it/s]


In [34]:
# Wrap the raw prediction array in a single-column frame named RESPONSE and preview it.
res = pd.DataFrame(res, columns=['RESPONSE'])
res.head()

Unnamed: 0,RESPONSE
0,0.905665
1,0.842811
2,0.808107
3,0.473558
4,0.784613


In [35]:
# Row identifiers (LNR) for the test set, read from the original MAILOUT file.
# Only the LNR column is used, so usecols avoids parsing every other column.
index = pd.read_csv('../data/Udacity_MAILOUT_052018_TEST.csv', sep = ';', usecols = ['LNR'])['LNR']

In [36]:
# Pair each LNR with its predicted RESPONSE.
# concat(axis=1) aligns on the row index and silently pads with NaN when the two
# inputs differ in length, so fail loudly instead of writing a misaligned file.
assert len(index) == len(res), (
    "LNR count ({}) does not match prediction count ({})".format(len(index), len(res))
)
pred = pd.concat((index, res), axis = 1)
pred.head()

Unnamed: 0,LNR,RESPONSE
0,1754,0.905665
1,1770,0.842811
2,1465,0.808107
3,1470,0.473558
4,1478,0.784613


In [37]:
# Write the LNR/RESPONSE pairs to disk without the positional row index.
submission_path = 'data/nn_out.csv'
pred.to_csv(submission_path, index=False)

In [38]:
!cat data/nn_out.csv

LNR,RESPONSE
1754,0.9056646
1770,0.8428113
1465,0.80810666
1470,0.4735575
1478,0.7846129
1782,0.5269857
1485,0.31652123
1519,0.8230543
1835,0.8537613
1522,0.7565267
1539,0.8277796
1853,0.7411966
1856,0.692463
2502,0.36510876
2182,0.69408196
2191,0.8737646
2522,0.56207275
2530,0.5933678
2215,0.3471988
2550,0.7807484
2570,0.35144645
2574,0.6665743
2579,0.6201445
2247,0.85145694
2674,0.69904506
11434,0.47901174
11122,0.86523515
11127,0.66242266
11443,0.829992
11170,0.8329269
11191,0.64928627
11198,0.8295289
10936,0.8328609
11244,0.78276473
11247,0.84392667
11252,0.7912605
11259,0.8328609
10945,0.7810639
10951,0.46519497
11271,0.8769264
10963,0.75864106
11282,0.32140806
11291,0.39221838
10980,0.30484924
10994,0.6830948
10998,0.8329269
11303,0.74400854
11630,0.7653499
11636,0.21364942
11949,0.24510884
11950,0.883593
11956,0.90695703
11648,0.8329269
11653,0.6658357
11967,0.24010819
11972,0.580589
11975,0.61724323
12110,0.71044046
121

7303,0.37293598
7639,0.6497852
7336,0.41537476
7339,0.85815287
7340,0.7864381
7341,0.7750875
7349,0.38970897
7361,0.2262111
7364,0.5217253
7690,0.55858314
7694,0.856319
7094,0.3658445
7402,0.8049207
7408,0.8328609
7115,0.17160767
7116,0.3283635
7426,0.2108978
7430,0.8327949
7439,0.8329269
7440,0.85255295
7122,0.3286402
7123,0.8329269
7447,0.8672674
7449,0.2038354
7147,0.3653899
7469,0.8055822
7480,0.6337409
7496,0.8395722
7498,0.7198239
7500,0.3910895
7188,0.7915602
7520,0.25817814
7537,0.8329269
8163,0.42114216
8176,0.3256858
16737,0.7719625
17145,0.6754724
17153,0.66893965
4319,0.34195942
12872,0.7667855
13839,0.2760833
13843,0.8560825
13845,0.78351253
13520,0.5038079
13522,0.837218
13532,0.5973586
13872,0.4669225
13873,0.40332514
13549,0.8327949
13883,0.8760324
13895,0.7883772
13560,0.25308773
13573,0.68513805
13591,0.8028212
13604,0.89892507
13939,0.3640607
13639,0.7210295
13643,0.8226371
13651,0.80136293
13689,0.48367235


38392,0.7125391
39061,0.8329269
39072,0.53078085
38744,0.78459096
38770,0.61016196
38776,0.8337831
39113,0.8478802
39117,0.12673445
38786,0.8328609
38791,0.6827982
39216,0.9085432
38816,0.6050588
39126,0.17048636
39134,0.576641
38824,0.85909647
39158,0.4106815
39165,0.8176116
39169,0.8328609
39175,0.34817025
48646,0.8047906
48973,0.30988073
49316,0.83136857
49623,0.57165974
49947,0.7486996
50278,0.6186037
50601,0.20238173
45733,0.8328609
45737,0.8328609
45741,0.3789377
46056,0.8329269
46057,0.65558034
46381,0.7544084
46712,0.21651246
46722,0.7495106
47041,0.50996816
47044,0.57267594
47375,0.6605484
47380,0.5813498
47381,0.8327949
47700,0.4959363
47702,0.31612653
47705,0.4856918
48026,0.35622075
48676,0.67358977
48679,0.7998357
48684,0.5444185
48993,0.13742337
49005,0.55690485
49006,0.19281676
49317,0.7888723
80681,0.82339144
80695,0.79635096
81337,0.38747293
76546,0.8660296
76865,0.21625412
84565,0.84224534
84567,0.85434806
85214

In [None]:
#score of 0.70511 on kaggle

In [40]:
# Cleanup: delete the endpoint, then delete every object in the session's default bucket.
# NOTE(review): objects.all() is not restricted to `prefix`; the recorded output lists
# deleted keys outside 'sagemaker/Arvato' (e.g. 'sagemaker/trans/...'). Confirm that
# wiping the whole bucket is intended before re-running this cell.
predictor.delete_endpoint()
s3 = boto3.resource('s3')
bucket_to_delete = s3.Bucket(bucket)
bucket_to_delete.objects.all().delete()

[{'ResponseMetadata': {'RequestId': '8D128B9F36BC6848',
   'HostId': 'uS9KdmOFCMeLeBQiavA8L64GcbhkZT20kOBH9eLnzpBCa8u8ZqnpMqCsE6Elz9A/oRwuIZd7dws=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'uS9KdmOFCMeLeBQiavA8L64GcbhkZT20kOBH9eLnzpBCa8u8ZqnpMqCsE6Elz9A/oRwuIZd7dws=',
    'x-amz-request-id': '8D128B9F36BC6848',
    'date': 'Sat, 18 Jan 2020 13:02:05 GMT',
    'connection': 'close',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'sagemaker-pytorch-2020-01-17-15-31-01-866/debug-output/training_job_end.ts'},
   {'Key': 'sagemaker/trans/sagemaker-pytorch-2020-01-10-22-37-21-005/debug-output/training_job_end.ts'},
   {'Key': 'sagemaker-pytorch-2020-01-16-14-24-40-722/source/sourcedir.tar.gz'},
   {'Key': 'sagemaker-pytorch-200118-0929-019-3300bfc0/output/model.tar.gz'},
   {'Key': 'sagemaker/Arvato/test.csv'},
   {'Key': 'sagemaker-pytorch-200118-0929-032-e5ba08e6/output/m