#### Ideas
- Train a very large CNN-dense network on big computer:
    - Use window step size of 1
    - Problem: sequences are so long that the model is more likely to overfit than to learn useful things
    - Solution: apply random masking to the data as a form of regularization
    - 1D CNNs with smaller stride, followed by just dense should be a decent architecture
    - If it's not too difficult, use a CNN for dimensionality reduction followed by a transformer block
- Split a sequence into chunks and do manual feature engineering:
    - Pro: Solves the overfitting problem with long sequences
    - Pro: trains faster
    - Con: Removes one of the main benefits of NNs (automatic feature engineering)
    - Con: requires clever and careful feature engineering
    - Con: might be more computationally heavy if feat eng is done on the fly
- Try transformer/self-attention architecture
- Try the feature engineering approach for validation
    - Maybe try continuous prediction? 

### Imports

In [14]:
import sys
## Make the parent directory importable so the project-local `utils` package resolves
if '../' not in sys.path:
    sys.path.append('../')

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

import importlib

In [None]:
## Row index at which the data is split into validation (rows up to and including it)
## and training (all later rows) in the "Create dataset" section.
## NOTE(review): presumably the row of the second earthquake in train.csv — confirm.
second_earthquake = 50085877

### Read in and preprocess data

In [15]:
## Root data folder, relative to the notebook's working directory
data_dir = '../data/'
## Folder with precomputed artefacts (training_data_description.csv is read from here)
preprocessed_dir = data_dir + 'preprocessed/'

In [16]:
## Load the training CSV as a NumPy array, casting both named columns to float32.
## Column 0 is treated as the acoustic signal later in the notebook.
## NOTE(review): assumes the CSV column order is acoustic_data, time_to_failure — confirm.
train_data = pd.read_csv(data_dir + 'train.csv',  dtype={'acoustic_data': np.float32, 'time_to_failure': np.float32}).values
## Folder handed to EarthquakeDatasetTest in the "Predict" section
test_dir = data_dir + 'test/'

In [17]:
## Drop some of the training data for memory efficiency:
## keep only the leading `data_frac` share of the rows, copied into a fresh array.
## NOTE: train_data is rebound here, so re-running this cell shrinks the data again.
data_frac = 0.5
train_data = train_data[:int(data_frac * len(train_data))].copy()

In [18]:
## Load the precomputed summary statistics of the training signal as a Series
## (scale() looks up 'mean', 'std', 'min' and 'max' in it).
## pd.Series.from_csv is deprecated and was removed in pandas 1.0; read_csv with
## header=None / index_col=0 mirrors its defaults, and .iloc[:, 0] yields the Series.
train_desc = pd.read_csv(preprocessed_dir + 'training_data_description.csv', header=None, index_col=0).iloc[:, 0]

  infer_datetime_format=infer_datetime_format)


In [114]:
def scale(acoustic_data, standard=True, desc=None):
    """Centre and rescale acoustic readings using precomputed summary statistics.

    Parameters
    ----------
    acoustic_data : scalar or array-like supporting arithmetic
        Values to scale. A new object is returned; the input is not assigned to.
    standard : bool, default True
        True  -> z-score: ``(x - mean) / std``.
        False -> mean-centred range scaling: ``2 * (x - mean) / (max - min)``.
    desc : mapping, optional
        Statistics keyed by 'mean', 'std', 'min', 'max' (only the keys needed by
        the chosen mode are read). Defaults to the notebook-level ``train_desc``,
        which keeps existing ``scale(x)`` / ``scale(x, standard)`` calls working.

    Returns
    -------
    The scaled values.
    """
    if desc is None:
        ## Fall back to the statistics loaded earlier in the notebook
        desc = train_desc

    centred = acoustic_data - desc['mean']

    if standard:
        return centred / desc['std']

    ## Output is 0 at the mean and spans a total width of 2 for inputs in [min, max];
    ## it lies exactly in [-1, 1] only when min and max are symmetric about the mean.
    return 2 * centred / (desc['max'] - desc['min'])

In [115]:
## Standardise the acoustic column (column 0) with the default z-score mode of scale().
## NOTE: the raw values are overwritten in place, so running this cell twice scales twice.
train_data[:, 0] = scale(train_data[:, 0])

### Hyperparameters

In [41]:
from utils.dataset import FeatureEngineer as FeatureEngineer

In [122]:
## Model config: every tunable setting in one dict that is handed to ModelWrapper
config = {
    'data_dir': data_dir,
    'use_cuda': torch.cuda.is_available(),
    ## Sequence length measured in feature-engineered chunks.
    ## NOTE(review): 150000 is presumably the raw segment length in samples — confirm.
    'seq_len': 150000 // FeatureEngineer.chunk_size,
    'n_features': FeatureEngineer.n_features,

    ## Training parameters
    'batch_size': 16,
    'lr': 0.001,
    'num_epochs': 20,
    'clip': 0.1,  # gradient clipping
    'eval_step': 0.1,  # evaluation frequency per epoch, e.g. 0.5 -> 2 evaluations per epoch
    'patience': 10,  # early-stopping patience counted in evaluations; None disables early stopping
    'revert_after_training': True,  # if True, restore the best parameters found during early stopping

    ## Model hyperparameters
    'model_choice': 1,
    'optim_choice': 0,
    'hidden_size': 512,
    'dropout': 0.2,
    'dense_size': 1000,
    'bidirectional': True,
    'num_layers': 2,
}

## Pick the compute device according to the CUDA availability recorded in the config
device = torch.device("cuda") if config['use_cuda'] else torch.device("cpu")
print("Using {}.".format(device))

Using cuda.


### Create dataset

In [91]:
import utils.dataset
importlib.reload(utils.dataset)
from utils.dataset import *

In [96]:
## Train/validation split by position: rows up to and including `second_earthquake`
## are held out for validation, all later rows are used for training.
## NOTE(review): `second_earthquake` is defined in a cell with no execution count —
## make sure that cell runs before this one on a fresh kernel.
## (Alternative kept for reference: a fractional split.)
# valid_frac = 0.2
# n_train_data = int(len(train_data) * (1-valid_frac))

X_train = train_data[second_earthquake + 1:]
X_valid = train_data[:second_earthquake + 1]

## Both splits use the training dataset class; only the training split gets masking.
## NOTE(review): window_step is presumably the stride between window starts and
## mask_prob the random-masking probability — confirm in utils.dataset.
train_dataset = EarthquakeDatasetTrain(X_train, window_step=1000, mask_prob=0.1)
valid_dataset = EarthquakeDatasetTrain(X_valid, window_step=150000)

## Shuffled mini-batches for training, sized from the config
train_loader = DataLoader(train_dataset, 
                          batch_size=config['batch_size'], 
                          shuffle=True, 
                          num_workers=4)

## Fixed-order batches for validation so evaluations are comparable
valid_loader = DataLoader(valid_dataset, 
                         batch_size=100, 
                         shuffle=False, 
                         num_workers=4)

## Report the number of examples in each split
print("{:,} train examples.".format(len(train_dataset)))
print("{:,} valid examples.".format(len(valid_dataset)))

264,336 train examples.
332 valid examples.


### Create model

In [126]:
import utils.models
importlib.reload(utils.models)
from utils.models import *

import utils.model_wrapper
importlib.reload(utils.model_wrapper)
from utils.model_wrapper import *
## Build the model wrapper from the hyperparameter dict
model = ModelWrapper(config=config)

In [127]:
## Ask the wrapper for its parameter summary and the total parameter count
summary, n_params = model.get_summary()
print("{:,} total parameters".format(n_params))
## Bare expression so the notebook renders the summary
summary

9,484,241 total parameters


Unnamed: 0,name,# params
0,rnn.weight_ih_l0,26624
1,rnn.weight_hh_l0,1048576
2,rnn.bias_ih_l0,2048
3,rnn.bias_hh_l0,2048
4,rnn.weight_ih_l0_reverse,26624
5,rnn.weight_hh_l0_reverse,1048576
6,rnn.bias_ih_l0_reverse,2048
7,rnn.bias_hh_l0_reverse,2048
8,rnn.weight_ih_l1,2097152
9,rnn.weight_hh_l1,1048576


### Train Model

In [128]:
## DEBUG: smoke test — take the first training batch and run one forward pass.
## NOTE(review): if train_loader yields nothing, `batch` is left undefined or keeps
## a stale value from an earlier run.
for batch in train_loader:
    break

## NOTE(review): forward() is called directly; if model.net is a torch.nn.Module,
## model.net(...) is the usual form because it also runs registered hooks — confirm.
output = model.net.forward(batch['features'].to(device))

In [None]:
## Preparatory training run: evaluates far more often (eval_step=0.01) with a longer
## patience than the base config.
## NOTE(review): this cell was originally described as using a "higher learning rate
## and lower gradient clipping", but lr and clip below equal the base config values
## (0.001 and 0.1) — confirm which was intended.
config_changes = dict(
    num_epochs = 20,
    eval_step = 0.01,
    patience = 40,
    revert_after_training = True,
    clip = 0.1,
    lr = 0.001,
)
model.update_config(config_changes)

## The return value of train() is discarded
_ = model.train(train_loader, valid_loader, verbose=2)
print("Preperatory training finished!")

In [None]:
## Full training: longer schedule, with patience / clip / lr taken from the base config.
## eval_step is not listed here, so whatever value the wrapper currently holds is kept.
config_changes = dict(
    num_epochs = 100,
    patience = config['patience'],
    revert_after_training = True,
    clip = config['clip'],
    lr = config['lr'],
)
model.update_config(config_changes)

## The return value of train() is discarded
_ = model.train(train_loader, valid_loader)

In [48]:
#model.save_state('../checkpoints/', 'model0')

In [65]:
#model = ModelWrapper(pretrained_path='../checkpoints/model0')

In [100]:
## Predict on the validation loader.
## NOTE(review): the result is bound to a single name here, whereas the test-set cell
## unpacks (preds, ids) — confirm that predict() returns different shapes per dataset.
preds = model.predict(valid_loader)

In [101]:
## Sanity check: mean of the validation predictions
preds.mean()

6.069175

In [102]:
## Sanity check: spread of the validation predictions. The recorded value is tiny
## relative to the mean, i.e. the predictions are close to constant.
preds.std()

0.033578407

In [117]:
## Sanity check: mean of the acoustic column of the training split
## (expected to be near 0 once scale() has been applied).
X_train[:, 0].mean()

0.0044764853

In [118]:
## Sanity check: standard deviation of the acoustic column of the training split
## (expected to be near 1 once scale() has been applied).
X_train[:, 0].std()

0.9482863

### Predict

In [13]:
## Build the test dataset from the test folder and wrap it in a fixed-order loader
test_dataset = EarthquakeDatasetTest(test_dir)
test_loader = DataLoader(test_dataset,
                         batch_size=100, 
                         shuffle=False, 
                         num_workers=4)

In [14]:
## Predict on the test loader; this rebinds `preds`, replacing the validation predictions.
## NOTE(review): presumably predict() also returns the segment ids for a test dataset — confirm.
preds, ids = model.predict(test_loader)

In [15]:
## Assemble the submission table: one row per id with its predicted time to failure
submission = pd.DataFrame({
    'seg_id' : ids,
    'time_to_failure' : preds,
})

In [16]:
## Write the submission file without the row index.
## `index` is a boolean option; index=False states the intent explicitly instead of
## relying on None being treated as falsy.
submission.to_csv('../submission.csv', index=False)