In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable autoreload in mode 2 so that edits to imported modules (such as the
# project modules below) are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

# import jtplot submodule from jupyterthemes
from jupyterthemes import jtplot
# currently installed theme will be used to
# set plot style if no arguments provided
jtplot.style()

# Project-local modules.
# NOTE(review): later cells use Path, np, pd, torch, feather, tqdm_notebook,
# Learner, partial, ... without importing them explicitly; presumably they all
# arrive through these star imports. Confirm, and prefer explicit imports so
# the origin of each name is visible.
from preprocessing import *
from mymodels import *
from databunch import *

# Input locations: raw text exports and the directory of per-cycle files.
data_path = Path('tbmData/data')
fn_txt = sorted(data_path.glob('*.txt'))
print(fn_txt[:3])
fn_cycles = Path('tbmData/cycles1')

# Set debug = True for a tiny configuration that runs quickly.
debug = False

# mulr       - number of tiles (copies) made of every cycle
# n_cont     - width of the column window scanned for continuous features
# num_cycles - number of cycle files loaded
mulr, n_cont, num_cycles = (3, 5, 10) if debug else (7, 192, 3481)

# The first `valid_ratio` share of the cycles is held out for validation;
# the remaining cycles are used for training.
valid_ratio = 0.2
train_ratio = 1 - valid_ratio
n_valid = int(num_cycles * valid_ratio)
valid_idx = np.arange(n_valid)
train_idx = np.arange(n_valid, num_cycles)

# Row index of cycle i inside tile k is i + k * num_cycles (assumes the tiled
# frame stacks whole tiles one after another - TODO confirm against
# tile_with_noise / flatten_and_cat).
tile_offsets = np.arange(mulr) * num_cycles
train_idx_tile = (train_idx[:, None] + tile_offsets).flatten()
valid_idx_tile = (valid_idx[:, None] + tile_offsets).flatten()  # take from all tiles

# Batch size: 2 for debug runs, otherwise the training share of num_cycles.
if debug:
    bs = 2
else:
    bs = int(num_cycles * train_ratio)
# Passed to RNNTrainer when the learner is built (presumably the sequence
# length - confirm against RNNTrainer).
sl = 30

# Make the CUDA device with this index the current device.
gpu_start = 2
torch.cuda.set_device(gpu_start)
# device_ids = range(gpu_start, gpu_start + num_gpus)

# cont_names = ['推进速度', '主驱动1#电机扭矩', '刀盘扭矩', '刀盘转速','主液压油箱温度', '前点偏差X', '主液压油箱温度']
# Target columns. Rough English glosses (verify with the data owner):
#   problem 1: advance-speed potentiometer setpoint, cutterhead-speed potentiometer setpoint
#   otherwise: total thrust, cutterhead torque
is_problem1 = True
if is_problem1:
    dep_var = ['推进速度电位器设定值', '刀盘转速电位器设定值']
else:
    dep_var = ['总推进力', '刀盘扭矩']

In [None]:
# Take the first `num_cycles` cycle files in sorted file-name order and hand
# them to the project's DataFormatter.
cycle_files = sorted(fn_cycles.glob('cycle*'))
fns_feathers = cycle_files[:num_cycles]
fmtr = DataFormatter(cycle_feathers=fns_feathers)
cycles = fmtr.cycles

# One beginning_index per cycle, computed from the first 500 rows only.
idx = [
    beginning_index(cycle.iloc[:500])
    for cycle in tqdm_notebook(cycles)
]

In [None]:
# Feature columns: the n_cont columns starting at position 2 of the first
# cycle, with any target column removed.
# NOTE(review): if a target column lies inside that window, len(cont_names)
# is smaller than n_cont, while the model cell sizes its encoder with n_cont -
# confirm the two agree.
candidate_cols = cycles[0].columns[2:2 + n_cont]
cont_names = [col for col in candidate_cols if col not in dep_var]

# Build mulr tiles of the selected columns via the project helper.
df_conts = tile_with_noise(cycles, idx, mulr, cont_names)

In [None]:
# cyc = cycles[0].iloc[:500]
# axis = plots(get_interesting_columns(cyc), title=str(idx));
# for ax in axis.flatten():
#     ax.axvline(idx)

In [None]:
# Fetch the targets once, then stack mulr copies with a fresh 0..n-1 index so
# there is one target row per tiled cycle.
targets_once = fmtr.get_y(dep_var)
deps = pd.concat([targets_once] * mulr, ignore_index=True)

# Join the tiled features with their targets into one frame.
cyc_cont = flatten_and_cat(df_conts, deps)

In [None]:
# Cache the assembled frame so it can be reloaded after a kernel restart.
# The cache directory is created first: on a fresh checkout 'tmp/' may not
# exist yet and the feather write would fail.
Path('tmp').mkdir(parents=True, exist_ok=True)
cyc_cont.to_feather('tmp/cyc_cont_all_allc')

In [None]:
# Restart entry point: reload the frame cached at 'tmp/cyc_cont_all_allc' by
# the save cell above, presumably so the preprocessing cells need not be
# re-run. The configuration cell at the top must still be executed first.
cyc_cont = feather.read_dataframe('tmp/cyc_cont_all_allc')

In [None]:
# Hold out every tile of each validation cycle, not only the copy in tile 0.
# Passing the untiled valid_idx would leave the noisy copies of the validation
# cycles (tiles 1..mulr-1) in the training split and leak them into training.
# Assumes cyc_cont has mulr * num_cycles rows stacked tile after tile, which is
# how the targets are tiled - TODO confirm for flatten_and_cat.
valid_idx_tile = (valid_idx[:, None] + np.arange(mulr) * num_cycles).flatten()  # take from all tiles
procs = Normalize
databunch = MultiDeptTabularDataBunch.from_df(
    'tmp', cyc_cont, dep_var, valid_idx=valid_idx_tile, bs=bs, procs=procs)

# Re-wrap the same train/valid datasets in the project's RNN data bunch.
rnndb = RNNDataBunch.create(databunch.train_ds, databunch.valid_ds, bs=bs)

In [None]:
# Encoder hyper-parameters; n_hidden, n_layers = 10, 2 is a small setting for
# quick experiments.
n_hidden = 800
n_layers = 4
rnn_enc = ContModel1(1, n_cont, n_hidden, n_layers)

# Head configuration: layer sizes and the matching (all-zero) drop values.
layers = [3 * n_cont, 3 * n_cont, 2]
drops = [0, 0]
head = PoolingLinearClassifier(layers, drops)
model = MultiInputSequentialRNN(rnn_enc, head).cuda()
model.reset()

# Loss weights: per-target column maxima in reversed order, divided by the
# largest maximum, so the target with the larger range gets the smaller weight.
dep_max = cyc_cont[dep_var].max().values
weight = torch.tensor(dep_max[::-1] / dep_max.max(), dtype=torch.float32).cuda()

learner = Learner(
    rnndb,
    model,
    loss_func=weighted_rnn_mse(weight),
    metrics=rnn_metrics,
)

# The star import stays here (after the learner is built) so that name
# shadowing is unchanged.
from fastai.callbacks import *
save_cb = partial(SaveModelCallback, name='rnn1')
learner.callback_fns += [ShowGraph, save_cb]
alpha, beta = 2., 1.
learner.callbacks.append(RNNTrainer(learner, sl, alpha=alpha, beta=beta))

In [None]:
learner.lr_find(num_it=50)
learner.recorder.plot()

In [None]:
learner.fit_one_cycle(7, 0.5)

In [None]:
learner.fit_one_cycle(5, 1e-2)

In [None]:
learner.lr_find()
learner.recorder.plot()

In [None]:
learner.fit_one_cycle(10, 1e-1/2)

In [None]:
learner.lr_find()
learner.recorder.plot()

In [None]:
learner.fit_one_cycle(3, 1e-2/2)
learner.recorder.plot_lr()

In [None]:
learner.fit_one_cycle(10, 1e-3/2)
learner.recorder.plot_lr()

In [None]:
# Predict on one validation batch and keep its targets for comparison.
# The batch is passed by keyword: the first positional parameter of fastai v1's
# Learner.pred_batch is the dataset selector, so pred_batch(x) does not predict
# on x and the result would not line up with y.
# Assumes a fastai v1 release whose pred_batch accepts `batch=`.
# NOTE(review): the batch is drawn from `databunch`, while the learner was
# built on `rnndb` - confirm both yield batches of the same layout.
x, y = next(iter(databunch.valid_dl))
pred = to_np(learner.pred_batch(batch=(x, y))).squeeze()
y = to_np(y)

In [None]:
# Range of the targets in this batch, used to express errors as a percentage.
r = y.max() - y.min()
# One (prediction, target, signed error in % of the target range) tuple per sample.
[(p, t, (p - t) / r * 100) for p, t in zip(pred, y)]

In [None]:
learner.fit_one_cycle(50, 1e-3)

In [None]:
learner.fit_one_cycle(100, 2*1e-3)