In [None]:
%matplotlib inline
%reload_ext autoreload
%autoreload 2

# import jtplot submodule from jupyterthemes
from jupyterthemes import jtplot
# currently installed theme will be used to
# set plot style if no arguments provided
jtplot.style()

from preprocessing import *
from mymodels import *
from databunch import *

# Project-relative input locations.
data_path = Path('tbmData/data')      # directory searched for *.txt files below
fn_cycles = Path('tbmData/cycles1')   # directory searched for 'cycle*' files when cycles are loaded

# Sanity check: collect the text files in sorted order and show the first three.
fn_txt = sorted(data_path.glob('*.txt'))
print(fn_txt[:3])

# ---- Run-size switches ------------------------------------------------------
# Flip `debug` to True for a small, fast configuration.
debug = False
# debug = True

# Number of tiles: the row-index arithmetic below assumes the dataset is laid out
# as `mulr` consecutive blocks of `num_cycles` rows each.
mulr = 3 if debug else 7

# Either use a hand-picked set of continuous columns or a fixed column count.
# Note that `cont_names` is only assigned in the hand-picked case.
selected_columns = False
# selected_columns = True
if selected_columns:
    cont_names = ['推进速度', '主驱动1#电机扭矩', '刀盘扭矩', '刀盘转速', '主液压油箱温度', '前点偏差X']
    n_cont = len(cont_names)
elif debug:
    n_cont = 5
else:
    n_cont = 192

num_cycles = 10 if debug else 3481

# ---- Train / validation split -----------------------------------------------
# The first `valid_ratio` share of the cycles is held out; the rest is for training.
valid_ratio = 0.2
train_ratio = 1 - valid_ratio
valid_idx = np.arange(int(num_cycles * valid_ratio))
train_idx = np.arange(valid_idx.size, num_cycles)

# Row i of tile k sits at i + k * num_cycles; list every tile's row for each cycle,
# cycle-major (all tiles of cycle 0, then all tiles of cycle 1, ...).
train_idx_tile = np.add.outer(train_idx, np.arange(mulr) * num_cycles).ravel()
valid_idx_tile = np.add.outer(valid_idx, np.arange(mulr) * num_cycles).ravel()  # take from all tiles

# Batch size: the (truncated) number of training cycles in a single tile.
bs = int(num_cycles * train_ratio)
# Sequence length handed to RNNTrainer further down.
sl = 30

# Make GPU #1 the current CUDA device for everything that follows.
gpu_start = 1
torch.cuda.set_device(gpu_start)
# device_ids = range(gpu_start, gpu_start + num_gpus)

# Pick the pair of target columns for the problem being solved.
is_problem1 = True
if is_problem1:
    dep_var = ['推进速度电位器设定值', '刀盘转速电位器设定值']
else:
    dep_var = ['总推进力', '刀盘扭矩']

In [None]:
# Collect the 'cycle*' files in sorted order and keep at most `num_cycles` of them.
fns_feathers = sorted(fn_cycles.glob('cycle*'))
fns_feathers = fns_feathers[:num_cycles]

# DataFormatter exposes the loaded cycles through its `cycles` attribute.
fmtr = DataFormatter(cycle_feathers=fns_feathers)
cycles = fmtr.cycles

# One `beginning_index` result per cycle, computed from its first 500 rows only.
idx = [beginning_index(cycle.head(500)) for cycle in tqdm_notebook(cycles)]

In [None]:
# `cont_names` is only assigned by the config cell when `selected_columns` is True.
# In the other case it used to come from a previously executed (now commented-out)
# line, so a fresh kernel failed with NameError here and a stale value could
# disagree with `n_cont`. Derive it explicitly instead: the `n_cont` columns that
# follow the first two, minus the target columns.
# NOTE(review): assumes the first two columns of a cycle are not features, as the
# original commented-out line did; if any `dep_var` column falls inside this slice
# the list will be shorter than `n_cont` — confirm against the cycle schema.
if not selected_columns:
    cont_names = [o for o in cycles[0].columns[2:2+n_cont] if o not in dep_var]

# Build the `mulr` tiles of the continuous features for every cycle.
df_conts = tile_with_noise(cycles, idx, mulr, cont_names)

In [None]:
# i = 0
# cyc = cycles[i].iloc[:500]
# axis = plots(get_interesting_columns(cyc), title=str(idx[i]));
# for ax in axis.flatten():
#     ax.axvline(idx[i])

In [None]:
# Fetch the target columns and stack `mulr` copies of them end to end with a fresh
# 0..N-1 index (presumably so they line up row-for-row with the tiles in `df_conts`).
deps = pd.concat([fmtr.get_y(dep_var)] * mulr, ignore_index=True)

# Combine the tiled features and the repeated targets into one frame.
cyc_cont = flatten_and_cat(df_conts, deps)

In [None]:
# Write the assembled frame to a feather file under tmp/.
# NOTE(review): the '#init' cells that follow read this file and then
# 'tmp/cyc_cont_all_allc'; when both run, the second read is what training uses.
cyc_cont.to_feather('tmp/cyc_cont_all_6_debug')

In [None]:
#init
# Read back the feather file written by the to_feather cell above.
cyc_cont = feather.read_dataframe('tmp/cyc_cont_all_6_debug')

In [None]:
#init
# Read a different cached frame; this rebinds `cyc_cont` and therefore overrides the
# read in the previous cell when both are executed.
# NOTE(review): no visible cell writes 'tmp/cyc_cont_all_allc' — presumably it was
# produced by an earlier run with other settings; confirm it matches `n_cont`/`mulr`.
cyc_cont = feather.read_dataframe('tmp/cyc_cont_all_allc')

In [None]:
# Validation rows: for each held-out cycle i, take its row from every one of the
# `mulr` tiles (row i + k * num_cycles for tile k).
valid_idx_tile = (valid_idx[:,None] + np.arange(mulr) * num_cycles).flatten() # take from all tiles
procs = Normalize

# Pass the tiled indices. The un-tiled `valid_idx` only removed the first tile's copy
# of each validation cycle, leaving the remaining copies of those same cycles in the
# training split.
databunch = MultiDeptTabularDataBunch.from_df('tmp', cyc_cont, dep_var, valid_idx=valid_idx_tile, bs=bs, procs=procs)

# Re-wrap the train/valid datasets in the RNN-specific data bunch used by the learner.
rnndb = RNNDataBunch.create(databunch.train_ds, databunch.valid_ds, bs=bs)

In [None]:
class DummyContModel1(RNNCore):
    """`RNNCore` variant that consumes continuous features only.

    The flat `x_cont` batch is reshaped to (batch, sl, n_cont) and sent through the
    parent's input dropout and stacked RNN layers directly; the parent is built with
    a one-entry vocabulary and its embedding is not used in `forward`. The two
    predictions are the sums of the first and second halves of the last layer's
    activations at the final time step.
    """

    def __init__(self, n_cat:int, n_cont:int, n_hid:int, n_layers:int, sl=30,
                 bidir:bool=False, hidden_p:float=0.2, input_p:float=0.6,
                 embed_p:float=0.1, weight_p:float=0.5, qrnn:bool=False):
        """Store the window geometry and build the underlying `RNNCore`.

        `sl` is the number of time steps per sample and `n_cont` the number of
        continuous features per step; `n_cat` is only stored. The remaining
        arguments are forwarded to `RNNCore.__init__` unchanged, with `n_cont`
        used as its `emb_sz`.
        """
        vocab_sz,pad_token=1,0 # continuous variables only for this model
        self.sl, self.n_cat, self.n_cont = sl, n_cat, n_cont
        # Built for a linear head, but `forward` below does not use it.
        self.final = bn_drop_lin(n_cont, 2, actn=None)

        super().__init__(vocab_sz=vocab_sz, emb_sz=n_cont, n_hid=n_hid, n_layers=n_layers, pad_token=pad_token, bidir=bidir,
                 hidden_p=hidden_p, input_p=input_p, embed_p=embed_p, weight_p=weight_p, qrnn=qrnn)

    def forward(self, x_cat, x_cont)->Tuple[Tensor,list,list]:
        """Run one batch through the RNN stack.

        `x_cat` is ignored. `x_cont` must be 2-D with `sl * n_cont` values per row.
        Returns `(pred, raw_outputs, outputs)`: `pred` has two columns, and the two
        lists hold one tensor per layer (before and after hidden dropout).
        """
        bs,_ = x_cont.size()
        seq = x_cont.view(bs, self.sl, self.n_cont)

        # The hidden state is reset on every call, so one reset sized for the current
        # batch is sufficient (the original reset once with the stale batch size and
        # then again after updating it).
        self.bs = bs
        self.reset()

        raw_output = self.input_dp(seq)
        new_hidden,raw_outputs,outputs = [],[],[]
        for l, (rnn,hid_dp) in enumerate(zip(self.rnns, self.hidden_dps)):
            raw_output, new_h = rnn(raw_output, self.hidden[l])
            new_hidden.append(new_h)
            raw_outputs.append(raw_output)
            # Hidden dropout is applied between layers only, not after the last one.
            if l != self.n_layers - 1: raw_output = hid_dp(raw_output)
            outputs.append(raw_output)
        self.hidden = to_detach(new_hidden, cpu=False)

        # Final time step of the last layer, collapsed to two values per sample by
        # summing each half of the feature axis.
        last = raw_output[:,-1]
        half = last.shape[1]//2
        x = torch.stack([last[:,:half].sum(1), last[:,half:].sum(1)], 1)

        return x, raw_outputs, outputs

In [None]:
# Encoder size: hidden width is three times the number of continuous features,
# stacked three layers deep.
n_hidden, n_layers = n_cont*3, 3
# n_hidden, n_layers = 10, 2
# , input_p=0.6, weight_p=0.2
# ContModel1 is not defined in this notebook (presumably it comes from the
# `mymodels` star import); the DummyContModel1 defined above is not used here.
rnn_enc = ContModel1(1, n_cont, n_hidden, n_layers)

# Layer sizes and dropout rates for the pooling head; the last size (2) equals the
# number of target columns in `dep_var`.
layers, drops = [3*n_cont, n_cont, 2], [0.1, 0.1]
# model = MultiInputSequentialRNN(rnn_enc, DummyModel(layers, drops)).cuda()
# model = DummyContModel1(1, n_cont, n_hidden, n_layers)
model = MultiInputSequentialRNN(rnn_enc, PoolingLinearClassifier(layers, drops)).cuda()
model.reset()

# Per-target loss weights: take each target's maximum, reverse the order and divide
# by the larger maximum, so each target is weighted by the *other* target's scale.
weight = cyc_cont[dep_var].max().values
weight = torch.tensor(weight[::-1] / weight.max(), dtype=torch.float32).cuda()

learner = Learner(rnndb, model, loss_func=weighted_rnn_mse(weight), metrics=rnn_metrics, opt_func=optim.SGD)

# NOTE(review): star import in the middle of the cell; it has to run before
# TerminateOnNaNCallback is referenced below.
from fastai.callbacks import *
# learner.callback_fns += [ShowGraph, partial(SaveModelCallback, name='rnn0')]
learner.callback_fns += [ShowGraph,]
learner.callbacks += [TerminateOnNaNCallback()]
# alpha and beta are passed straight to RNNTrainer together with the sequence length.
alpha, beta = 2., 1.
learner.callbacks.append(RNNTrainer(learner, sl, alpha=alpha, beta=beta))

In [None]:
# Learning-rate range test capped at 50 iterations, followed by the recorder plot.
# The two positional 1s are presumably skip_start/skip_end (fastai v1
# Recorder.plot), i.e. drop only one point at each end — confirm against the
# installed version.
learner.lr_find(num_it=50)
learner.recorder.plot(1,1)

In [None]:
# First training run: one-cycle schedule, length 5, maximum learning rate 5e-4.
learner.fit_one_cycle(5, 1e-4*5)

In [None]:
# Repeat the 50-iteration learning-rate range test on the partially trained model,
# this time plotting with the recorder's default arguments.
learner.lr_find(num_it=50)
learner.recorder.plot()

In [None]:
# Second training run: one-cycle schedule, length 10, maximum learning rate 1e-5.
learner.fit_one_cycle(10, 1e-5)

In [None]:
# Third training run: same maximum learning rate (1e-5), length 20.
learner.fit_one_cycle(20, 1e-5)

In [None]:
# Fourth training run: length 50 at one third of the previous maximum learning rate.
learner.fit_one_cycle(50, 1e-5 / 3)

In [None]:
# Plot the metrics tracked by the learner's recorder.
# NOTE(review): presumably this covers only the most recent fit call, since the
# fastai v1 recorder starts over on each fit — confirm.
learner.recorder.plot_metrics()

In [None]:
# Pull one (inputs, targets) batch from the training loader.
# `ni` is not defined in this notebook — presumably next(iter(...)); confirm.
x, y = ni(learner.data.train_dl)
# Predictions are the first element of the model's output; move both to the CPU.
y, p = y.cpu(), learner.model(*x)[0].cpu()
y_np, p_np = to_np(y), to_np(p)
# our_metrics(y, p), our_metrics_np(y_np, p_np)
# NOTE(review): `lf` is not used in any of the cells below.
lf = weighted_rnn_mse(weight.cpu())
# NOTE(review): the argument order here (prediction, target) is the reverse of the
# commented-out call two lines up — confirm which order our_metrics_np expects.
our_metrics_np(p_np, y_np)

In [None]:
# Display the prediction tensor computed in the previous cell.
p

In [None]:
# Run the model on the same batch again and keep the second element of its output.
# NOTE: this rebinds `p`; from here on it is no longer the prediction tensor. The
# cells below index it as layer x batch x time x hidden.
p = learner.model(*x)[1]

In [None]:
# Second-to-last entry of `p`, taken at the last index of its second axis for the
# whole batch (the last time step under the l x b x t x h layout used below).
p[-2][:,-1]

In [None]:
# l x b x t x h
# (mean, std) of the hidden activations at every time step, for layer `l` and
# sample `b`. Iterates over the time steps actually present instead of a hard-coded
# 30, so it keeps working (and covers the full sequence) if the window length changes.
l, b = 0, 0
[(float(step.mean()), float(step.std())) for step in p[l][b]]

In [None]:
# l x b x t x h
# (mean, std) of the hidden activations of every layer, for sample `b` at time
# step `t`. The `l` assigned here is not read by the comprehension, which walks all
# layers with its own loop variable.
l, b, t = 0, 0, 1
[
    (float(act.mean()), float(act.std()))
    for act in (p[layer][b][t] for layer in range(n_layers))
]

In [None]:
# TODO: unfinished cell. The original statement `scatter = ` had no right-hand side,
# which is a SyntaxError and stops "Restart & Run All" before the final cell.
# Fill in the intended expression or delete this cell.
# scatter = ...

In [None]:
# Pair every target row with the corresponding prediction row for inspection.
# NOTE(review): this renders one entry per row of the batch; consider slicing
# (e.g. the first few pairs) to keep the cell output readable.
list(zip(y_np.tolist(), p_np.tolist()))