In [14]:
import argparse
import copy
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import ujson as json
from ipdb import set_trace
from sklearn import metrics
from torch.optim.lr_scheduler import StepLR

import data_loader
import models
import utils

In [9]:
# Command-line parsing is disabled in the notebook; the same settings are
# assigned directly below.
#parser = argparse.ArgumentParser()
#parser.add_argument('--epochs', type=int, default=1000)
#parser.add_argument('--batch_size', type=int, default=32)
#parser.add_argument('--model', type=str)
#parser.add_argument('--hid_size', type=int)
#parser.add_argument('--impute_weight', type=float)
#parser.add_argument('--label_weight', type=float)
#args = parser.parse_args()

# No longer used by this cell; retained in case another cell references the name.
from collections import namedtuple

# argparse.Namespace is the type parser.parse_args() returns, so `args`
# behaves the same whether it comes from the CLI or from this cell. The
# previous code assigned the settings onto an un-instantiated namedtuple
# class, which overwrote the class's own field descriptors.
args = argparse.Namespace(
    model="rits_i",       # name of the submodule of `models` to instantiate
    epochs=1000,
    batch_size=32,
    hid_size=108,
    impute_weight=0.3,
    label_weight=1.0,
)

In [10]:
def train(model, lr=1e-3):
    """Optimise ``model`` with Adam and call ``evaluate`` after every epoch.

    Reads ``args.batch_size`` and ``args.epochs`` from the notebook-level
    ``args`` object. The loader returned by ``data_loader.get_loader`` is
    used both for the optimisation pass and for the per-epoch ``evaluate``
    call.

    Args:
        model: object exposing ``parameters()``, ``train()`` and
            ``run_on_batch(data, optimizer, epoch)``. The value returned by
            ``run_on_batch`` must have a ``'loss'`` entry supporting
            ``.item()``.
        lr: learning rate handed to ``optim.Adam``. Defaults to ``1e-3``,
            the value that used to be hard-coded.
    """
    optimizer = optim.Adam(model.parameters(), lr=lr)

    print('\r Loading data...')
    data_iter = data_loader.get_loader(batch_size=args.batch_size)
    # Loop-invariant: computed once instead of on every batch.
    n_batches = len(data_iter)

    for epoch in range(args.epochs):
        # evaluate() switches the model to eval mode, so training mode has
        # to be re-enabled at the start of each epoch.
        model.train()

        run_loss = 0.0

        # batch_no is 1-based so it can be used directly as the divisor.
        for batch_no, data in enumerate(data_iter, start=1):
            data = utils.to_var(data)

            ret = model.run_on_batch(data, optimizer, epoch)

            run_loss += ret['loss'].item()

            # NOTE(review): print() appends a newline, so the leading '\r'
            # does not redraw the line in place. The output format is left
            # unchanged here.
            print('\r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, batch_no * 100.0 / n_batches, run_loss / batch_no))

        evaluate(model, data_iter)

In [11]:
def _to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU, detached from autograd."""
    return tensor.detach().cpu().numpy()


def evaluate(model, val_iter):
    """Score ``model`` on ``val_iter`` and save its imputations under ``./result``.

    Prints three metrics:
      * ``AUC`` of ``predictions`` against ``labels``, restricted to the
        entries whose ``is_train`` value equals 0;
      * ``MAE`` and ``MRE`` between ``evals`` and ``imputations``, restricted
        to the positions where ``eval_masks`` equals 1.

    The unfiltered imputations and labels of every batch are concatenated
    along axis 0 and written with ``np.save`` to
    ``./result/<args.model>_data`` and ``./result/<args.model>_label``
    (``np.save`` appends the ``.npy`` extension).

    Args:
        model: object exposing ``eval()`` and ``run_on_batch(data, None)``.
            The returned mapping must hold tensors under the keys
            ``imputations``, ``labels``, ``predictions``, ``is_train``,
            ``eval_masks`` and ``evals``.
        val_iter: iterable of batches accepted by ``utils.to_var``.
    """
    model.eval()

    labels = []
    preds = []

    evals = []
    imputations = []

    save_impute = []
    save_label = []

    # No optimizer is passed, so no gradients are needed; disabling autograd
    # avoids building a graph for every batch.
    # NOTE(review): assumes run_on_batch does not call backward() when the
    # optimizer is None -- confirm against the models package.
    with torch.no_grad():
        for data in val_iter:
            data = utils.to_var(data)
            ret = model.run_on_batch(data, None)

            # Convert each output once and reuse the arrays below.
            imputation = _to_numpy(ret['imputations'])
            label = _to_numpy(ret['labels'])
            pred = _to_numpy(ret['predictions'])
            is_train = _to_numpy(ret['is_train'])
            eval_masks = _to_numpy(ret['eval_masks'])
            eval_ = _to_numpy(ret['evals'])

            # save the imputation results which is used to test the
            # improvement of traditional methods with imputed values
            save_impute.append(imputation)
            save_label.append(label)

            eval_positions = eval_masks == 1
            evals += eval_[eval_positions].tolist()
            imputations += imputation[eval_positions].tolist()

            # collect test label & prediction
            is_test = is_train == 0
            labels += label[is_test].tolist()
            preds += pred[is_test].tolist()

    labels = np.asarray(labels).astype('int32')
    preds = np.asarray(preds)

    print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))

    evals = np.asarray(evals)
    imputations = np.asarray(imputations)

    print('MAE', np.abs(evals - imputations).mean())

    print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())

    save_impute = np.concatenate(save_impute, axis=0)
    save_label = np.concatenate(save_label, axis=0)

    # np.save does not create missing directories; make sure the target
    # exists so a full evaluation pass is not lost to a missing folder.
    os.makedirs('./result', exist_ok=True)

    np.save('./result/{}_data'.format(args.model), save_impute)
    np.save('./result/{}_label'.format(args.model), save_label)

In [12]:
def run():
    """Build the model selected by ``args.model``, report its size and train it.

    The model class is looked up as ``models.<args.model>.Model`` and is
    constructed from ``args.hid_size``, ``args.impute_weight`` and
    ``args.label_weight``. The model is moved to the GPU when CUDA is
    available.
    """
    model_cls = getattr(models, args.model).Model
    model = model_cls(args.hid_size, args.impute_weight, args.label_weight)

    # Count only the parameters that will receive gradients.
    trainable_sizes = [param.numel() for param in model.parameters() if param.requires_grad]
    total_params = sum(trainable_sizes)
    print('Total params is {}'.format(total_params))

    use_gpu = torch.cuda.is_available()
    if use_gpu:
        model = model.cuda()

    train(model)

In [None]:
# Start the build-and-train pipeline when this code is executed as the main module.
if __name__ == '__main__':
    run()

Total params is 85572
 Loading data...
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47




 Progress epoch 0, 0.80%, average loss 11.05997371673584
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
 Progress epoch 0, 1.60%, average loss 10.902449131011963
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
 Progress epoch 0, 2.40%, average loss 10.885335604349772
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
 Progress epoch 0, 3.20%, average loss 10.815112590789795
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
 Progress epoch 0, 4.00%, average loss 10.915625
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
 Progress epoch 0, 4.80%, average loss 10.88871018

In [None]:
# Build the model at notebook level (same construction as in run()) so it
# can be inspected interactively.
model = getattr(models, args.model).Model(
    args.hid_size,
    args.impute_weight,
    args.label_weight,
)
# Number of scalar parameters with requires_grad set.
total_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)

In [None]:
# Recreate at notebook level the optimizer and loader that train() builds
# internally, so they can be inspected interactively.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
)

print('\r Loading data...')
data_iter = data_loader.get_loader(
    batch_size=args.batch_size,
)

In [None]:
# One record (index 1) of the underlying dataset, for manual inspection.
rec = data_iter.dataset[1]
recs = data_iter.dataset
# Materialised as a list: a bare map() is a one-shot iterator, and several
# later cells each iterate over `forward`; with map() every cell after the
# first would silently see an empty sequence.
forward = [record['forward'] for record in recs]

In [None]:


# Collect the 'masks' entry of every forward record into one float tensor.
masks = torch.FloatTensor([fwd['masks'] for fwd in forward])
masks

In [None]:
# Integer-typed variant of the masks tensor.
# NOTE(review): if `forward` is a one-shot iterator that an earlier cell has
# already consumed, this produces an empty tensor.
mask = torch.IntTensor([fwd['masks'] for fwd in forward])
mask

In [None]:
# Walk the loader batch by batch and pull out the forward masks.
for idx, data in enumerate(data_iter):
    data2 = utils.to_var(data)
    print(idx)
    m = data2['forward']['masks']
    # NOTE(review): this iterates `data` itself; if `data` is a dict (as the
    # data2['forward'] lookup suggests) the loop walks its keys -- confirm intent.
    m2 = torch.IntTensor([item['masks'] for item in data])
    # NOTE(review): a bare expression inside a for loop is evaluated and
    # discarded; nothing is displayed by this line.
    m[:, 0, :]


In [None]:
# Show the 'masks' entry of every forward record as a plain list.
[fwd['masks'] for fwd in forward]

In [None]:
# Fetch a single item from the loader through the public iterator protocol
# instead of the private _get_iterator() / __next__() calls.
z = iter(data_iter)
z2 = next(z)


In [None]:
# Display the values of z2, the first item pulled from the loader iterator.
z2.values()

In [None]:
# NOTE(review): `data1` is not assigned anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel unless it is defined
# elsewhere -- confirm which variable was meant (perhaps `data2`).
values=data1['forward']['masks']
values


