In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the three pre-split episode dictionaries (train / val / test) in one pass.
train_data, val_data, test_data = (
    torch.load(split_path)
    for split_path in (
        '../data/episodes+encoded_state/train_data.pt',
        '../data/episodes+encoded_state/val_data.pt',
        '../data/episodes+encoded_state/test_data.pt',
    )
)

In [3]:
# List the fields stored in the training split (shown as the cell output).
train_data.keys()

dict_keys(['index', 'icustayids', 'lengths', 'times', 'notdones', 'demographics', 'observations', 'acuities', 'rewards', 'actions', 'actionvecs', 'subactions', 'subactionvecs', 'statevecs'])

In [4]:
# Number of episodes (entries under 'icustayids') in the train, val and test splits.
print(*(len(split['icustayids']) for split in (train_data, val_data, test_data)))

13498 2895 2894


## Make flattened (S,A) dataset from episodic dataset

In [5]:
# Flatten the training episodes into per-step arrays. For an episode of
# length n, state rows 0..n-2 are paired with action entries 1..n-1, so each
# episode contributes n-1 (state, action) samples.
n_train = len(train_data['icustayids'])
train_statevecs = np.vstack([
    train_data['statevecs'][ep][:train_data['lengths'][ep] - 1].numpy()
    for ep in range(n_train)
])
train_actions = np.concatenate([
    train_data['actions'][ep][1:train_data['lengths'][ep]].numpy()
    for ep in range(n_train)
])
print(train_statevecs.shape, train_actions.shape)

(165713, 64) (165713,)


In [6]:
# Flatten the validation episodes into per-step arrays. For an episode of
# length n, state rows 0..n-2 are paired with action entries 1..n-1, so each
# episode contributes n-1 (state, action) samples.
n_val = len(val_data['icustayids'])
val_statevecs = np.vstack([
    val_data['statevecs'][ep][:val_data['lengths'][ep] - 1].numpy()
    for ep in range(n_val)
])
val_actions = np.concatenate([
    val_data['actions'][ep][1:val_data['lengths'][ep]].numpy()
    for ep in range(n_val)
])
print(val_statevecs.shape, val_actions.shape)

(35388, 64) (35388,)


In [7]:
# Flatten the test episodes into per-step arrays. For an episode of
# length n, state rows 0..n-2 are paired with action entries 1..n-1, so each
# episode contributes n-1 (state, action) samples.
n_test = len(test_data['icustayids'])
test_statevecs = np.vstack([
    test_data['statevecs'][ep][:test_data['lengths'][ep] - 1].numpy()
    for ep in range(n_test)
])
test_actions = np.concatenate([
    test_data['actions'][ep][1:test_data['lengths'][ep]].numpy()
    for ep in range(n_test)
])
print(test_statevecs.shape, test_actions.shape)

(35846, 64) (35846,)


## KNN behavior policy

In [8]:
# Number of neighbors passed to every KNeighborsClassifier fitted in this notebook.
K = 100

In [9]:
# Fit the K-nearest-neighbor classifier on the flattened training pairs and
# score the very same training states with it.
clf_train = KNeighborsClassifier(n_neighbors=K)
clf_train.fit(X=train_statevecs, y=train_actions)
# On the training split the "estimated" probabilities are the same array object.
train_estm_pibs = train_pibs = clf_train.predict_proba(train_statevecs)

In [10]:
# Classifier fitted on the validation split itself.
clf_val = KNeighborsClassifier(K)
clf_val.fit(val_statevecs, val_actions)
# predict_proba returns one column per entry of `classes_`. Both classifiers
# must have seen the same set of actions, otherwise the columns of the two
# arrays below would refer to different actions (and may differ in width).
assert np.array_equal(clf_val.classes_, clf_train.classes_), (
    "validation split does not contain the same set of actions as the "
    "training split; predict_proba columns would be misaligned"
)
# Probabilities from the classifier fitted on the validation data.
val_pibs = clf_val.predict_proba(val_statevecs)
# Probabilities for the same states from the classifier fitted on training data.
val_estm_pibs = clf_train.predict_proba(val_statevecs)

In [11]:
# Classifier fitted on the test split itself.
clf_test = KNeighborsClassifier(K)
clf_test.fit(test_statevecs, test_actions)
# predict_proba returns one column per entry of `classes_`. Both classifiers
# must have seen the same set of actions, otherwise the columns of the two
# arrays below would refer to different actions (and may differ in width).
assert np.array_equal(clf_test.classes_, clf_train.classes_), (
    "test split does not contain the same set of actions as the "
    "training split; predict_proba columns would be misaligned"
)
# Probabilities from the classifier fitted on the test data.
test_pibs = clf_test.predict_proba(test_statevecs)
# Probabilities for the same states from the classifier fitted on training data.
test_estm_pibs = clf_train.predict_proba(test_statevecs)

In [12]:
# Collect the six flattened probability arrays, keyed by their variable names,
# and write them to a single .npz archive.
knn_outputs = dict(
    train_pibs=train_pibs,
    train_estm_pibs=train_estm_pibs,
    val_pibs=val_pibs,
    val_estm_pibs=val_estm_pibs,
    test_pibs=test_pibs,
    test_estm_pibs=test_estm_pibs,
)
with open('../data/knn_output.npz', 'wb') as f:
    np.savez(f, **knn_outputs)

## Assign KNN results back to episodes

In [13]:
# Scatter the flattened training probabilities back into per-episode tensors
# shaped like `actionvecs`; positions that are never written stay zero.
# NOTE(review): zeros_like inherits the dtype of `actionvecs` -- confirm it is a
# floating dtype, otherwise the probabilities would be truncated on assignment.
train_data['pibs'] = torch.zeros_like(train_data['actionvecs'])
train_data['estm_pibs'] = torch.zeros_like(train_data['actionvecs'])
ptr = 0
for i in range(len(train_data['icustayids'])):
    # Plain int so that `ptr` stays an int instead of turning into a tensor.
    lng = int(train_data['lengths'][i])
    # Each episode contributed lng-1 consecutive rows to the flattened arrays.
    train_data['pibs'][i, :lng-1, :] = torch.tensor(train_pibs[ptr:ptr+lng-1])
    train_data['estm_pibs'][i, :lng-1, :] = torch.tensor(train_estm_pibs[ptr:ptr+lng-1])
    ptr += lng-1
# Every flattened row must have been consumed exactly once; a mismatch means
# the episode lengths and the flattened arrays are out of sync.
assert ptr == len(train_pibs) == len(train_estm_pibs), (
    f"consumed {ptr} rows but train_pibs has {len(train_pibs)} "
    f"and train_estm_pibs has {len(train_estm_pibs)}"
)

In [14]:
# Scatter the flattened validation probabilities back into per-episode tensors
# shaped like `actionvecs`; positions that are never written stay zero.
# NOTE(review): zeros_like inherits the dtype of `actionvecs` -- confirm it is a
# floating dtype, otherwise the probabilities would be truncated on assignment.
val_data['pibs'] = torch.zeros_like(val_data['actionvecs'])
val_data['estm_pibs'] = torch.zeros_like(val_data['actionvecs'])
ptr = 0
for i in range(len(val_data['icustayids'])):
    # Plain int so that `ptr` stays an int instead of turning into a tensor.
    lng = int(val_data['lengths'][i])
    # Each episode contributed lng-1 consecutive rows to the flattened arrays.
    val_data['pibs'][i, :lng-1, :] = torch.tensor(val_pibs[ptr:ptr+lng-1])
    val_data['estm_pibs'][i, :lng-1, :] = torch.tensor(val_estm_pibs[ptr:ptr+lng-1])
    ptr += lng-1
# Every flattened row must have been consumed exactly once; a mismatch means
# the episode lengths and the flattened arrays are out of sync.
assert ptr == len(val_pibs) == len(val_estm_pibs), (
    f"consumed {ptr} rows but val_pibs has {len(val_pibs)} "
    f"and val_estm_pibs has {len(val_estm_pibs)}"
)

In [15]:
# Scatter the flattened test probabilities back into per-episode tensors
# shaped like `actionvecs`; positions that are never written stay zero.
# NOTE(review): zeros_like inherits the dtype of `actionvecs` -- confirm it is a
# floating dtype, otherwise the probabilities would be truncated on assignment.
test_data['pibs'] = torch.zeros_like(test_data['actionvecs'])
test_data['estm_pibs'] = torch.zeros_like(test_data['actionvecs'])
ptr = 0
for i in range(len(test_data['icustayids'])):
    # Plain int so that `ptr` stays an int instead of turning into a tensor.
    lng = int(test_data['lengths'][i])
    # Each episode contributed lng-1 consecutive rows to the flattened arrays.
    test_data['pibs'][i, :lng-1, :] = torch.tensor(test_pibs[ptr:ptr+lng-1])
    test_data['estm_pibs'][i, :lng-1, :] = torch.tensor(test_estm_pibs[ptr:ptr+lng-1])
    ptr += lng-1
# Every flattened row must have been consumed exactly once; a mismatch means
# the episode lengths and the flattened arrays are out of sync.
assert ptr == len(test_pibs) == len(test_estm_pibs), (
    f"consumed {ptr} rows but test_pibs has {len(test_pibs)} "
    f"and test_estm_pibs has {len(test_estm_pibs)}"
)

In [16]:
# Check consistency of sample size of outputs: every field of every split
# must hold exactly one entry per episode (as counted by the 'index' field).
for data in [train_data, val_data, test_data]:
    num_episodes = len(data['index'])
    for name, values in data.items():
        # Name the offending field so a failure can be traced without a debugger.
        assert len(values) == num_episodes, (
            f"field {name!r} has {len(values)} entries, expected {num_episodes}"
        )

In [17]:
# Write the augmented splits (now carrying 'pibs' and 'estm_pibs') to their own
# directory. torch.save does not create missing parent directories, so make
# sure the target exists; this keeps a fresh Restart & Run All from failing here.
os.makedirs('../data/episodes+encoded_state+knn_pibs', exist_ok=True)
torch.save(train_data, '../data/episodes+encoded_state+knn_pibs/train_data.pt')
torch.save(val_data, '../data/episodes+encoded_state+knn_pibs/val_data.pt')
torch.save(test_data, '../data/episodes+encoded_state+knn_pibs/test_data.pt')

In [18]:
# Check consistency of episode length and feature vector on the first training
# episodes: observations from index `lng` on, and both probability tensors
# from index `lng-1` on, must be all zeros.
# The number of inspected episodes is capped at the dataset size so that a
# split with fewer than 100 episodes does not raise an IndexError.
num_checked = min(100, len(train_data['icustayids']))
for i in range(num_checked):
    lng = train_data['lengths'][i]
    assert (train_data['observations'][i][lng:] == 0).all(), \
        f"episode {i}: non-zero observations at or after index {int(lng)}"
    assert (train_data['pibs'][i][lng-1:] == 0).all(), \
        f"episode {i}: non-zero pibs at or after index {int(lng) - 1}"
    assert (train_data['estm_pibs'][i][lng-1:] == 0).all(), \
        f"episode {i}: non-zero estm_pibs at or after index {int(lng) - 1}"