# Tutorial

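This tutorial shows how to use the MatchingRep model: loading data, training, evaluation, prediction, organ allocation, and allocation simulation.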

In [1]:
from matching_rep import MatchingRep
import json
import numpy as np

# Load data

In [2]:
# Fixed seed so the shuffle -- and therefore the train/validation/test
# split -- is the same on every run of this cell.
SEED = 0

# Read the dataset file; it holds the sample matrix under 'data' and the
# patient/organ feature counts under 'n_feature_x' / 'n_feature_o'.
with open('./data/gmix.json', 'r', encoding='utf8') as f:
    dic = json.load(f)

data = np.array(dic['data'])
n_feature_x = dic['n_feature_x']
n_feature_o = dic['n_feature_o']

# Shuffle the rows in place with a dedicated, seeded generator so the global
# NumPy random state is left untouched.
np.random.RandomState(SEED).shuffle(data)

# Split dataset into training/validation/test sets (80% / 10% / 10%).
# Explicit positive indices are used instead of negative ones: with fewer than
# 10 rows `split` is 0, and `data[:-0]` would yield an EMPTY training set.
n_samples = data.shape[0]
split = n_samples // 10
train = data[:n_samples - 2 * split]
valid = data[n_samples - 2 * split:n_samples - split]
test = data[n_samples - split:]

# Split each subset column-wise into patients / organs / outcomes:
#   first n_feature_x columns -> patient features (X)
#   remaining columns but last -> organ features (O)
#   last column                -> outcome (Y)
# NOTE(review): this presumably means each row has n_feature_x + n_feature_o + 1
# columns -- confirm against the data file.
train_X = train[:, :n_feature_x]
train_O = train[:, n_feature_x:-1]
train_Y = train[:, -1]

valid_X = valid[:, :n_feature_x]
valid_O = valid[:, n_feature_x:-1]
valid_Y = valid[:, -1]

test_X = test[:, :n_feature_x]
test_O = test[:, n_feature_x:-1]
test_Y = test[:, -1]

# Summary of the loaded dataset.
print('num feature of patients: ', n_feature_x)
print('num feature of organs: ', n_feature_o)
print('sample size: ', data.shape[0])
print('training set size: ', train.shape[0])

num feature of patients:  128
num feature of organs:  64
sample size:  10000
training set size:  8000


# Build and Train Model

In [3]:
# Create a MatchingRep model from the patient/organ feature counts, using 3 clusters.
model = MatchingRep(n_feature_x, n_feature_o, n_clusters=3)

# Fit on the training split, passing the validation split alongside it.
# Inputs are given as [patient features, organ features] pairs.
train_inputs = [train_X, train_O]
valid_inputs = [valid_X, valid_O]
hist = model.fit(
    train_inputs,
    train_Y,
    validation_data=(valid_inputs, valid_Y),
    batch_size=256,
    epochs=50,
    verbose=0,
)

pre-training auto-encoder
pre-training clusters
Reached convergence threshold. Stopping training.


# Evaluate the model

In [4]:
# Load the best model from the checkpoint at './model/MatchingRepCheckpoint'.
model.load_weights(path='./model/MatchingRepCheckpoint')

# Evaluate on the held-out test split.
loss = model.evaluate([test_X, test_O], test_Y)

# The value printed is np.log(loss), not the raw loss, so the label says "log"
# to match what is actually shown.
# NOTE(review): this assumes `evaluate` returns the mean squared error -- confirm
# in matching_rep.py.
print('log mean squared error on test set: ', np.log(loss))

mean squared error on test set:  8.979059677753243


# Get predictions

In [5]:
# Restore the checkpointed (best) weights before predicting.
checkpoint_path = './model/MatchingRepCheckpoint'
model.load_weights(path=checkpoint_path)

# All potential outcomes of each test patient (patient features only).
ys_pred = model.predict(test_X)

# Potential outcome of each test patient-organ pair.
pair_inputs = [test_X, test_O]
y_pred = model.predict_y(pair_inputs)

# Soft clustering result of each test organ.
clus = model.predict_clus(test_O)

# Perform Allocation

In [6]:
# Index of the test-set organ to allocate (fixed to the first organ here).
idx_o = 0
organ = test_O[idx_o]

# Ask the model which of the test patients should receive this organ.
idx = model.allocate_one(test_X, organ)
print('the organ is allocated to patient ', idx)

# Predict the outcome for the selected patient-organ pair; each feature vector
# is reshaped to a single-row 2-D array before being passed to predict_y.
patient_row = test_X[idx].reshape((1, -1))
organ_row = organ.reshape(1, -1)
estimated_time = model.predict_y([patient_row, organ_row])[0, 0]
print('the estimated survival time is: ', estimated_time)

the organ is allocated to patient  0
the estimated survival time is:  923.3725


# Run Allocation Simulation
We also provide code for the simulation experiments reported in our paper.

In [2]:
from simulation import run_simulation

# Run the allocation simulation on the dataset stored at this path.
simulation_data_path = './data/gmixbiased.json'
run_simulation(data_path=simulation_data_path)

pre-training auto-encoder
pre-training clusters
Reached convergence threshold. Stopping training.
death rate:  0.3522  ( 1761 / 5000 )
avg survival time:  470.0432371420486  ( 2350216.185710243 / 5000 )
avg benefit:  100.69963076599974  ( 503498.1538299987 / 5000 )
death rate:  0.3524  ( 1762 / 5000 )
avg survival time:  468.64904711881235  ( 2343245.2355940617 / 5000 )
avg benefit:  104.00545770729195  ( 520027.2885364598 / 5000 )
death rate:  0.3918  ( 1959 / 5000 )
avg survival time:  456.49278372631755  ( 2282463.9186315876 / 5000 )
avg benefit:  88.58481905684403  ( 442924.0952842201 / 5000 )
death rate:  0.2694  ( 1347 / 5000 )
avg survival time:  527.5707664486707  ( 2637853.8322433536 / 5000 )
avg benefit:  148.72144473662792  ( 743607.2236831395 / 5000 )
death rate:  0.2718  ( 1359 / 5000 )
avg survival time:  527.1638361923992  ( 2635819.1809619963 / 5000 )
avg benefit:  243.05089399658655  ( 1215254.4699829328 / 5000 )
----- assignment flip -----
flip ratio:  0.4864864864864

For detailed instructions, see matching_rep.py.