In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Model, Input
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Embedding, LSTM, concatenate
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras_tqdm import TQDMCallback
from keras.models import model_from_json


Using TensorFlow backend.


In [2]:
# Load the patient table (response label, PR/RT nucleotide sequences and the
# two baseline measurements VL-t0 / CD4-t0) and preview the first five rows.
# NOTE(review): the frame is called `validation` but is read from
# 'training_data.csv' -- confirm this is really held-out data.
validation = pd.read_csv(filepath_or_buffer='training_data.csv')
validation.head(n=5)


Unnamed: 0,PatientID,Resp,PR Seq,RT Seq,VL-t0,CD4-t0
0,1,0,CCTCAAATCACTCTTTGGCAACGACCCCTCGTCCCAATAAGGATAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAGCTAAAGCCAGGAA...,4.3,145
1,2,0,CCTCAAATCACTCTTTGGCAACGACCCCTCGTCGCAATAAAGATAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,3.6,224
2,3,0,CCTCAAATCACTCTTTGGCAACGACCCCTCGTCGCAATAAAGGTAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,3.2,1017
3,4,0,CCTCAAATCACTCTTTGGCAACGACCCCTCGTCGCAATAAGGATAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,5.7,206
4,5,0,CCTCAAATCACTCTTTGGCAACGACCCCTCGTCGCAGTAAAGATAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,3.5,572


In [3]:
# Discard every row that has a missing value in any column (these are the
# pandas defaults, spelled out here for the reader).
validation = validation.dropna(axis=0, how='any')


In [4]:
def clean(x):
    """Replace every nucleotide letter in ``x`` with its decimal code string.

    Mapping: A=1, T=2, C=3, G=4, U=5, W=6, S=7, M=8, K=9, R=10, Y=11, B=12,
    D=13, H=14, V=15, N=16, Z=17. Characters that are not in the table
    (lower-case letters, digits, gap symbols, ...) are left unchanged. The
    codes are concatenated without a separator, so the result is a plain
    string of digits.

    NOTE(review): two-digit codes cannot be told apart in the output ("Y" and
    "AA" both give "11"), and ``transform_to_array`` splits the returned
    string one character at a time. Presumably the model was trained with this
    exact encoding -- confirm against the training notebook before changing it.
    """
    letter_to_code = {
        "A": "1",
        "T": "2",
        "C": "3",
        "G": "4",
        "U": "5",
        "W": "6",
        "S": "7",
        "M": "8",
        "K": "9",
        "R": "10",
        "Y": "11",
        "B": "12",
        "D": "13",
        "H": "14",
        "V": "15",
        "N": "16",
        "Z": "17",
    }
    # Keys are upper-case letters and values contain digits only, so a single
    # translation pass gives the same string as replacing letter by letter.
    return x.translate(str.maketrans(letter_to_code))
    
def transform_to_array(x):
    """Turn an iterable of digit symbols into a 1-D ``uint8`` NumPy array.

    ``x`` is iterated element by element, so a string contributes one array
    entry per character (e.g. ``"3321"`` becomes ``[3, 3, 2, 1]``).
    """
    symbols = [symbol for symbol in x]
    return np.asarray(symbols, dtype=np.uint8)

# Columns that hold raw nucleotide strings and must be integer-encoded.
SEQUENCE_COLUMNS = ['PR Seq', 'RT Seq']


def _encode_sequence(seq):
    """Encode one sequence with ``clean`` followed by ``transform_to_array``.

    Values that are already NumPy arrays are returned unchanged. This cell
    overwrites its own input columns, so without this guard a second run of
    the cell would call ``clean`` on an array and raise ``AttributeError``.
    Any other value goes through ``clean`` exactly as before.
    """
    if isinstance(seq, np.ndarray):
        return seq
    return transform_to_array(clean(seq))


# Same order as before: PR first, then RT.
for column in SEQUENCE_COLUMNS:
    validation[column] = validation[column].apply(_encode_sequence)

validation.head()


Unnamed: 0,PatientID,Resp,PR Seq,RT Seq,VL-t0,CD4-t0
0,1,0,"[3, 3, 2, 3, 1, 1, 1, 2, 3, 1, 3, 2, 3, 2, 2, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",4.3,145
1,2,0,"[3, 3, 2, 3, 1, 1, 1, 2, 3, 1, 3, 2, 3, 2, 2, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",3.6,224
2,3,0,"[3, 3, 2, 3, 1, 1, 1, 2, 3, 1, 3, 2, 3, 2, 2, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",3.2,1017
3,4,0,"[3, 3, 2, 3, 1, 1, 1, 2, 3, 1, 3, 2, 3, 2, 2, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.7,206
4,5,0,"[3, 3, 2, 3, 1, 1, 1, 2, 3, 1, 3, 2, 3, 2, 2, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",3.5,572


In [5]:
# Fixed input lengths used for the two sequence inputs.
# NOTE(review): presumably these match the lengths the network was trained
# with -- confirm against the saved architecture.
max_PR = 312
max_RT = 1575

# Bring every encoded sequence to the fixed length. 'pre' padding/truncating
# are the Keras defaults, written out so the reader sees that zeros are added
# at the front and over-long sequences lose their leading entries.
padded_PR = sequence.pad_sequences(
    validation['PR Seq'],
    maxlen=max_PR,
    dtype=np.uint8,
    padding='pre',
    truncating='pre',
)
padded_RT = sequence.pad_sequences(
    validation['RT Seq'],
    maxlen=max_RT,
    dtype=np.uint8,
    padding='pre',
    truncating='pre',
)



In [6]:
# Rebuild the network from its JSON architecture description, then load the
# saved weights into it.
with open('model_architecture.json', 'r') as architecture_file:
    architecture_json = architecture_file.read()
model = model_from_json(architecture_json)
model.load_weights('weights.hdf5')


Instructions for updating:
Colocations handled automatically by placer.


In [7]:
# predict() is given three inputs, in this order: padded PR sequences, padded
# RT sequences, and the two baseline measurements taken from the frame.
clinical_features = validation[['VL-t0', 'CD4-t0']]
prob = model.predict([padded_PR, padded_RT, clinical_features])


In [8]:
# Average predicted score over all rows; the next cell uses it as the cut-off
# between class 0 and class 1.
# NOTE(review): a threshold derived from the batch being scored makes each
# label depend on the other rows -- confirm this is intended rather than a
# fixed cut-off.
mean = prob.mean()
mean

0.50698775

In [9]:
# Binarise the predicted scores in place: 1 where the score is at least
# `mean`, 0 otherwise. The comparison is vectorised instead of looping over
# the rows in Python; the boolean result is cast to the array's own dtype on
# assignment, so `prob` keeps its shape and dtype. Because `mean` is not
# recomputed here, running this cell again leaves `prob` unchanged.
prob[...] = np.greater_equal(prob, mean)
prob

array([[1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [10]:
# Ground-truth labels as a 2-D column (one row per patient, one column), so
# they line up element-wise with the model output.
true = validation[['Resp']].values
true


array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
    

In [11]:
# Element-wise difference between label and prediction: 0 where they agree,
# non-zero where they disagree.
test = np.subtract(true, prob)

In [12]:
# Accuracy = 1 - (number of disagreements / number of rows).
n_wrong = np.count_nonzero(test)
1 - (n_wrong / len(true))


0.6597826086956522