In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Model, Input
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Embedding, LSTM, concatenate
from keras.optimizers import Adam
from keras.preprocessing import sequence
from keras_tqdm import TQDMCallback
from keras.models import model_from_json


Using TensorFlow backend.


In [2]:
# Read the patient records used for inference into `validation` and preview
# the first rows.
test_csv_path = 'test_data.csv'
validation = pd.read_csv(test_csv_path)
validation.head(5)


Unnamed: 0,PatientID,Resp,PR Seq,RT Seq,VL-t0,CD4-t0
0,1,H,NCTCTATTAGATACAGGAGCAGATGACACAGTATTAGAAGARATGG...,CCTATTAGTCCTATTGAAACTGTACCAGTRAAATTAAAGCCAGGAA...,5.6,69
1,2,H,NCTCTATTAGATACAGGAGCAGATGATACAGTATTAGAAGAAATGA...,CCCATCAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,5.3,119
2,3,H,GGGCAAATAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAG...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,5.7,41
3,4,H,GGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATACAG...,CCTATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,5.2,48
4,5,H,GGGGGGCAACTAAAGGAAGCTCTATTAGATACAGGAGCAGATGATA...,CCCATTAGTCCTATTGAAACTGTACCAGTAAAATTAAAGCCAGGAA...,5.5,311


In [3]:
# Discard every row that has a missing value in any column.
validation = validation.dropna(axis=0, how='any')


In [4]:
def clean(x):
    """Replace each nucleotide/ambiguity letter in `x` with its numeric code.

    The codes are assigned in the order A=1, T=2, C=3, G=4, U=5, W=6, S=7,
    M=8, K=9, R=10, Y=11, B=12, D=13, H=14, V=15, N=16, Z=17. Characters that
    are not in this table are left untouched.

    Returns a string of digits (plus any unmapped characters).

    NOTE(review): codes 10-17 are two characters long, so a per-character
    split of the result (as `transform_to_array` does) yields two tokens for
    those letters, e.g. "N" -> "16" -> 1, 6. This looks unintended, but the
    saved model was presumably trained with the same encoding -- confirm
    before changing it.
    """
    symbols = "ATCGUWSMKRYBDHVNZ"
    # Single-pass translation table: letter -> decimal code as text.
    code_table = {
        ord(symbol): str(code)
        for code, symbol in enumerate(symbols, start=1)
    }
    return x.translate(code_table)
    
def transform_to_array(x):
    """Split `x` into its individual items and return them as a uint8 array.

    For a digit string such as the output of `clean`, every character becomes
    one array element.
    """
    items = [*x]
    return np.asarray(items, dtype=np.uint8)

# Encode both sequence columns the same way: letters -> digit string -> uint8
# array, overwriting the original text columns.
for seq_column in ('PR Seq', 'RT Seq'):
    encoded = validation[seq_column].apply(clean)
    validation[seq_column] = encoded.apply(transform_to_array)

validation.head()


Unnamed: 0,PatientID,Resp,PR Seq,RT Seq,VL-t0,CD4-t0
0,1,H,"[1, 6, 3, 2, 3, 2, 1, 2, 2, 1, 4, 1, 2, 1, 3, ...","[3, 3, 2, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.6,69
1,2,H,"[1, 6, 3, 2, 3, 2, 1, 2, 2, 1, 4, 1, 2, 1, 3, ...","[3, 3, 3, 1, 2, 3, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.3,119
2,3,H,"[4, 4, 4, 3, 1, 1, 1, 2, 1, 1, 1, 4, 4, 1, 1, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.7,41
3,4,H,"[4, 4, 4, 3, 1, 1, 3, 2, 1, 1, 1, 4, 4, 1, 1, ...","[3, 3, 2, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.2,48
4,5,H,"[4, 4, 4, 4, 4, 4, 3, 1, 1, 3, 2, 1, 1, 1, 4, ...","[3, 3, 3, 1, 2, 2, 1, 4, 2, 3, 3, 2, 1, 2, 2, ...",5.5,311


In [5]:
# Fixed lengths for the two sequence inputs.
# NOTE(review): presumably these match the input lengths of the saved model -- confirm.
max_PR, max_RT = 312, 1575

# Bring every sequence to the fixed length using pad_sequences' default
# padding/truncating behaviour, keeping the compact uint8 dtype.
padded_PR = sequence.pad_sequences(
    validation['PR Seq'],
    maxlen=max_PR,
    dtype=np.uint8,
)
padded_RT = sequence.pad_sequences(
    validation['RT Seq'],
    maxlen=max_RT,
    dtype=np.uint8,
)



In [6]:
# Rebuild the network from its serialized JSON architecture, then restore the
# trained weights from the HDF5 file.
with open('model_architecture.json') as architecture_file:
    architecture_json = architecture_file.read()

model = model_from_json(architecture_json)
model.load_weights('weights.hdf5')


Instructions for updating:
Colocations handled automatically by placer.


In [7]:
# The model takes three inputs, in this order: padded PR sequences, padded RT
# sequences, and the two baseline columns VL-t0 and CD4-t0.
baseline_features = validation[['VL-t0', 'CD4-t0']]
model_inputs = [padded_PR, padded_RT, baseline_features]

prob = model.predict(model_inputs)
prob


array([[0.6455043 ],
       [0.6458092 ],
       [0.5299136 ],
       [0.63996655],
       [0.5256609 ],
       [0.55012065],
       [0.61758393],
       [0.53959894],
       [0.56374633],
       [0.6308065 ],
       [0.7442082 ],
       [0.71668226],
       [0.65655816],
       [0.62611425],
       [0.72042835],
       [0.61375105],
       [0.6855879 ],
       [0.39698374],
       [0.67675984],
       [0.8195394 ],
       [0.53259313],
       [0.76496446],
       [0.5103101 ],
       [0.65684545],
       [0.68963134],
       [0.7028545 ],
       [0.6428055 ],
       [0.7173037 ],
       [0.51801115],
       [0.763407  ],
       [0.35855418],
       [0.7063978 ],
       [0.4640434 ],
       [0.4788743 ],
       [0.7158658 ],
       [0.6802503 ],
       [0.3996277 ],
       [0.64667505],
       [0.6753826 ],
       [0.8350662 ],
       [0.72975147],
       [0.73886704],
       [0.620827  ],
       [0.6467795 ],
       [0.49658197],
       [0.6651166 ],
       [0.761672  ],
       [0.698

In [8]:
# Average of all predicted values; used below as the decision threshold.
mean = prob.mean()
mean

0.5442534

In [9]:
# Turn the predicted values into hard 0/1 labels, using the mean prediction
# (not a fixed 0.5) as the cut-off: values >= mean become 1, the rest 0.
#
# The comparison is done on the whole array at once instead of looping over
# rows in Python; this also keeps working if `prob` has more than one column,
# where `if item >= mean` would raise on an ambiguous truth value.
# The assignment is in place, so `prob` keeps its dtype and identity and the
# raw probabilities are overwritten, exactly as before.
prob[...] = prob >= mean
prob



array([[1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],