In [1]:
# Import necessary libraries
import keras
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, Input
from keras.layers.noise import AlphaDropout
from keras.layers import Layer
from tensorflow.python.keras import backend as K
from sklearn.model_selection import train_test_split

# Import custom modules
from network import *
from data import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Hyperparameters of the shared sub-network.
# SELU activations are paired here with the `lecun_normal` initializer and
# AlphaDropout; the whole mapping is forwarded as keyword arguments below.
network = {
    'n_dense': 10,
    'dense_units': 16,
    'activation': 'selu',
    'dropout': AlphaDropout,
    'dropout_rate': 0.1,
    'kernel_initializer': 'lecun_normal',
    'optimizer': 'sgd',
    'num_classes': 2,
}

# Build the shared model from those settings (builder lives outside this notebook).
shared_model = create_base_network(**network)

# Show the layer-by-layer structure under a heading.
print("Shared model summary")
shared_model.summary()

Shared model summary
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 16)                15664     
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
alpha_dropout_1 (AlphaDropou (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
_________________________________________________________________
alpha_dropout_2 (AlphaDropou (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                

In [3]:
# Create the siamese network
# The shared sub-network built earlier is handed to create_siamese_network,
# which is defined outside this notebook; the object it returns is bound to
# `model` and is the one trained later on.
model = create_siamese_network(shared_model)
# Print a heading, then the layer/connection table of the combined model.
print("Siamese network model summary")
model.summary()

Siamese network model summary
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 978)          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 978)          0                                            
__________________________________________________________________________________________________
sequential_1 (Sequential)       (None, 16)           18112       input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
man_dist_1 (ManDist)            (None, 1)            0           sequential_1[1

In [4]:
# Obtain data
# Both paths are relative, so they resolve against the kernel's working
# directory; note that the file names contain spaces.
gctxfile = "../../Data/Sig Annotated Level 5 Data.gctx"
jsonfile = "../../Data/sig-pert mapping.json"

# gctx2pd (defined outside this notebook) takes both paths and returns two
# objects: the expression table (`data`) and accompanying `metadata`.
data, metadata = gctx2pd(gctxfile, jsonfile)
# Preview the first rows of the loaded table.
data.head()

cid,REP.A001_A375_24H:A03,REP.A001_A375_24H:A04,REP.A001_A375_24H:A05,REP.A001_A375_24H:A06,REP.A001_A375_24H:A07,REP.A001_A375_24H:A08,REP.A001_A375_24H:A09,REP.A001_A375_24H:A10,REP.A001_A375_24H:A11,REP.A001_A375_24H:A12,...,LJP007_SKL_24H:P19,LJP007_SKL_24H:P20,LJP007_SKL_24H:P21,LJP007_SKL_24H:P22,LJP007_SKL_24H:E21,LJP007_SKL_24H:O13,LJP007_SKL_24H:O14,LJP007_SKL_24H:O24,LJP007_SKL_24H:P24,LJP007_SKL_24H:C19
rid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
780,4.264143,-0.382211,-0.571711,0.584376,0.658348,-0.004232,-0.314762,-0.049558,-0.909517,-0.850654,...,1.091158,0.264409,0.71108,0.768569,4.446,4.4395,6.175,8.0582,10.0,3.0807
7849,0.057249,0.304313,-0.754999,-0.589973,-0.226854,-0.363419,-0.691129,-0.684283,0.521503,-0.640316,...,-0.493212,-0.041785,-0.606896,0.819984,6.6313,10.0,2.8649,0.4905,9.1524,4.5834
6193,-2.139334,-0.995924,-0.71011,-0.026398,-1.143599,-0.850314,-1.052307,-0.463051,-0.494277,0.067007,...,0.807524,-0.062587,0.632009,1.067584,3.4302,1.6831,1.8397,3.3238,-1.2545,0.3805
23,-0.221784,-0.670834,0.428894,-0.065268,0.342426,0.539448,0.357474,-0.233215,-0.7152,-0.41876,...,0.25577,-0.218802,0.257857,0.170097,2.6837,4.5242,4.3375,3.6885,-4.9315,2.265
9552,-0.376555,-0.648242,0.272606,0.542223,0.38047,-0.01121,-1.590959,-0.613891,-0.291982,-0.629392,...,0.301324,-0.841693,0.628758,-0.387287,-1.6202,-2.6985,-1.0555,-1.5548,0.9744,-1.9243


In [5]:
# Obtain targets
# get_target_labels (defined outside this notebook) derives the labels from
# the loaded table and its metadata. It must run while `data` is still the
# frame returned by gctx2pd, i.e. before `data` is rebound further down.
target = get_target_labels(data, metadata)
# Sanity check: number of labels produced.
print(len(target))

Creating target labels
0 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000 31000 32000 33000 34000 35000 36000 37000 38000 39000 40000 41000 42000 43000 44000 45000 46000 47000 48000 49000 50000 51000 52000 53000 54000 55000 56000 57000 58000 59000 60000 61000 62000 63000 64000 65000 66000 67000 68000 69000 70000 71000 72000 73000 74000 75000 76000 77000 78000 79000 80000 81000 82000 83000 84000 85000 86000 87000 88000 89000 90000 91000 92000 93000 94000 95000 96000 97000 98000 99000 100000 101000 102000 103000 104000 105000 106000 107000 108000 109000 110000 111000 112000 113000 114000 115000 116000 117000 118000 118050


In [6]:
# Attach labels
# Transpose the table and append the labels as a `target` column in one
# expression; the intermediate frame is still exposed as `data1`.
data1 = data.T.assign(target=target)
# Rebind `data` to the labelled frame ordered by label.
# NOTE: because `data` is overwritten here, running this cell a second time
# would transpose the already-labelled frame instead of the loaded one.
data = data1.sort_values(by='target')
data.head()

rid,780,7849,6193,23,9552,387,10921,10285,533,6194,...,11000,6915,6253,7264,5467,2767,23038,57048,79716,target
cid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
REP.A012_HA1E_24H:B12,0.3907,-0.3489,0.6175,-1.76155,0.23555,1.5742,-0.33725,0.2962,0.5354,-0.9375,...,5.0952,-1.07555,-1.00195,0.2474,-1.9162,1.10085,-0.6088,1.20255,-0.4005,BRD-A00147595
REP.A012_HA1E_24H:B10,-0.354954,0.284711,-0.399639,-0.394218,0.26933,0.438478,-0.193475,-0.567282,0.599511,-0.201206,...,0.470325,0.660018,-0.167005,0.138563,0.316941,-0.231024,-0.122175,0.48397,0.055153,BRD-A00147595
REP.A012_HA1E_24H:B11,0.186348,-0.55211,-1.293667,1.087279,0.009011,-0.00971,-0.422903,0.122181,0.304543,0.021282,...,1.642745,2.120592,1.446014,-0.730442,-0.175147,-1.005282,0.414229,0.341628,1.157403,BRD-A00147595
REP.A012_MCF7_24H:B12,-0.409912,-0.012908,-0.357194,0.637424,-0.161387,0.67401,-0.540815,0.3139,-0.459992,0.065468,...,-0.730799,0.3421,-0.226505,0.312267,0.219967,0.585921,0.435886,0.102224,0.463525,BRD-A00147595
REP.A012_MCF7_24H:B08,-0.456875,-0.164637,-0.705794,-0.402885,-2.056623,-0.542763,0.0,-1.418116,-0.543096,0.506668,...,-0.664681,-0.162204,-2.875566,0.113363,-0.143262,0.889934,0.909341,0.317749,0.296469,BRD-A00147595


In [7]:
# Create the first of the 2 dictionaries (the second is built in the next cell)
# `data` at this point is the transposed, label-sorted frame with a `target`
# column. create_location_pert is defined outside this notebook.
location_pert = create_location_pert(data)

creating location_pert
0 100 200 300 400 500 600 700 800 900 1000 1100 1200 1300 1400 1500 1600 1700 1800 1900 2000 2100 

In [8]:
# Build the second lookup structure from the same labelled frame.
# create_pert2profile is defined outside this notebook; presumably it maps
# each perturbagen to its profiles — TODO confirm against the helper.
pert2profiles = create_pert2profile(data)

Creating pert2profile


In [9]:
# Collect the distinct labels found in the `target` column ...
unique_targets = np.unique(data.target)
# ... and let the project helper partition them into two groups.
train, test = train_and_test_perturbagens(unique_targets)
# Display the first group for inspection.
train

array(['BRD-K01663662', 'BRD-K33732501', 'BRD-K47983010', ...,
       'BRD-K01815685', 'BRD-K71822263', 'BRD-K50000283'], dtype=object)

In [18]:
# Generate model inputs and labels restricted to the `train` group.
# The meaning of the positional 2 is defined by generate_data — TODO confirm
# and pass it by keyword once the parameter name is known.
X_train, y_train = generate_data(data, train, 2)

batch_size:  6944


2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284 286 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318 320 322 324 326 328 330 332 334 336 338 340 342 344 346 348 350 352 354 356 358 360 362 364 366 368 370 372 374 376 378 380 382 384 386 388 390 392 394 396 398 400 402 404 406 408 410 412 414 416 418 420 422 424 426 428 430 432 434 436 438 440 442 444 446 448 450 452 454 456 458 460 462 464 466 468 470 472 474 476 478 480 482 484 486 488 490 492 494 496 498 500 502 504 506 508 510 512 514 516 518 520 522 524 526 5

3500 3502 3504 3506 3508 3510 3512 3514 3516 3518 3520 3522 3524 3526 3528 3530 3532 3534 3536 3538 3540 3542 3544 3546 3548 3550 3552 3554 3556 3558 3560 3562 3564 3566 3568 3570 3572 3574 3576 3578 3580 3582 3584 3586 3588 3590 3592 3594 3596 3598 3600 3602 3604 3606 3608 3610 3612 3614 3616 3618 3620 3622 3624 3626 3628 3630 3632 3634 3636 3638 3640 3642 3644 3646 3648 3650 3652 3654 3656 3658 3660 3662 3664 3666 3668 3670 3672 3674 3676 3678 3680 3682 3684 3686 3688 3690 3692 3694 3696 3698 3700 3702 3704 3706 3708 3710 3712 3714 3716 3718 3720 3722 3724 3726 3728 3730 3732 3734 3736 3738 3740 3742 3744 3746 3748 3750 3752 3754 3756 3758 3760 3762 3764 3766 3768 3770 3772 3774 3776 3778 3780 3782 3784 3786 3788 3790 3792 3794 3796 3798 3800 3802 3804 3806 3808 3810 3812 3814 3816 3818 3820 3822 3824 3826 3828 3830 3832 3834 3836 3838 3840 3842 3844 3846 3848 3850 3852 3854 3856 3858 3860 3862 3864 3866 3868 3870 3872 3874 3876 3878 3880 3882 3884 3886 3888 3890 3892 3894 3896 3898 

6778 6780 6782 6784 6786 6788 6790 6792 6794 6796 6798 6800 6802 6804 6806 6808 6810 6812 6814 6816 6818 6820 6822 6824 6826 6828 6830 6832 6834 6836 6838 6840 6842 6844 6846 6848 6850 6852 6854 6856 6858 6860 6862 6864 6866 6868 6870 6872 6874 6876 6878 6880 6882 6884 6886 6888 6890 6892 6894 6896 6898 6900 6902 6904 6906 6908 6910 6912 6914 6916 6918 6920 6922 6924 6926 6928 6930 6932 6934 6936 6938 6940 6942 6944 

In [15]:
# Generate model inputs and labels restricted to the `test` group.
# The meaning of the positional 2 is defined by generate_data — TODO confirm
# and pass it by keyword once the parameter name is known.
X_test, y_test = generate_data(data, test, 2)

batch_size:  1736
2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94 96 98 100 102 104 106 108 110 112 114 116 118 120 122 124 126 128 130 132 134 136 138 140 142 144 146 148 150 152 154 156 158 160 162 164 166 168 170 172 174 176 178 180 182 184 186 188 190 192 194 196 198 200 202 204 206 208 210 212 214 216 218 220 222 224 226 228 230 232 234 236 238 240 242 244 246 248 250 252 254 256 258 260 262 264 266 268 270 272 274 276 278 280 282 284 286 288 290 292 294 296 298 300 302 304 306 308 310 312 314 316 318 320 322 324 326 328 330 332 334 336 338 340 342 344 346 348 350 352 354 356 358 360 362 364 366 368 370 372 374 376 378 380 382 384 386 388 390 392 394 396 398 400 402 404 406 408 410 412 414 416 418 420 422 424 426 428 430 432 434 436 438 440 442 444 446 448 450 452 454 456 458 460 462 464 466 468 470 472 474 476 478 480 482 484 486 488 490 492 494 496 498 500 502 504 506 508 510 512 514 516 518

In [19]:
# Train the siamese model. X_train is indexed as a two-element container,
# one entry per model input.
#
# The pairs generated from the `test` group are supplied as validation data so
# that each epoch also reports metrics on examples the optimiser never sees.
# Without this, a 100-epoch run gives no signal for when training has stopped
# improving. It is used for monitoring only; do not tune hyperparameters on it.
# X_test comes from the same generate_data call as X_train, so it is assumed
# to share the same two-element layout.
#
# The returned History object is kept so the per-epoch curves can be
# inspected or plotted after training.
history = model.fit(
    [X_train[0], X_train[1]],
    y_train,
    validation_data=([X_test[0], X_test[1]], y_test),
    epochs=100,
    verbose=1,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

KeyboardInterrupt: 