In [52]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout
from keras.optimizers import Adam, SGD
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

In [2]:
# Directory holding the pre-computed feature CSVs. Kept in one place so that
# relocating the data is a single-line change instead of editing every path.
FEATURE_DIR = Path('/gpfs1/scratch/90days/s4436005/img_reg/features')

# Feature table used for training.
TRAIN_FEATURE_PATH = FEATURE_DIR / 'Xception_train_features.csv'
# Feature table used for validation.
VAL_FEATURE_PATH = FEATURE_DIR / 'Xception_1794_T_features.csv'

In [14]:
# Read the training and validation feature tables from CSV, then report
# the (rows, columns) shape of each, one per line.
train_features = pd.read_csv(TRAIN_FEATURE_PATH)
val_features = pd.read_csv(VAL_FEATURE_PATH)
print(train_features.shape, val_features.shape, sep='\n')

(29076, 2050)
(1817, 2050)


In [16]:
# Preview the first five rows of the training feature table as loaded.
train_features.head(n=5)

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,2039,2040,2041,2042,2043,2044,2045,2046,2047,label
0,0,0.0,5.878077,0.0,0.0,0.0,0.0,0.135842,0.0,0.068097,...,0.0,0.277605,0.0,0.0,0.0,0.0,0.0,1.00107,0.0,cancer
1,1,0.0,1.13431,7.630448,0.0,0.0,0.585219,1.725968,0.0,2.846333,...,0.651515,0.0216,0.0,0.0,0.0,0.0,0.0,2.063413,0.0,cancer
2,2,0.0,1.061499,0.0,0.0,0.0,0.061457,0.462785,0.0,0.0,...,0.049169,0.809391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non-cancer
3,3,0.0,2.247305,0.0,0.0,0.0,0.002337,0.033633,0.0,0.0,...,0.030947,0.238555,0.0,0.0,0.0,0.0,0.0,1.151081,0.0,non-cancer
4,4,0.0,1.492867,0.640722,0.0,0.0,0.721783,1.523534,0.0,0.976771,...,0.0,0.730104,0.0,0.0,0.0,0.0,0.0,0.235231,0.0,non-cancer


In [17]:
# Preview the first five rows of the validation feature table as loaded.
val_features.head(n=5)

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,2039,2040,2041,2042,2043,2044,2045,2046,2047,label
0,0,0.0,0.31513,3.341671,0.0,0.0,0.564303,0.0,0.0,5.155958,...,0.148506,1.928131,0.0,0.0,0.0,0.0,0.0,0.979687,0.0,non-cancer
1,1,0.0,0.204062,0.22362,0.0,0.0,0.0,0.0775,0.0,0.0,...,0.0,0.407297,0.0,0.0,0.0,0.0,0.0,0.106358,0.0,cancer
2,2,0.0,1.348258,0.004712,0.0,0.0,0.0,0.131601,0.0,0.0,...,0.462664,1.56748,0.0,0.0,0.0,0.0,0.0,0.300096,0.168038,non-cancer
3,3,0.0,1.019261,0.0,0.0,0.0,0.022239,0.076245,0.0,0.0,...,0.0,0.226489,0.0,0.0,0.0,0.0,0.0,0.147865,0.0,cancer
4,4,0.0,1.405072,0.177669,0.0,0.0,0.020185,0.115404,0.0,0.0,...,0.480158,1.295888,0.0,0.0,0.0,0.0,0.0,0.038463,0.0,cancer


In [18]:
# Remove the 'Unnamed: 0' column from both tables.
# NOTE(review): presumably this is the row index that was saved alongside the
# data when the CSV was written — confirm against the feature-extraction step.
#
# errors='ignore' keeps this cell re-runnable: the cell overwrites its own
# inputs, so on a second execution the column is already gone and a plain
# drop() would raise KeyError.
train_features = train_features.drop(columns='Unnamed: 0', errors='ignore')
val_features = val_features.drop(columns='Unnamed: 0', errors='ignore')

In [19]:
# Show the first five rows of the training table in its current state.
train_features.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2039,2040,2041,2042,2043,2044,2045,2046,2047,label
0,0.0,5.878077,0.0,0.0,0.0,0.0,0.135842,0.0,0.068097,0.0,...,0.0,0.277605,0.0,0.0,0.0,0.0,0.0,1.00107,0.0,cancer
1,0.0,1.13431,7.630448,0.0,0.0,0.585219,1.725968,0.0,2.846333,0.0,...,0.651515,0.0216,0.0,0.0,0.0,0.0,0.0,2.063413,0.0,cancer
2,0.0,1.061499,0.0,0.0,0.0,0.061457,0.462785,0.0,0.0,0.0,...,0.049169,0.809391,0.0,0.0,0.0,0.0,0.0,0.0,0.0,non-cancer
3,0.0,2.247305,0.0,0.0,0.0,0.002337,0.033633,0.0,0.0,0.0,...,0.030947,0.238555,0.0,0.0,0.0,0.0,0.0,1.151081,0.0,non-cancer
4,0.0,1.492867,0.640722,0.0,0.0,0.721783,1.523534,0.0,0.976771,0.0,...,0.0,0.730104,0.0,0.0,0.0,0.0,0.0,0.235231,0.0,non-cancer


In [20]:
# Show the first five rows of the validation table in its current state.
val_features.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2039,2040,2041,2042,2043,2044,2045,2046,2047,label
0,0.0,0.31513,3.341671,0.0,0.0,0.564303,0.0,0.0,5.155958,0.0,...,0.148506,1.928131,0.0,0.0,0.0,0.0,0.0,0.979687,0.0,non-cancer
1,0.0,0.204062,0.22362,0.0,0.0,0.0,0.0775,0.0,0.0,0.0,...,0.0,0.407297,0.0,0.0,0.0,0.0,0.0,0.106358,0.0,cancer
2,0.0,1.348258,0.004712,0.0,0.0,0.0,0.131601,0.0,0.0,0.0,...,0.462664,1.56748,0.0,0.0,0.0,0.0,0.0,0.300096,0.168038,non-cancer
3,0.0,1.019261,0.0,0.0,0.0,0.022239,0.076245,0.0,0.0,0.0,...,0.0,0.226489,0.0,0.0,0.0,0.0,0.0,0.147865,0.0,cancer
4,0.0,1.405072,0.177669,0.0,0.0,0.020185,0.115404,0.0,0.0,0.0,...,0.480158,1.295888,0.0,0.0,0.0,0.0,0.0,0.038463,0.0,cancer


In [21]:
# Pull the 'label' column out of each table; these are the classification
# targets for the training and validation sets respectively.
train_labels, val_labels = (
    frame['label'] for frame in (train_features, val_features)
)

In [22]:
# Learn the label encoding from the training labels only, then apply that
# same fitted encoder to both splits so they share one mapping.
# NOTE(review): with exactly two classes LabelBinarizer emits a single 0/1
# column and orders classes alphabetically, so presumably 'cancer' -> 0 and
# 'non-cancer' -> 1 — confirm with lb.classes_.
lb = LabelBinarizer().fit(train_labels)
Y_train = lb.transform(train_labels)
Y_val = lb.transform(val_labels)

In [26]:
# Everything except the 'label' column is model input; convert each table's
# remaining columns to a NumPy array.
X_train = np.array(train_features.drop(columns='label'))
X_val = np.array(val_features.drop(columns='label'))

In [31]:
#x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.5, random_state = 42)

In [28]:
#random_array = np.round(np.random.rand(8)).astype(np.uint8)

In [57]:
# Fully connected binary classifier over 2048-dimensional input vectors:
# three ReLU hidden layers (2048 -> 1024 -> 1024) followed by a single
# sigmoid unit. No dropout is applied between the layers.
model = Sequential([
    Dense(2048, input_shape=(2048,), activation='relu'),
    Dense(1024, activation='relu'),
    Dense(1024, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the single sigmoid output; the optimizer is
# selected by name, and accuracy is tracked as the reported metric.
model.compile(
    optimizer='SGD',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [60]:
# Fit on the first 5000 training rows only, for up to 100 epochs with
# mini-batches of 2560 samples; the whole validation set is evaluated after
# each epoch. H keeps the object returned by fit().
# NOTE(review): the [:5000] slice leaves most of the training table unused —
# looks like a quick-experiment setting; confirm before a full run.
H = model.fit(
    x=X_train[:5000],
    y=Y_train[:5000],
    batch_size=2560,
    epochs=100,
    validation_data=(X_val, Y_val),
)

Train on 5000 samples, validate on 1817 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100


KeyboardInterrupt: 