In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
print(tf.__version__)

2.18.0


In [9]:
## import the data set
df = pd.read_csv('../notebook/data/EHR.csv')

In [10]:
df.head(8)

Unnamed: 0,Patient_ID,Age,Gender,Tumor_Size(cm),Tumor_Type,Biopsy_Result,Treatment,Response_to_Treatment,Survival_Status
0,c044501a-43ca-4a0c-8b8b-991439ba1b6a,52,Female,5.08,Benign,Positive,Surgery,No Response,Survived
1,b8900c4c-1232-4084-9432-5d02eba74d20,32,Female,0.8,Benign,Negative,Surgery,Complete Response,Survived
2,3004e2bc-8037-49cb-a542-d5612b73beab,70,Female,9.56,Benign,Positive,Radiation Therapy,Complete Response,Deceased
3,1df86af7-6745-4dea-b127-cbc9915079fc,21,Female,3.07,Malignant,Negative,Surgery,Partial Response,Survived
4,128e00c3-72e3-4031-a7f4-1165d7199cce,62,Male,7.17,Malignant,Positive,Radiation Therapy,Complete Response,Deceased
5,2b3cc8d5-f2f7-4ce2-be51-f5f489b53244,60,Female,8.31,Benign,Negative,Radiation Therapy,Complete Response,Deceased
6,2f8c5926-bedb-418e-84ed-8b05412e495f,34,Female,0.66,Benign,Negative,Chemotherapy,Complete Response,Survived
7,315f3ae6-b44c-42a8-9ce3-deb728a662f9,69,Female,2.2,Malignant,Negative,Surgery,Partial Response,Survived


### Feature Engineering

In [108]:
X = df.iloc[:,1:-1]

In [109]:
X.head()

Unnamed: 0,Age,Gender,Tumor_Size(cm),Tumor_Type,Biopsy_Result,Treatment,Response_to_Treatment
0,52,Female,5.08,Benign,Positive,Surgery,No Response
1,32,Female,0.8,Benign,Negative,Surgery,Complete Response
2,70,Female,9.56,Benign,Positive,Radiation Therapy,Complete Response
3,21,Female,3.07,Malignant,Negative,Surgery,Partial Response
4,62,Male,7.17,Malignant,Positive,Radiation Therapy,Complete Response


In [111]:
y = df.iloc[:,-1]

In [112]:
y.head()

0    Survived
1    Survived
2    Deceased
3    Survived
4    Deceased
Name: Survival_Status, dtype: object

In [113]:
y = (y == 'Survived').astype(int)

In [114]:
y.head()

0    1
1    1
2    0
3    1
4    0
Name: Survival_Status, dtype: int64

In [115]:
gender = pd.get_dummies(X['Gender'],drop_first=True,dtype=int)
tumor_type = pd.get_dummies(X['Tumor_Type'],drop_first=True,dtype=int)
biopsy_result = pd.get_dummies(X['Biopsy_Result'],drop_first=True,dtype=int)
treatment = pd.get_dummies(X['Treatment'],drop_first=True,dtype=int)
response_to_treatment = pd.get_dummies(X['Response_to_Treatment'],drop_first=True,dtype=int)


In [116]:
X = X.drop(['Gender','Tumor_Type','Biopsy_Result','Treatment','Response_to_Treatment'],axis=1)

In [117]:
X = pd.concat([X,gender,tumor_type,response_to_treatment,biopsy_result,treatment],axis=1)

In [118]:
X.head()

Unnamed: 0,Age,Tumor_Size(cm),Male,Malignant,No Response,Partial Response,Positive,Radiation Therapy,Surgery
0,52,5.08,0,0,1,0,1,0,1
1,32,0.8,0,0,0,0,0,0,1
2,70,9.56,0,0,0,0,1,1,0
3,21,3.07,0,1,0,1,0,0,1
4,62,7.17,1,1,0,0,1,1,0


In [None]:
## train_test split

In [39]:
from sklearn.model_selection import train_test_split

In [119]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,shuffle=True)

In [120]:
X_train.shape

(16000, 9)

In [121]:
y_test.shape

(4000,)

In [63]:
from sklearn.preprocessing import StandardScaler

In [122]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)

In [123]:
X_test = sc.transform(X_test)

### Model constuction

In [58]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers  import Dense, ReLU,ELU

In [166]:
model = Sequential([
    Dense(64, input_dim=9, activation='relu'),  # Input layer with 9 features
    Dense(32, activation='relu'),               # Hidden layer
    Dense(1, activation='sigmoid')              # Output layer (binary classification)
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [157]:
model.add(Dense(9,activation=ReLU))

In [158]:
model.add(Dense(5,activation=ReLU))

In [None]:
model.add(Dense(1,activation='sigmoid'))

### Model Trinaing

In [124]:
train_batch_X = X_train[:5000]
train_batch_y = y_train[:5000]

In [128]:
train_batch_y.shape

(5000,)

In [165]:
train_batch_X.shape

(5000, 9)

In [134]:
train_batch_X[1]

array([ 0.00453591,  1.64429665,  1.01892912,  1.0006252 , -0.71085557,
        1.3971505 , -0.996257  , -0.70181003, -0.69744983])

In [95]:
type(train_batch_y)

pandas.core.series.Series

In [167]:
model.compile(optimizer='adam',metrics=['accuracy'],loss='binary_crossentropy')

In [130]:
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=0,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=False,
    start_from_epoch=0
)

In [163]:
model

<Sequential name=sequential_2, built=False>

In [170]:
model_history = model.fit(X_train,y_train,validation_split=.2,epochs=100)
# model_history = model.fit(X_train,y_train,validation_split=.2,epochs=1,batch_size=50)

Epoch 1/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 406us/step - accuracy: 0.5190 - loss: 0.6929 - val_accuracy: 0.5147 - val_loss: 0.6948
Epoch 2/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 374us/step - accuracy: 0.5341 - loss: 0.6896 - val_accuracy: 0.5088 - val_loss: 0.6946
Epoch 3/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384us/step - accuracy: 0.5370 - loss: 0.6892 - val_accuracy: 0.5109 - val_loss: 0.6942
Epoch 4/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - accuracy: 0.5413 - loss: 0.6887 - val_accuracy: 0.5116 - val_loss: 0.6947
Epoch 5/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 384us/step - accuracy: 0.5372 - loss: 0.6882 - val_accuracy: 0.5044 - val_loss: 0.6961
Epoch 6/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 659us/step - accuracy: 0.5406 - loss: 0.6875 - val_accuracy: 0.5100 - val_loss: 0.6957
Epoch 7/10

In [172]:
model_history.history.keys()

dict_keys(['accuracy', 'loss', 'val_accuracy', 'val_loss'])