In [1]:
# Dependencies and setup
import pandas as pd
from pathlib import Path
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import joblib

In [2]:
# Location of the source CSV, given relative to the notebook's working directory
to_load = Path("cancer_patient_datasets.csv")
# Read the CSV into the working dataframe used by the cells below
nn_df = pd.read_csv(filepath_or_buffer=to_load)

In [3]:
# Remove the 'index' and 'Patient Id' columns so they are not used as features.
# Note: nn_df is rebound here, so running this cell a second time without
# reloading the CSV raises a KeyError (the columns are already gone).
nn_df = nn_df.drop(['index', 'Patient Id'], axis=1)

In [4]:
# Encode the ordinal risk label as an integer: Low -> 0, Medium -> 1, High -> 2
nn_df["Level"] = (
    nn_df["Level"]
    .replace({"Low": 0, "Medium": 1, "High": 2})
    .astype("int64")
)

# Features: every remaining column except the label
X = nn_df.drop(columns="Level")
# Target: one indicator column per encoded label value
y = pd.get_dummies(nn_df.Level)

In [5]:
# Hold out a test set using train_test_split's default split size;
# the fixed random_state keeps the partition repeatable between runs.
split_parts = train_test_split(X, y, random_state=78)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Build the scaler and fit it on the training features only, so the test set
# does not influence the scaling statistics.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)  # fit() returns the scaler itself

# Apply the fitted transformation to both partitions
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

# Persist the fitted scaler to disk so it can be reloaded outside this notebook
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [7]:
# Network dimensions: the input width comes from the training features,
# followed by two hidden layers of 65 and 30 nodes.
number_input_features = X_train.shape[1]
hidden_nodes_layer1, hidden_nodes_layer2 = 65, 30

# Empty sequential container; layers are appended in the next cell
nn1 = tf.keras.models.Sequential()

In [8]:
# Assemble the network on the empty `nn1` Sequential model.
# Run this cell once per freshly created `nn1`: every `add` call appends a
# layer, so re-running it would stack a second copy of the layers.

# Declare the input shape with an explicit Input object. Passing `input_dim`
# to the first Dense layer is deprecated in Keras 3 and triggers a UserWarning.
nn1.add(tf.keras.Input(shape=(number_input_features,)))

# First hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, activation="relu"))

# Second hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer: three softmax units, one per encoded Level value (0, 1, 2)
nn1.add(tf.keras.layers.Dense(units=3, activation="softmax"))

# Check the structure of the model
nn1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot target, and accuracy reported during fit/evaluate.
nn1.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Sanity-check the partition sizes before training
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (750, 23)
X_test shape: (250, 23)


In [10]:
# Fit the network on the scaled training features for 100 epochs;
# the returned History object is kept in fit_model.
fit_model = nn1.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
)

Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381us/step - accuracy: 0.4904 - loss: 1.0599
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339us/step - accuracy: 0.9288 - loss: 0.4987
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 0.9454 - loss: 0.2592
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 289us/step - accuracy: 0.9707 - loss: 0.1247
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282us/step - accuracy: 0.9828 - loss: 0.0835
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step - accuracy: 0.9935 - loss: 0.0471
Epoch 7/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279us/step - accuracy: 1.0000 - loss: 0.0352
Epoch 8/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294us/step - accuracy: 1.0000 - loss: 0.0265
Epoch 9/100
[1m24/24[0m [32m━

Epoch 67/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292us/step - accuracy: 1.0000 - loss: 1.5273e-04
Epoch 68/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294us/step - accuracy: 1.0000 - loss: 1.6489e-04
Epoch 69/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293us/step - accuracy: 1.0000 - loss: 1.4853e-04
Epoch 70/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 294us/step - accuracy: 1.0000 - loss: 1.3812e-04
Epoch 71/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step - accuracy: 1.0000 - loss: 1.4346e-04
Epoch 72/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 284us/step - accuracy: 1.0000 - loss: 1.2932e-04
Epoch 73/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 300us/step - accuracy: 1.0000 - loss: 1.2676e-04
Epoch 74/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303us/step - accuracy: 1.0000 - loss: 1.2

In [11]:
# Score the trained network on the held-out, scaled test partition.
# With metrics=["accuracy"] compiled in, evaluate() yields loss then accuracy.
model_loss, model_accuracy = nn1.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - 7ms/step - accuracy: 1.0000 - loss: 6.4456e-05
Loss: 6.445607141358778e-05, Accuracy: 1.0


In [12]:
# Persist the trained network to disk in HDF5 format.
# The target path is kept in `filepath`.
filepath = "LungCancer.h5"

# Write the model to that path
nn1.save(filepath=filepath)



In [13]:
# Run another 100 epochs on the scaled training features.
# nn1 has already been fitted earlier in this notebook, so this call continues
# from its current weights rather than starting from a fresh model.
# NOTE(review): the model is later saved as "NoScale_model.h5", yet it is still
# trained on X_train_scaled here -- confirm whether an unscaled, freshly
# initialised model was intended.
fit_model = nn1.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
)

Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 467us/step - accuracy: 1.0000 - loss: 6.0913e-05
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 340us/step - accuracy: 1.0000 - loss: 5.7176e-05
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295us/step - accuracy: 1.0000 - loss: 5.2415e-05
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 286us/step - accuracy: 1.0000 - loss: 5.7762e-05
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292us/step - accuracy: 1.0000 - loss: 5.4289e-05
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308us/step - accuracy: 1.0000 - loss: 5.5330e-05
Epoch 7/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271us/step - accuracy: 1.0000 - loss: 4.1585e-05
Epoch 8/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 277us/step - accuracy: 1.0000 - loss: 4.9416e-05


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 306us/step - accuracy: 1.0000 - loss: 1.6702e-05
Epoch 67/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 302us/step - accuracy: 1.0000 - loss: 1.4831e-05
Epoch 68/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303us/step - accuracy: 1.0000 - loss: 1.6345e-05
Epoch 69/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 299us/step - accuracy: 1.0000 - loss: 1.5096e-05
Epoch 70/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 301us/step - accuracy: 1.0000 - loss: 1.3151e-05
Epoch 71/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291us/step - accuracy: 1.0000 - loss: 1.3537e-05
Epoch 72/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 292us/step - accuracy: 1.0000 - loss: 1.3198e-05
Epoch 73/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296us/step - accuracy: 1.0000 - loss: 1.3234e-05
Epoch

In [14]:
# Re-score the further-trained network on the same scaled test partition.
# evaluate() yields loss first, then the compiled accuracy metric.
model_loss, model_accuracy = nn1.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - 2ms/step - accuracy: 1.0000 - loss: 9.6171e-06
Loss: 9.617089745006524e-06, Accuracy: 1.0


In [15]:
# Persist the further-trained network to a second HDF5 file.
# NOTE(review): despite the "NoScale" file name, nn1 was trained and evaluated
# on scaled inputs in the cells above -- confirm the intended contents of this
# file before anything relies on it.
filepath = "NoScale_model.h5"

# Write the model to that path
nn1.save(filepath=filepath)

