In [1]:
# Dependencies and setup
import pandas as pd
from pathlib import Path
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf


In [2]:
# Location of the raw patient data, relative to the notebook's working directory
to_load = Path("cancer_patient_datasets.csv")
# Read the CSV into the working dataframe used by every later cell
nn_df = pd.read_csv(filepath_or_buffer=to_load)

In [3]:
# Discard the 'index' and 'Patient Id' columns, which are not needed for modelling
nn_df = nn_df.drop(['index', 'Patient Id'], axis=1)

In [4]:
# Encode the Level label (Low / Medium / High) as the integers 0 / 1 / 2.
# A single explicit mapping replaces three chained scalar `replace` calls, which relied on
# pandas silently downcasting the object column to integers (deprecated: FutureWarning in
# pandas 2.2). `.get(label, label)` passes already-encoded values through unchanged, so
# re-running this cell stays a no-op, just as it was with `replace`.
level_codes = {"Low": 0, "Medium": 1, "High": 2}
nn_df.Level = nn_df.Level.map(lambda label: level_codes.get(label, label))

# Any label outside the mapping (or a missing value) is left as-is above and makes this
# cast raise, so unexpected categories are not silently accepted.
nn_df.Level = nn_df.Level.astype("int64")

# Features: every column except the label
X = nn_df.drop("Level", axis = 1)
# Target: one indicator column per Level code present in the data
y = pd.get_dummies(nn_df["Level"])

In [5]:
# Partition features and targets into train and test subsets.
# No test_size is given, so scikit-learn's default proportion applies; the fixed
# random_state makes the partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)

In [6]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Learn the scaling statistics from the training features only,
# so nothing about the test set influences the scaling
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both subsets, training first and then test
X_train_scaled, X_test_scaled = map(X_scaler.transform, (X_train, X_test))

In [7]:
# Make training repeatable: one call seeds Python's `random`, NumPy and TensorFlow, so the
# initial weights (and other seeded randomness) no longer differ between Restart & Run All
# passes. Some hardware-level non-determinism may remain.
# 78 matches the random_state already used for the train/test split.
tf.keras.utils.set_random_seed(78)

# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = X_train.shape[1]  # one input per feature column
hidden_nodes_layer1 = 30
hidden_nodes_layer2 = 20

# Empty Sequential container; the layers are appended to it in the next cell
nn1 = tf.keras.models.Sequential()

In [8]:
# Input specification.
# The shape is declared with an explicit Input object instead of passing `input_dim` to the
# first Dense layer: Keras 3 discourages `input_dim` / `input_shape` on layers inside a
# Sequential model and emits a UserWarning when they are used.
nn1.add(tf.keras.Input(shape=(number_input_features,)))

# First hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, activation="relu"))

# Second hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer: three units with softmax, one per Level class (0, 1, 2)
nn1.add(tf.keras.layers.Dense(units=3, activation="softmax"))

# Check the structure of the model
nn1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the targets are
# indicator columns from get_dummies), and accuracy reported as the tracked metric
nn1.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# Sanity check: report the dimensions of the train and test feature sets
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (750, 23)
X_test shape: (250, 23)


In [10]:
# Fit the network on the scaled training features for 100 epochs;
# the object returned by fit() is kept in `fit_model`
fit_model = nn1.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=100,
)

Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 356us/step - accuracy: 0.2624 - loss: 1.1700
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321us/step - accuracy: 0.7659 - loss: 0.6999
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325us/step - accuracy: 0.8542 - loss: 0.4720
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312us/step - accuracy: 0.9565 - loss: 0.2980
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 310us/step - accuracy: 0.9524 - loss: 0.2208
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 283us/step - accuracy: 0.9662 - loss: 0.1678
Epoch 7/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 274us/step - accuracy: 0.9724 - loss: 0.1230
Epoch 8/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 275us/step - accuracy: 0.9708 - loss: 0.1050
Epoch 9/100
[1m24/24[0m [32m━

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288us/step - accuracy: 1.0000 - loss: 2.4300e-04
Epoch 68/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285us/step - accuracy: 1.0000 - loss: 2.3676e-04
Epoch 69/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285us/step - accuracy: 1.0000 - loss: 2.5776e-04
Epoch 70/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 290us/step - accuracy: 1.0000 - loss: 2.2023e-04
Epoch 71/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295us/step - accuracy: 1.0000 - loss: 2.3658e-04
Epoch 72/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288us/step - accuracy: 1.0000 - loss: 2.2418e-04
Epoch 73/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 266us/step - accuracy: 1.0000 - loss: 2.1736e-04
Epoch 74/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279us/step - accuracy: 1.0000 - loss: 2.0768e-04
Epoch

In [11]:
# Score the trained model on the held-out, scaled test set.
# The result unpacks into the loss followed by the accuracy metric set at compile time.
model_loss, model_accuracy = nn1.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

8/8 - 0s - 7ms/step - accuracy: 1.0000 - loss: 8.6208e-05
Loss: 8.620772860012949e-05, Accuracy: 1.0


In [12]:
# Persist the trained model to an HDF5 file
# Destination path, relative to the notebook's working directory
filepath = "LungCancer.h5"

# Write the model to disk at that path
nn1.save(filepath=filepath)

