In [1]:
# Dependencies and setup
import pandas as pd
from pathlib import Path
# from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import tensorflow as tf
from tensorflow import keras
import keras_tuner as kt

In [None]:
# File to load
to_load = Path("cancer_patient_datasets.csv")

In [None]:
# Create the dataframe
lung_df = pd.read_csv(to_load)
lung_df.head()

In [None]:
# check the data types
lung_df.info()

In [None]:
# drop unnecessary columns index and Patient Id
lung_df = lung_df.drop(columns=['index', 'Patient Id'])
lung_df.head()

In [None]:
nn_df = lung_df


In [None]:
# transform levels to int ( fit_transform gives value 1 = low, 2 = medium, 0 = high)
# label_encoding = LabelEncoder()
# lung_df['Level'] = label_encoding.fit_transform(lung_df['Level'])

# use rename to reorder
lung_df.Level = lung_df.Level.replace("Low", 0)
lung_df.Level = lung_df.Level.replace("Medium", 1)
lung_df.Level = lung_df.Level.replace("High", 2)

lung_df.Level = lung_df.Level.astype("int64")



lung_df.head()

In [None]:
lung_df.describe()

In [None]:
lung_df.hist(figsize=(12,12))
plt.show()

In [None]:
plt.figure(figsize=(16,16))
sns.heatmap(lung_df.corr(),annot=True,fmt=".0%")
plt.savefig('heatmap.png')
plt.show()

In [None]:
# Create boxplots to show any outliers
boxplot_lung_df = lung_df.drop('Level', axis = 1)
for column in boxplot_lung_df:
  plt.figure(figsize=(4, 2))
  sns.boxplot(x=boxplot_lung_df[column])
  plt.title(f"Boxplot of {column}")
  plt.show()

## Training the models

In [None]:
X = lung_df.drop('Level', axis = 1)
y = lung_df['Level'].values

In [None]:
nn_df.Level = nn_df.Level.replace("Low", 0)
nn_df.Level = nn_df.Level.replace("Medium", 1)
nn_df.Level = nn_df.Level.replace("High", 2)

nn_df.Level = nn_df.Level.astype("int64")

X = nn_df.drop("Level", axis = 1)
y = pd.get_dummies(nn_df["Level"])

In [None]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [10]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# run log regression on pre scaled models
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

In [None]:
# run log regression on scaled models
logreg = LogisticRegression()
logreg.fit(X_train_scaled, y_train)
y_pred = logreg.predict(X_test_scaled)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

In [18]:
# Create a method that creates a new Sequential model with hyperparameter options
def create_model(hp):
    nn_model = tf.keras.models.Sequential()

    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','softmax'])
    
    # Allow kerastuner to decide number of neurons in first layer
    nn_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=10,
        step=2), activation=activation, input_dim=23))

    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 6)):
        nn_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=10,
            step=2),
            activation=activation))
    
    nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

    # Compile the model
    nn_model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return nn_model

In [21]:

tuner = kt.Hyperband(
    create_model,
    objective="val_accuracy",
    max_epochs=20,
    hyperband_iterations=2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [22]:
# Run the kerastuner search for best hyperparameters
tuner.search(X_train_scaled,y_train,epochs=20,validation_data=(X_test_scaled,y_test))

Trial 2 Complete [00h 00m 00s]

Best val_accuracy So Far: None
Total elapsed time: 00h 00m 01s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
softmax           |softmax           |activation
7                 |1                 |first_units
6                 |6                 |num_layers
5                 |1                 |units_0
1                 |1                 |units_1
7                 |1                 |units_2
9                 |1                 |units_3
9                 |1                 |units_4
1                 |1                 |units_5
3                 |3                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/3


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  return self.fn(y_true, y_pred, **self._fn_kwargs)
Traceback (most recent call last):
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/Users/miha_mac/a

RuntimeError: Number of consecutive failures exceeded the limit of 3.
Traceback (most recent call last):
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/base_tuner.py", line 274, in _try_run_and_update_trial
    self._run_and_update_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/base_tuner.py", line 239, in _run_and_update_trial
    results = self.run_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/tuners/hyperband.py", line 427, in run_trial
    return super().run_trial(trial, *fit_args, **fit_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/tuner.py", line 314, in run_trial
    obj_value = self._build_and_fit_model(trial, *args, **copied_kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/tuner.py", line 233, in _build_and_fit_model
    results = self.hypermodel.fit(hp, model, *args, **kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras_tuner/src/engine/hypermodel.py", line 149, in fit
    return model.fit(*args, **kwargs)
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py", line 122, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/miha_mac/anaconda3/envs/dev/lib/python3.10/site-packages/keras/src/backend/tensorflow/nn.py", line 554, in categorical_crossentropy
    raise ValueError(
ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 3), output.shape=(None, 1)


In [23]:
# Get best model hyperparameters
best_hyper = tuner.get_best_hyperparameters(1)[0]
best_hyper.values

{'activation': 'softmax',
 'first_units': 1,
 'num_layers': 6,
 'units_0': 1,
 'tuner/epochs': 3,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 2,
 'tuner/round': 0,
 'units_1': 1,
 'units_2': 1,
 'units_3': 1,
 'units_4': 1,
 'units_5': 1}

In [None]:
# Evaluate best model against full test data
best_model = tuner.get_best_models(1)[0]
model_loss, model_accuracy = best_model.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

In [11]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 =  30
hidden_nodes_layer2 = 20

nn1 = tf.keras.models.Sequential()

In [12]:
# First hidden layer
nn1.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn1.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn1.add(tf.keras.layers.Dense(units=3, activation="softmax"))

# Check the structure of the model
nn1.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [13]:
# Compile the model
nn1.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (750, 23)
X_test shape: (250, 23)


In [14]:
# Train the model use the callback
fit_model = nn1.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 448us/step - accuracy: 0.2410 - loss: 1.2793
Epoch 2/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297us/step - accuracy: 0.6550 - loss: 0.8991
Epoch 3/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 381us/step - accuracy: 0.8362 - loss: 0.6325
Epoch 4/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 354us/step - accuracy: 0.8784 - loss: 0.4285
Epoch 5/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394us/step - accuracy: 0.9298 - loss: 0.2834
Epoch 6/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 339us/step - accuracy: 0.9645 - loss: 0.1856
Epoch 7/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 385us/step - accuracy: 0.9510 - loss: 0.1551
Epoch 8/100
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366us/step - accuracy: 0.9747 - loss: 0.1029
Epoch 9/100
[1m24/24[0m [32m━

In [2]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn1.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

NameError: name 'nn1' is not defined

In [None]:
# Export our model to HDF5 file
# Define the file path for saving the model
filepath = "LungCancer.h5"

# Save the model to HDF5 format
nn1.save(filepath)

In [None]:
# K neighbors 
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))