In [None]:
# Basic Imports
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Preprocessing tools
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import ColumnTransformer
# Deep learning tools
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn import set_config
# Make scikit-learn transformers return pandas DataFrames instead of arrays
set_config(transform_output='pandas')
# Seed the Python, NumPy and TensorFlow random generators so weight
# initialisation and batch shuffling are repeatable on Restart & Run All
tf.keras.utils.set_random_seed(42)



In [None]:
# Read the dataset, using the first CSV column as the row index
df = pd.read_csv("cancer.csv", index_col=0)
# Preview the first five rows
df.head(n=5)



In [None]:
# Total number of missing cells across the whole frame
df.isnull().sum().sum()



In [None]:
# Separate the label column from the feature columns
target = 'diagnosis'
y = df[target]
X = df.drop(columns=[target])
# Hold out a test set (fixed random_state keeps the split repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)



In [None]:
# Encode the class labels as integers; the encoder learns the classes from
# the training labels only and is then reused for the test labels
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)
# Show the encoded training labels
y_train_enc



In [None]:
# Wrap the encoded labels in Series that keep the original row index and name
train_codes = le.transform(y_train)
test_codes = le.transform(y_test)
y_train_enc = pd.Series(data=train_codes, index=y_train.index, name=target)
y_test_enc = pd.Series(data=test_codes, index=y_test.index, name=target)
y_train_enc



In [None]:
# Proportion of each encoded class in the training labels
class_proportions = y_train_enc.value_counts(normalize=True)
class_proportions


In [None]:
# Standardise the features: learn the statistics on the training split only,
# then apply the same transformation to both splits
scaler = StandardScaler().fit(X_train)
X_train_tf = scaler.transform(X_train)
X_test_tf = scaler.transform(X_test)



In [None]:
# Step 1: Define our network structure
# The number of feature columns is the size of the network's input
input_shape = X_train_tf.shape[-1]
input_shape


In [None]:
# Start from an empty Sequential model; layers are added in the next cells
model = Sequential()


In [None]:
# Declare the input explicitly: one value per feature column.
# (Passing `input_dim` to a layer is deprecated in current Keras releases in
# favour of an Input object as the first entry of a Sequential model.)
model.add(tf.keras.Input(shape=(input_shape,)))
# First hidden layer: 30 neurons, ReLU activation
model.add(Dense(30, activation='relu'))
# Second hidden layer: 10 neurons, ReLU activation
model.add(Dense(10, activation='relu'))



In [None]:
# Output layer: a single sigmoid unit
model.add(Dense(units=1, activation='sigmoid'))



In [None]:
# NOTE: the output layer was already added in the previous cell. Adding a
# second Dense(1, sigmoid) here would stack a redundant sigmoid unit on top of
# the real output layer, so this cell only confirms the layer count
# (two hidden layers plus one output layer).
len(model.layers)



In [None]:
# Step 2: Compile with the 'bce' loss and the Adam optimizer
model.compile(optimizer='adam', loss='bce')



In [None]:
# Inspect the layers and parameter counts before training
model.summary()



In [None]:
# Step 3: Train for 10 epochs, passing the test split as validation data
history = model.fit(
    x=X_train_tf,
    y=y_train_enc,
    epochs=10,
    validation_data=(X_test_tf, y_test_enc),
)


In [None]:
# history.history is a dictionary; list the keys it recorded
history.history.keys()



In [None]:
# Training loss recorded for each epoch
history.history['loss']



In [None]:
# Plot training and validation loss on the same axes
fig, ax = plt.subplots()
for key, label in (('loss', 'Train loss'), ('val_loss', 'Validation Loss')):
    ax.plot(history.history[key], label=label, marker='o')
ax.legend();



In [None]:
# Build a new model with the same architecture (new layer objects are created).
# The input is declared with an explicit Input object because passing
# `input_dim` to a layer is deprecated in current Keras releases.
model = Sequential([
    tf.keras.Input(shape=(input_shape,)),  # one value per feature column
    Dense(30, activation='relu'),          # first hidden layer
    Dense(10, activation='relu'),          # second hidden layer
    Dense(1, activation='sigmoid'),        # output layer
])



In [None]:
# Step 2: Compile
# Extra metrics to track during training are passed here, at compile time
model.compile(
    optimizer='adam',
    loss='bce',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)
model.summary()



In [None]:
# Step 3: Train for 10 epochs, passing the test split as validation data
history = model.fit(
    x=X_train_tf,
    y=y_train_enc,
    epochs=10,
    validation_data=(X_test_tf, y_test_enc),
)



In [None]:
# Tabulate the history: one row per epoch, one column per recorded metric
history_df = pd.DataFrame(data=history.history)
history_df



In [None]:
# Custom function for plotting each metric
def plot_history(history, figsize=(6,12), marker='o'):
    """Plot every metric stored in a training history, one subplot per metric.

    A training curve is drawn for each key of ``history.history`` that does
    not start with ``'val_'``. When a matching ``'val_<metric>'`` key exists,
    its values are drawn on the same axis.

    Parameters
    ----------
    history : object
        Object exposing ``history`` (dict mapping metric name to a sequence of
        per-epoch values) and ``epoch`` (sequence of epoch indices), such as
        the value returned by ``model.fit``.
    figsize : tuple, default (6, 12)
        Figure size forwarded to ``plt.subplots``.
    marker : str, default 'o'
        Marker style forwarded to every ``ax.plot`` call.

    Returns
    -------
    fig : Figure
        The figure holding all subplots.
    axes : 1-D sequence of Axes
        One axis per metric, in the order the metrics appear in the history.
        A 1-D sequence is returned even when there is only one metric.

    Raises
    ------
    ValueError
        If ``history.history`` holds no training (non-``val_``) metrics.
    """
    # Training metrics are the keys without the 'val_' prefix
    metrics = [c for c in history.history if not c.startswith('val_')]
    if not metrics:
        raise ValueError("history.history contains no training metrics to plot")

    # squeeze=False always yields a 2-D grid, so a history with a single
    # metric (e.g. only 'loss') no longer returns a bare Axes that cannot
    # be indexed
    fig, axes_grid = plt.subplots(nrows=len(metrics), figsize=figsize,
                                  squeeze=False)
    axes = axes_grid.ravel()

    # Epoch indices are shared by every curve
    epochs = history.epoch

    for ax, metric_name in zip(axes, metrics):
        # Training curve
        ax.plot(epochs, history.history[metric_name], label=metric_name,
                marker=marker)

        # Validation curve, when it was recorded
        val_metric_name = f"val_{metric_name}"
        if val_metric_name in history.history:
            ax.plot(epochs, history.history[val_metric_name],
                    label=val_metric_name, marker=marker)

        # Final subplot adjustments
        ax.legend()
        ax.set_title(metric_name)
    fig.tight_layout()

    return fig, axes



In [None]:
# Draw one subplot per recorded metric (trailing ';' hides the returned tuple)
plot_history(history=history);



In [None]:
# Build a new model with the same architecture (new layer objects are created).
# The input is declared with an explicit Input object because passing
# `input_dim` to a layer is deprecated in current Keras releases.
model = Sequential([
    tf.keras.Input(shape=(input_shape,)),  # one value per feature column
    Dense(30, activation='relu'),          # first hidden layer
    Dense(10, activation='relu'),          # second hidden layer
    Dense(1, activation='sigmoid'),        # output layer
])

# Step 2: Compile
# Metrics are specified during the .compile step
model.compile(
    optimizer='adam',
    loss='bce',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)
model.summary()



In [None]:
# Step 3: Train for 10 epochs, holding out 20% of the training data for
# validation instead of passing the test split
history = model.fit(
    x=X_train_tf,
    y=y_train_enc,
    epochs=10,
    validation_split=.2,
)

# Plot every recorded metric for this run
plot_history(history);



In [None]:
# Score the trained network on the test split; return_dict=True gives the
# results as a dictionary
result = model.evaluate(x=X_test_tf, y=y_test_enc, return_dict=True)
result



In [None]:
# Raw model outputs for the test split; show the first five
y_pred_test = model.predict(x=X_test_tf)
y_pred_test[:5]



In [None]:
# Round each prediction to the nearest whole number
y_pred_test = np.around(y_pred_test, decimals=0)
y_pred_test[:5]



In [None]:
from sklearn.metrics import classification_report,ConfusionMatrixDisplay
print(classification_report(y_test_enc, y_pred_test))
 
ConfusionMatrixDisplay.from_predictions(y_test_enc, y_pred_test, cmap='Blues',
                                       normalize='true');

