# **BREAST CANCER CLASSIFIER WITH MACHINE LEARNING AND DEEP LEARNING**

## Imports

In [None]:
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, StandardScaler, MinMaxScaler, OrdinalEncoder, OneHotEncoder

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

## Load data and separate training and testing data

In [None]:
# Fetch the breast-cancer dataset bundled with scikit-learn
from sklearn.datasets import load_breast_cancer
df = load_breast_cancer()  # dataset container exposing .data, .target, .feature_names, .target_names
X = df.data
y = df.target

# Class labels, indexed by target value
target_names = df.target_names

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# View data: one row per sample, feature columns followed by the target label
features_frame = pd.DataFrame(df.data, columns=df.feature_names)
target_frame = pd.DataFrame(df.target, columns=['target'])
pd.concat([features_frame, target_frame], axis=1)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [None]:
# Scale the features.
# Each scaler is fitted on the training split only and then applied to both
# splits, so no test-set statistics leak into the preprocessing.

# Standardisation
std_scaler = StandardScaler().fit(X_train)
X_train_std = std_scaler.transform(X_train)
X_test_std = std_scaler.transform(X_test)

# Min-max scaling
minmax_scaler = MinMaxScaler().fit(X_train)
X_train_minmax = minmax_scaler.transform(X_train)
X_test_minmax = minmax_scaler.transform(X_test)

# **Models**

# Scikit-learn

In [None]:
# Import
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Define the k-nearest-neighbours model (default settings) and fit it on the
# min-max-scaled training features
ml_model = KNeighborsClassifier().fit(X_train_minmax, y_train)

# Predict the held-out samples
predictions = ml_model.predict(X_test_minmax)

# Evaluate: mean accuracy, confusion matrix and per-class report
result = ml_model.score(X_test_minmax, y_test)
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)

# Model score on the test data
print("Accuracy: %.3f%%" % (result * 100.0))
print("\nConfusion Matrix\n",matrix)
print("\nReport\n", report)

Accuracy: 96.491%

Confusion Matrix
 [[41  2]
 [ 2 69]]

Report
               precision    recall  f1-score   support

           0       0.95      0.95      0.95        43
           1       0.97      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.96      0.96       114
weighted avg       0.96      0.96      0.96       114



In [None]:
# Import
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Define the logistic-regression model (default settings) and fit it on the
# standardised training features
ml_model = LogisticRegression().fit(X_train_std, y_train)

# Predict the held-out samples
predictions = ml_model.predict(X_test_std)

# Evaluate: mean accuracy, confusion matrix and per-class report
result = ml_model.score(X_test_std, y_test)
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)

# Model score on the test data
print("Accuracy: %.3f%%" % (result * 100.0))
print("\nConfusion Matrix\n",matrix)
print("\nReport\n", report)

Accuracy: 97.368%

Confusion Matrix
 [[41  2]
 [ 1 70]]

Report
               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114



In [None]:
# Import
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

# Define model.
# A random forest is stochastic (bootstrap samples, random feature subsets);
# fixing random_state makes the reported metrics reproducible across runs.
ml_model = RandomForestClassifier(random_state=42)

# Fit Model on the raw (unscaled) features
ml_model.fit(X_train, y_train)

# Making predictions
predictions = ml_model.predict(X_test)

# Evaluate: mean accuracy, confusion matrix and per-class report
result = ml_model.score(X_test, y_test)
matrix = confusion_matrix(y_test, predictions)
report = classification_report(y_test, predictions)

# Model score on the test data
print("Accuracy: %.3f%%" % (result * 100.0))
print("\nConfusion Matrix\n",matrix)
print("\nReport\n", report)

Accuracy: 96.491%

Confusion Matrix
 [[40  3]
 [ 1 70]]

Report
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114



In [None]:
# Make predictions on new data

# X new: placeholder sample with every feature set to 0 — replace with real measurements
X_new = np.zeros((1, df.data.shape[1]))

# No scaling step: at this point `ml_model` is the RandomForestClassifier that
# was fitted on the raw, unscaled X_train, so new samples must be passed in the
# same raw feature space (min-max scaling them would mismatch the training data).

# Making predictions
predictions = ml_model.predict(X_new)
print(predictions)
print(target_names[predictions])

[1]
['benign']


# PyTorch

## PyTorch with the Sequential API

In [None]:
import torch
from torch import nn
import torch.optim as optim

In [None]:
# Convert the data into tensors: float32 features, int64 (long) class labels.
# Note that this rebinds the X_*/y_* names from NumPy arrays to tensors.
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [None]:
# Define the model structure
in_size = X_train.shape[1]                 # number of input features
hidden_size = 10                           # width of the single hidden layer
out_size = len(np.unique(df.target))       # number of distinct classes

# Seed PyTorch's RNG so the random weight initialisation is the same on every
# Restart & Run All
torch.manual_seed(42)

torch_model = nn.Sequential(
    nn.Linear(in_features = in_size, out_features = hidden_size),                 # input -> hidden layer
    nn.ReLU(),                                                                    # hidden activation function
    nn.Linear(in_features = hidden_size, out_features = out_size),                # hidden -> output layer
    nn.LogSoftmax(dim=1)                                                          # per-class log-probabilities
)

In [None]:
# Use the GPU when CUDA is available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Move the model, then rebind each data tensor to its copy on the target device
torch_model.to(device)
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)


In [None]:
# Loss function and optimizer.
# NLLLoss consumes log-probabilities, which the model's final LogSoftmax layer produces.
criterion = nn.NLLLoss()
optimizer = optim.Adam(params=torch_model.parameters(), lr=1e-2)

In [None]:
# Train the model (every step uses the whole training set as one batch)
num_epochs = 100
for epoch in range(num_epochs):
    # Clear the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Forward pass
    outputs = torch_model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    epoch_no = epoch + 1
    if epoch_no % 10 == 0:
        print(f"Epoch [{epoch_no}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 0.2121
Epoch [20/100], Loss: 0.1013
Epoch [30/100], Loss: 0.0736
Epoch [40/100], Loss: 0.0619
Epoch [50/100], Loss: 0.0536
Epoch [60/100], Loss: 0.0476
Epoch [70/100], Loss: 0.0427
Epoch [80/100], Loss: 0.0383
Epoch [90/100], Loss: 0.0344
Epoch [100/100], Loss: 0.0309


In [None]:
# Evaluate the model on the test set (gradient tracking disabled)
with torch.no_grad():
    outputs = torch_model(X_test)
    predicted = outputs.argmax(dim=1)   # index of the highest log-probability per row
    n_correct = (predicted == y_test).sum().item()
    accuracy = n_correct / len(y_test)
    print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9825


In [None]:
# Make predictions on test data (first five test samples)
with torch.no_grad():
    outputs = torch_model(X_test[:5])
    predicted = outputs.argmax(dim=1)

# Map each predicted class index to its label
for class_idx in predicted.tolist():
  print('Diagnosis:', target_names[class_idx])

Diagnosis: benign
Diagnosis: malignant
Diagnosis: malignant
Diagnosis: benign
Diagnosis: benign


In [None]:
# Make predictions on new data

# X new: placeholder sample with every feature set to 0 — replace with real measurements
X_new = np.zeros((1, df.data.shape[1]))

# No scaling step: `torch_model` was trained on tensors built from the raw,
# unscaled X_train, so new samples are fed in the same raw feature space.
# (No object named `scaler` is defined in this notebook; the fitted scalers are
# `std_scaler` and `minmax_scaler`, neither of which was used for this model.)

# Convert X_new to tensor
X_new = torch.tensor(X_new, dtype=torch.float32).to(device)

# Make prediction
with torch.no_grad():
    outputs = torch_model(X_new)
    predicted = outputs.argmax(dim=1)

print('Diagnosis:',predicted)
print('Diagnosis:',target_names[predicted.item()])


Diagnosis: tensor([1])
Diagnosis: benign


## PyTorch with a custom `nn.Module` subclass

In [None]:
# Define the class and forward function
class Net(nn.Module):
    """Feed-forward classifier: Linear -> ReLU -> Linear -> LogSoftmax.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        output_size: number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        # Named `softmax`, but it emits log-probabilities (normalised over dim 1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch `x`."""
        hidden = self.relu(self.fc1(x))
        return self.softmax(self.fc2(hidden))


In [None]:
# Instantiate the class-based model and place it on the selected device
class_torch_model = Net(in_size, hidden_size, out_size)
class_torch_model = class_torch_model.to(device)

# Loss function and optimizer (NLLLoss pairs with the model's LogSoftmax output)
criterion = nn.NLLLoss()
optimizer = optim.Adam(params=class_torch_model.parameters(), lr=1e-2)

In [None]:
# Train the model (every step uses the whole training set as one batch)
num_epochs = 100
for epoch in range(num_epochs):
    # Clear the gradients accumulated by the previous step
    optimizer.zero_grad()

    # Forward pass
    outputs = class_torch_model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

    # Report the loss on every 10th epoch
    epoch_no = epoch + 1
    if epoch_no % 10 == 0:
        print(f"Epoch [{epoch_no}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 0.2247
Epoch [20/100], Loss: 0.1063
Epoch [30/100], Loss: 0.0741
Epoch [40/100], Loss: 0.0618
Epoch [50/100], Loss: 0.0538
Epoch [60/100], Loss: 0.0470
Epoch [70/100], Loss: 0.0415
Epoch [80/100], Loss: 0.0367
Epoch [90/100], Loss: 0.0327
Epoch [100/100], Loss: 0.0290


In [None]:
# Evaluate the model on the test set (gradient tracking disabled)
with torch.no_grad():
    outputs = class_torch_model(X_test)
    predicted = outputs.argmax(dim=1)   # index of the highest log-probability per row
    n_correct = (predicted == y_test).sum().item()
    accuracy = n_correct / len(y_test)
    print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.9825


In [None]:
# Make predictions on new data

# X new: placeholder sample with every feature set to 0 — replace with real measurements
X_new = np.zeros((1, df.data.shape[1]))

# No scaling step: `class_torch_model` was trained on tensors built from the
# raw, unscaled X_train, so new samples are fed in the same raw feature space.
# (No object named `scaler` is defined in this notebook; the fitted scalers are
# `std_scaler` and `minmax_scaler`, neither of which was used for this model.)

# Convert X_new to tensor
X_new = torch.tensor(X_new, dtype=torch.float32).to(device)

# Make prediction
with torch.no_grad():
    outputs = class_torch_model(X_new)
    predicted = outputs.argmax(dim=1)

print('Diagnosis:',predicted)
print('Diagnosis:',target_names[predicted.item()])

Diagnosis: tensor([1])
Diagnosis: benign


# TensorFlow

## TensorFlow with the Sequential API

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation

In [None]:
# Define the model structure
in_size = X_train.shape[1]             # number of input features
hidden_size = 10                       # units in each hidden layer
out_size = len(np.unique(df.target))   # number of distinct classes

# Create TensorFlow model, adding the layers one at a time
tf_model = Sequential()
tf_model.add(Dense(hidden_size, input_shape=(in_size,), activation='relu'))   # first hidden layer (also declares the input shape)
tf_model.add(Dense(hidden_size, activation='relu'))                           # second hidden layer
tf_model.add(Dense(out_size, activation='softmax'))                           # output layer: class probabilities

In [None]:
# Convert PyTorch tensors back to NumPy arrays for Keras.
# The torch.is_tensor guard makes this cell safe to re-run: values that are
# already NumPy arrays are passed through instead of raising AttributeError.
def _to_numpy(value):
    """Return `value` as a CPU NumPy array if it is a torch tensor, else unchanged."""
    return value.detach().cpu().numpy() if torch.is_tensor(value) else value

X_train = _to_numpy(X_train)
y_train = _to_numpy(y_train)
X_test = _to_numpy(X_test)
y_test = _to_numpy(y_test)

In [None]:
# Compile the model
tf_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train TensorFlow model
tf_model.fit(X_train, y_train, epochs=100, batch_size=16, validation_split=0.01, verbose = 0)

# Evaluate the model with test data.
# With metrics=['accuracy'] above, evaluate() returns [loss, accuracy]. The
# result is stored as `tf_scores` rather than `eval` so that Python's built-in
# eval() is not shadowed for the rest of the notebook.
tf_scores = tf_model.evaluate(X_test, y_test)
print(' Loss:', tf_scores[0], '\n', 'Acc:' , tf_scores[1])

 Loss: 0.07615593820810318 
 Acc: 0.9824561476707458


In [None]:
# Make predictions using TensorFlow model (first five test samples)
predictions_tf = tf_model.predict(X_test[0:5])

# Each row holds one probability per class; argmax along axis 1 picks the
# most likely class for every sample in a single call
for class_idx in np.argmax(predictions_tf, axis=1):
  print('Diagnosis:', target_names[class_idx])

Diagnosis: benign
Diagnosis: malignant
Diagnosis: malignant
Diagnosis: benign
Diagnosis: benign


In [None]:
# Make predictions on new data

# X new: placeholder sample with every feature set to 0 — replace with real measurements
X_new = np.zeros((1, df.data.shape[1]), dtype=np.float32)

# No scaling step: `tf_model` was fitted on the raw, unscaled X_train, so new
# samples are fed in the same raw feature space.
# (No object named `scaler` is defined in this notebook; the fitted scalers are
# `std_scaler` and `minmax_scaler`, neither of which was used for this model.)

# Make predictions
predictions_tf = tf_model.predict(X_new)
print('Diagnosis:', predictions_tf)
print('Diagnosis:', target_names[np.argmax(predictions_tf)])

Diagnosis: [[7.176476e-18 1.000000e+00]]
Diagnosis: benign


## TensorFlow with the model subclassing API

In [None]:
# Define the model structure
class Net(tf.keras.Model):
    """Keras subclassed model: a ReLU hidden layer followed by a softmax output layer.

    Note: this definition rebinds the name `Net`, which an earlier cell of this
    notebook uses for the PyTorch module.
    """

    def __init__(self, input_size, hidden_size, output_size):
        # `input_size` is accepted but not used by this constructor.
        super().__init__()
        self.fc1 = tf.keras.layers.Dense(units=hidden_size, activation='relu')
        self.fc2 = tf.keras.layers.Dense(units=output_size, activation='softmax')

    def call(self, x):
        """Forward pass: apply the hidden layer, then the output layer."""
        hidden = self.fc1(x)
        return self.fc2(hidden)

In [None]:
# Define the model
tf_class_model = Net(in_size, hidden_size, out_size)

# Define the loss function and optimizer.
# The network ends in an `out_size`-unit softmax and the labels are integer
# class ids, so the matching loss object is SparseCategoricalCrossentropy
# (the same loss the compile step names as 'sparse_categorical_crossentropy').
# NOTE(review): the compile call that follows passes 'adam' and the loss as
# strings, so `loss_fn` and `optimizer` are not what the model is compiled
# with — confirm which configuration is intended.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01)

In [None]:
# Compile the model
tf_class_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train TensorFlow model
tf_class_model.fit(X_train, y_train, epochs=100, batch_size=16, validation_split=0.01, verbose = 0)

# Evaluate the model with test data.
# With metrics=['accuracy'] above, evaluate() returns [loss, accuracy]. The
# result is stored as `tf_class_scores` rather than `eval` so that Python's
# built-in eval() is not shadowed for the rest of the notebook.
tf_class_scores = tf_class_model.evaluate(X_test, y_test)
print(' Loss:', tf_class_scores[0], '\n', 'Acc:' , tf_class_scores[1])

 Loss: 0.06778409332036972 
 Acc: 0.9824561476707458


In [None]:
# Make predictions on new data

# X new: placeholder sample with every feature set to 0 — replace with real measurements
X_new = np.zeros((1, df.data.shape[1]), dtype=np.float32)

# No scaling step: `tf_class_model` was fitted on the raw, unscaled X_train, so
# new samples are fed in the same raw feature space.
# (No object named `scaler` is defined in this notebook; the fitted scalers are
# `std_scaler` and `minmax_scaler`, neither of which was used for this model.)

# Make predictions
predictions_tf = tf_class_model.predict(X_new)
print('Diagnosis:', predictions_tf)
print('Diagnosis:', target_names[np.argmax(predictions_tf)])

Diagnosis: [[4.748192e-19 1.000000e+00]]
Diagnosis: benign
