In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras import layers

%matplotlib inline

# Report the versions of the core libraries so a run can be reproduced later.
version_report = {
    "Tensorflow version:": tf.__version__,
    "Numpy version:": np.__version__,
    "Pandas version:": pd.__version__,
}
for label, version in version_report.items():
    print(label, version)

# Pre-processing Breast Cancer Wisconsin
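This notebook prepares the [Breast Cancer Wisconsin](https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data) data set for our benchmarking: it encodes the diagnosis labels as numbers, splits the data into a training and a test set, standardizes the features, and saves the results as CSV files.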

In [None]:
# Load the raw data set from disk.
dataframe1 = pd.read_csv('../datasets/breastcancer.csv')

# Turn the diagnosis letters into integers: 0 = B, 1 = M
# (LabelEncoder numbers the classes in sorted order).
diagnosis = LabelEncoder()
diagnosis.fit(dataframe1['diagnosis'])
dataframe1['diagnosis'] = diagnosis.transform(dataframe1['diagnosis'])

# Discard the leading 'id' column and the trailing unnamed column.
dataframe1 = dataframe1.drop(columns=['id', 'Unnamed: 32'])

# Show ten randomly drawn rows as a quick visual check.
dataframe1.sample(n=10)

In [None]:
# Split into 70 % training and 30 % test data.
#
# The seed is fixed so that Restart & Run All always produces the same split
# (and therefore the same exported CSV files). Stratifying on the diagnosis
# column keeps the class ratio the same in both parts.
SPLIT_SEED = 42

x_train, x_test, y_train, y_test = train_test_split(
    dataframe1.loc[:, dataframe1.columns != 'diagnosis'],  # all feature columns
    dataframe1['diagnosis'],                               # encoded target
    test_size=0.3,
    random_state=SPLIT_SEED,
    stratify=dataframe1['diagnosis'],
)

# Shapes of the four parts, in the order: train inputs, test inputs,
# train targets, test targets.
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)
x_train.head()

In [None]:
# Normalize
# Compute the per-column mean and std on the training split only; the same
# parameters are then reused for the test split so that no test-set
# statistics influence the scaling.
preprocessParams = preprocessing.StandardScaler().fit(x_train)
# Perform standardization by centering and scaling.
x_train_normalized = preprocessParams.transform(x_train)
x_test_normalized = preprocessParams.transform(x_test)

# Show the first training sample before and after scaling.
print(x_train[:1])
print(x_train_normalized[:1])


# categorical (one-hot) targets
# Use the Keras that ships with TensorFlow (already imported at the top of the
# notebook) instead of the standalone `keras` package, so the whole notebook
# relies on a single Keras implementation. The number of classes is pinned to
# two (B and M) so the width of the encoding does not depend on which labels
# happen to be present in a split.
y_train_categorial = keras.utils.to_categorical(y_train, num_classes=2)
y_test_categorial = keras.utils.to_categorical(y_test, num_classes=2)

In [None]:
# Write every pre-processed array to its own CSV file, without a header row
# and without an index column. The entries are listed in the order in which
# the files are written.
exports = [
    ('../datasets/breastcancer_train_input.csv', x_train_normalized),
    ('../datasets/breastcancer_train_target.csv', y_train),
    ('../datasets/breastcancer_test_input.csv', x_test_normalized),
    ('../datasets/breastcancer_test_target.csv', y_test),
    ('../datasets/breastcancer_train_target_categorical.csv', y_train_categorial),
    ('../datasets/breastcancer_test_target_categorical.csv', y_test_categorial),
]

for csv_path, values in exports:
    pd.DataFrame(values).to_csv(csv_path, header=None, index=None)

In [None]:
# test the data set
# Train a small binary classifier as a sanity check of the pre-processed data.
# The network is fitted and queried on the standardized features (the values
# that are written to the CSV files), not on the raw columns.
n_features = x_train_normalized.shape[1]  # taken from the data instead of hard-coding it

model2 = keras.Sequential()
model2.add(keras.Input(shape=(n_features,)))
model2.add(layers.Dense(32))
model2.add(layers.Activation('relu'))
model2.add(layers.Dense(16))
model2.add(layers.Activation('relu'))
model2.add(layers.Dense(8))
model2.add(layers.Activation('relu'))
model2.add(layers.Dense(1))
model2.add(layers.Activation('sigmoid'))  # single output in [0, 1] for the 0/1 target

# summary() prints the table itself and returns None, so it is not wrapped in print().
model2.summary()

# Accuracy is reported next to the loss so the training log is easy to read.
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model2.fit(x_train_normalized, y_train.to_numpy(), epochs=100)

# Show one standardized test sample ...
print(x_test_normalized[1:2])

# ... and the predicted probabilities for the first two test samples.
print(model2.predict(x_test_normalized[:1]))
print(model2.predict(x_test_normalized[1:2]))