In [None]:
! pip install -q kaggle
from google.colab import files
files.upload()
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets download -d aryashah2k/breast-ultrasound-images-dataset
!unzip breast-ultrasound-images-dataset.zip

In [None]:
import glob as gb
import os

import cv2
import keras
import keras.layers as layers
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split, KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout, Activation, MaxPooling2D
from tensorflow.keras.models import Sequential

In [None]:
path = "Dataset_BUSI_with_GT/"


def encode(f):
    """Translate a class folder name into its integer label.

    'malignant' -> 0, 'benign' -> 1, 'normal' -> 2.
    Any other name raises KeyError.
    """
    label_of = dict(malignant=0, benign=1, normal=2)
    return label_of[f]

# Side length (pixels) every image is resized to.
imageSize = 180


X = []
Y = []

# Sorted traversal makes the sample order reproducible across machines;
# os.listdir and glob return entries in arbitrary order.
for folder in sorted(os.listdir(path)):
    folder_dir = os.path.join(path, folder)
    if not os.path.isdir(folder_dir):
        continue  # stray file next to the class folders
    try:
        label = encode(folder)
    except KeyError:
        continue  # directory that is not one of the known classes

    for file in sorted(gb.glob(os.path.join(folder_dir, "*.png"))):
        # Test only the file name, so a "mask" elsewhere in the path cannot exclude every image.
        if "mask" in os.path.basename(file):
            continue

        image = cv2.imread(file)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print("Skipping unreadable image:", file)
            continue

        image = cv2.resize(image, (imageSize, imageSize))
        image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # feature extraction: blur -> binary threshold -> contours drawn onto the gray image
        image_blur = cv2.GaussianBlur(image_gray, (7, 7), 0)
        _, thresh_image = cv2.threshold(image_blur, 100, 255, cv2.THRESH_BINARY)
        contours, _ = cv2.findContours(thresh_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        # NOTE(review): image_gray is single-channel, so presumably only the first
        # component (0) of the colour tuple is used -- confirm this is intended.
        image_contour = cv2.drawContours(image_gray, contours, -1, (0, 255, 0), 2)

        X.append(image_contour)
        Y.append(label)

if not X:
    raise RuntimeError("No images found under " + path)

X = np.array(X)

# Normalization: scale 8-bit pixel values to [0, 1]
X = X/255.0

Y = np.array(Y)



In [None]:
# Hold out 20% of the samples for testing.
# X is filled one class folder after another, so an unshuffled split would take the
# whole test set from the tail of the last folder(s). Shuffle with a fixed seed for
# reproducibility and stratify so both parts keep the class proportions of Y.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, shuffle=True, stratify=Y, random_state=42
)

# **CNN Models**

In [None]:
"""Custom CNN Archeticure"""

def build_custom_cnn():
    """Return a new, compiled custom CNN with freshly initialised weights.

    Five Conv2D + MaxPool2D stages (64, 64, 128, 128, 256 filters) are followed by
    three dense ReLU layers (512, 256, 128 units) and a 3-way softmax output.
    """
    model = Sequential(
        [
            Conv2D(64, (3, 3), input_shape=(imageSize, imageSize, 1), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(64, (3, 3), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(128, (3, 3), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(128, (3, 3), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Conv2D(256, (3, 3), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2)),
            Flatten(),
            Dense(units=512, activation='relu'),
            Dense(units=256, activation='relu'),
            Dense(units=128, activation='relu'),
            Dense(units=3, activation='softmax'),
        ]
    )
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


# Shuffle (with a fixed seed) so folds do not simply follow the order of `inputs`.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
k = 0
scores = []
inputs = np.concatenate((x_train, x_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)

for train, test in kf.split(inputs, targets):
    k = k + 1

    # A new model per fold: reusing one instance would carry weights trained on
    # earlier folds (which contained this fold's test samples) into this evaluation.
    CNNmodel = build_custom_cnn()
    history = CNNmodel.fit(inputs[train], targets[train], epochs=10, validation_data=(inputs[test], targets[test]))
    _, score = CNNmodel.evaluate(inputs[test], targets[test])
    scores.append(score)

    # Persist architecture and weights. The "CNN_" prefix keeps these files from being
    # overwritten by the other architectures, which number their folds the same way.
    model_json = CNNmodel.to_json()
    with open("CNN_" + str(k) + ".json", "w") as json_file:
        json_file.write(model_json)
    # Newer Keras releases require weight file names to end in ".weights.h5".
    CNNmodel.save_weights("CNN_" + str(k) + ".weights.h5")

    # summarize history for accuracy and loss, one figure each
    for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
        fig, ax = plt.subplots()
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title('model ' + metric)
        ax.set_ylabel(metric)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'test'], loc='upper left')
        fig.savefig("CNN_" + str(k) + " " + label + '.png', dpi=1000)
        plt.show()
        plt.close(fig)  # release the figure so repeated folds do not accumulate memory


# Mean validation accuracy over the folds
print(np.mean(scores))


In [None]:
"""letnet-5 Archeticure"""

def build_lenet5():
    """Return a new, compiled LeNet-5 style model with freshly initialised weights.

    Two Conv2D + AveragePooling2D stages (6 and 16 filters) are followed by dense
    layers of 120 and 84 units and a 3-way softmax output.
    """
    model = keras.Sequential()
    model.add(layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(180,180,1)))
    model.add(layers.AveragePooling2D())
    model.add(layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
    model.add(layers.AveragePooling2D())
    model.add(layers.Flatten())
    model.add(layers.Dense(units=120, activation='relu'))
    model.add(layers.Dense(units=84, activation='relu'))
    model.add(layers.Dense(units=3, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


letnet_5 = build_lenet5()
letnet_5.summary()


# Shuffle (with a fixed seed) so folds do not simply follow the order of `inputs`.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
k = 0
scores = []
inputs = np.concatenate((x_train, x_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)


for train, test in kf.split(inputs, targets):
    k = k + 1

    # A new model per fold: reusing one instance would carry weights trained on
    # earlier folds (which contained this fold's test samples) into this evaluation.
    letnet_5 = build_lenet5()
    history = letnet_5.fit(inputs[train], targets[train], epochs=10, validation_data=(inputs[test], targets[test]))
    _, score = letnet_5.evaluate(inputs[test], targets[test])
    scores.append(score)

    # Persist architecture and weights. The "LeNet5_" prefix keeps these files from being
    # overwritten by the other architectures, which number their folds the same way.
    model_json = letnet_5.to_json()
    with open("LeNet5_" + str(k) + ".json", "w") as json_file:
        json_file.write(model_json)
    # Newer Keras releases require weight file names to end in ".weights.h5".
    letnet_5.save_weights("LeNet5_" + str(k) + ".weights.h5")

    # summarize history for accuracy and loss, one figure each
    for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
        fig, ax = plt.subplots()
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title('model ' + metric)
        ax.set_ylabel(metric)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'test'], loc='upper left')
        fig.savefig("LeNet5_" + str(k) + " " + label + '.png', dpi=1000)
        plt.show()
        plt.close(fig)  # release the figure so repeated folds do not accumulate memory

# Mean validation accuracy over the folds
print(np.mean(scores))




In [None]:
"""AlexNet Archeticure"""


def build_alexnet():
    """Return a new, compiled AlexNet-style model with freshly initialised weights.

    Five convolutional layers (each followed by batch normalisation and ReLU) feed
    three fully connected layers with dropout and a 3-way softmax output.
    """
    model = Sequential()

    # 1st Convolutional Layer
    model.add(Conv2D(filters=96, input_shape=(180,180,1), kernel_size=(11,11), strides=(4,4), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # 2nd Convolutional Layer
    model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # 3rd Convolutional Layer
    model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # 4th Convolutional Layer
    model.add(Conv2D(filters=384, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # 5th Convolutional Layer
    model.add(Conv2D(filters=256, kernel_size=(3,3), strides=(1,1), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2), padding='same'))

    # Passing it to a Fully Connected layer
    model.add(Flatten())

    # 1st Fully Connected Layer
    # (the input shape is inferred from Flatten; only the first layer declares one)
    model.add(Dense(4096))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # Add Dropout to prevent overfitting
    model.add(Dropout(0.4))

    # 2nd Fully Connected Layer
    model.add(Dense(4096))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # Add Dropout
    model.add(Dropout(0.4))

    # 3rd Fully Connected Layer
    model.add(Dense(1000))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    # Add Dropout
    model.add(Dropout(0.4))

    # Output Layer
    # NOTE(review): batch normalisation between the output Dense layer and softmax
    # is unusual; kept as in the original design -- confirm it is intended.
    model.add(Dense(3))
    model.add(BatchNormalization())
    model.add(Activation('softmax'))

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


AlexNet = build_alexnet()

# Model Summary
AlexNet.summary()


# Shuffle (with a fixed seed) so folds do not simply follow the order of `inputs`.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
k = 0
scores = []
inputs = np.concatenate((x_train, x_test), axis=0)
targets = np.concatenate((y_train, y_test), axis=0)


for train, test in kf.split(inputs, targets):
    k = k + 1

    # A new model per fold: reusing one instance would carry weights trained on
    # earlier folds (which contained this fold's test samples) into this evaluation.
    AlexNet = build_alexnet()
    history = AlexNet.fit(inputs[train], targets[train], epochs=10, validation_data=(inputs[test], targets[test]))
    _, score = AlexNet.evaluate(inputs[test], targets[test])
    scores.append(score)

    # Persist architecture and weights. The "AlexNet_" prefix keeps these files from being
    # overwritten by the other architectures, which number their folds the same way.
    model_json = AlexNet.to_json()
    with open("AlexNet_" + str(k) + ".json", "w") as json_file:
        json_file.write(model_json)
    # Newer Keras releases require weight file names to end in ".weights.h5".
    AlexNet.save_weights("AlexNet_" + str(k) + ".weights.h5")

    # summarize history for accuracy and loss, one figure each
    for metric, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
        fig, ax = plt.subplots()
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title('model ' + metric)
        ax.set_ylabel(metric)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'test'], loc='upper left')
        fig.savefig("AlexNet_" + str(k) + " " + label + '.png', dpi=1000)
        plt.show()
        plt.close(fig)  # release the figure so repeated folds do not accumulate memory

# Mean validation accuracy over the folds
print(np.mean(scores))


# **Machine Learning Classifiers**

In [None]:
# Flatten every sample into one feature row, as used by the classifiers below.
x_train1 = np.reshape(x_train, (len(x_train), -1))
x_test1 = np.reshape(x_test, (len(x_test), -1))

In [None]:
"""Support Vector Machine (SVM)"""

# Fit a linear-kernel SVM on the flattened images and score it on the hold-out set.
clf = svm.SVC(kernel='linear', gamma='auto')
clf.fit(x_train1, y_train)

y_pred = clf.predict(x_test1)
# NOTE(review): for single-label multiclass data, micro-averaged precision and recall
# both equal accuracy; consider average='macro' if per-class behaviour matters.
print("SVM Accuracy =", accuracy_score(y_test, y_pred))
print("SVM Precision Score = ", precision_score(y_test, y_pred, average='micro'))
print("SVM Recall Score = ", recall_score(y_test, y_pred, average='micro'))

# Pin the label order so the matrix is always 3x3 and its rows/columns line up with
# the tick labels, which follow the encoding malignant=0, benign=1, normal=2.
svm_matrix = confusion_matrix(y_test, y_pred, labels=[0, 1, 2])
fig, ax = plt.subplots()
ConfusionMatrixDisplay(svm_matrix, display_labels=['Malignant', 'Benign', 'Normal']).plot(cmap='Blues', ax=ax)
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

# Save before showing: once the figure has been shown, saving it writes an empty image.
fig.savefig('SVM.png', dpi=1000)

## Display the visualization of the Confusion Matrix.
plt.show()

In [None]:
"""Naive Bayes"""

# Fit Gaussian Naive Bayes on the flattened images and score it on the hold-out set.
gnb = GaussianNB()
gnb.fit(x_train1, y_train)
gnb_predictions = gnb.predict(x_test1)

# NOTE(review): for single-label multiclass data, micro-averaged precision and recall
# both equal accuracy; consider average='macro' if per-class behaviour matters.
print("Naive Bayes Accuracy =", accuracy_score(y_test, gnb_predictions))
print("Precision Score = ", precision_score(y_test, gnb_predictions, average='micro'))
print("Recall Score = ", recall_score(y_test, gnb_predictions, average='micro'))

# Pin the label order so the matrix is always 3x3 and its rows/columns line up with
# the tick labels, which follow the encoding malignant=0, benign=1, normal=2.
gnb_matrix = confusion_matrix(y_test, gnb_predictions, labels=[0, 1, 2])
fig, ax = plt.subplots()
ConfusionMatrixDisplay(gnb_matrix, display_labels=['Malignant', 'Benign', 'Normal']).plot(cmap='Blues', ax=ax)
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

# Save before showing: once the figure has been shown, saving it writes an empty image.
fig.savefig('GNB.png', dpi=1000)

## Display the visualization of the Confusion Matrix.
plt.show()

In [None]:
"""k-Nearest Neighbors"""

# Fit a 9-nearest-neighbours classifier on the flattened images and score it on the hold-out set.
knn = KNeighborsClassifier(n_neighbors=9)
knn.fit(x_train1, y_train)
knn_predictions = knn.predict(x_test1)

# NOTE(review): for single-label multiclass data, micro-averaged precision and recall
# both equal accuracy; consider average='macro' if per-class behaviour matters.
print("KNN Accuracy =", accuracy_score(y_test, knn_predictions))
print("KNN Precision Score = ", precision_score(y_test, knn_predictions, average='micro'))
print("KNN Recall Score = ", recall_score(y_test, knn_predictions, average='micro'))

# Pin the label order so the matrix is always 3x3 and its rows/columns line up with
# the tick labels, which follow the encoding malignant=0, benign=1, normal=2.
knn_matrix = confusion_matrix(y_test, knn_predictions, labels=[0, 1, 2])
fig, ax = plt.subplots()
ConfusionMatrixDisplay(knn_matrix, display_labels=['Malignant', 'Benign', 'Normal']).plot(cmap='Blues', ax=ax)
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

# Save before showing: once the figure has been shown, saving it writes an empty image.
fig.savefig('KNN.png', dpi=1000)

## Display the visualization of the Confusion Matrix.
plt.show()


In [None]:
"""Decision Tree"""

# Fit a decision tree on the flattened images and score it on the hold-out set.
# A fixed random_state makes the fitted tree (and the reported scores) reproducible.
DT = DecisionTreeClassifier(random_state=42)
DT = DT.fit(x_train1, y_train)
y_predicted = DT.predict(x_test1)

# NOTE(review): for single-label multiclass data, micro-averaged precision and recall
# both equal accuracy; consider average='macro' if per-class behaviour matters.
print("Decision tree Accuracy =", accuracy_score(y_test, y_predicted))
print("Decision tree Precision Score = ", precision_score(y_test, y_predicted, average='micro'))
print("Decision tree Recall Score = ", recall_score(y_test, y_predicted, average='micro'))

# Pin the label order so the matrix is always 3x3 and its rows/columns line up with
# the tick labels, which follow the encoding malignant=0, benign=1, normal=2.
DT_matrix = confusion_matrix(y_test, y_predicted, labels=[0, 1, 2])
fig, ax = plt.subplots()
ConfusionMatrixDisplay(DT_matrix, display_labels=['Malignant', 'Benign', 'Normal']).plot(cmap='Blues', ax=ax)
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ')

# Save before showing: once the figure has been shown, saving it writes an empty image.
fig.savefig('DT.png', dpi=1000)

## Display the visualization of the Confusion Matrix.
plt.show()