In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Define a function to Describe Features extracted
def describeData(X,y):
    print('Total number of images: {}'.format(len(X)))
    print('Number of Benign Images: {}'.format(np.sum(y==0)))
    print('Number of Malignant Images: {}'.format(np.sum(y==1)))
    print('Percentage of positive images: {:.2f}%'.format(100*np.mean(y)))
    print('Image shape (Samples, Rows, Columns, Features): {}'.format(X[0].shape))
    print()

In [3]:
# Spliting the features into training and testing set at 80%/20% ratio
from sklearn.model_selection import GroupShuffleSplit     

def train_test_group_split(X, y, p):
    gss = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=2)
    for train_index, test_index in gss.split(X, y, p):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        p_train, p_test = p[train_index], p[test_index]
    return X_train, y_train, X_test, y_test, p_train, p_test

def feature_dimension(model, factor):
    X = np.load('../data/features_' + model + '/' + str(factor)+'/X.npy')
    y = np.load('../data/features_' + model + '/' + str(factor)+'/y.npy')
    p = np.load('../data/features_' + model + '/' + str(factor)+'/p.npy')
    print('Discription of features extracted from ' + str(factor) + 'x images by model ' + model + ":")
    X_train, y_train, X_test, y_test, p_train, p_test = train_test_group_split(X, y, p)
    print('Training Set')
    describeData(X_train, y_train)
    print('Testing Set')
    describeData(X_test, y_test)
    print("-----------------------------------------------------------------------")
    
# Showing the features dimension after train_test_split
magnification_factors = ['40', '100', '200', '400']
models_list = ['xception','vgg16','vgg19']
for factor in magnification_factors:
    for model in models_list:
        feature_dimension(model, factor)

Discription of features extracted from 40x images by model xception:
Training Set
Total number of images: 1589
Number of Benign Images: 485
Number of Malignant Images: 1104
Percentage of positive images: 69.48%
Image shape (Samples, Rows, Columns, Features): (1, 10, 10, 2048)

Testing Set
Total number of images: 406
Number of Benign Images: 140
Number of Malignant Images: 266
Percentage of positive images: 65.52%
Image shape (Samples, Rows, Columns, Features): (1, 10, 10, 2048)

-----------------------------------------------------------------------
Discription of features extracted from 40x images by model vgg16:
Training Set
Total number of images: 1589
Number of Benign Images: 485
Number of Malignant Images: 1104
Percentage of positive images: 69.48%
Image shape (Samples, Rows, Columns, Features): (1, 7, 7, 512)

Testing Set
Total number of images: 406
Number of Benign Images: 140
Number of Malignant Images: 266
Percentage of positive images: 65.52%
Image shape (Samples, Rows, Colum

In [7]:
# Classification Performance
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix


# Test Model Performance on Testing Set
def test_performance(X_test, y_test, model):
    # Classification report
    labels = ["Benign", "Malignant"]
    y_predict = model.predict(X_test)
    print(classification_report(y_test, y_predict, target_names=labels))
    print("F1 Score: {}".format(f1_score(y_test, y_predict)))
    # ROC curve
    y_predict_proba = model.predict_proba(X_test)
    fpr, tpr, _ = metrics.roc_curve(y_test, y_predict_proba[:,1])
    roc_auc = metrics.auc(fpr, tpr)
    plt.figure()
    lw = 2
    plt.plot(fpr, tpr, color='darkorange',
           lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

    
# Function returning image level pre_trained model performance
def Model_Performance(model, magnification_factor):
    # Make Features from Training Set 1D for compatability with standard classifiers
    X=np.load('../data/features_' + model + '/' + magnification_factor +'/X.npy')
    X_Shape = X.shape[1]*X.shape[2]*X.shape[3]*X.shape[4]
    X_Flat = X.reshape(X.shape[0], X_Shape)
    y = np.load('../data/features_' + model  + '/' + magnification_factor+'/y.npy')
    p = np.load('../data/features_' + model  + '/' + magnification_factor+'/p.npy')
    # Split into training and testing set with defined
    X_trainFlat, y_train, X_testFlat, y_test, p_train, p_test = train_test_group_split(X_Flat, y, p)
    # Hyperparameter tuning
    # optimized_classifier(X_trainFlat, y_train, p_train, 'LR' )
    # classifier = optimized_classifier(X_trainFlat, y_train, p_train, 'SVM')
    # Classification Performance on testing data
    print('Performance of pre-trained CNN model ' + model + ' for images at ' + magnification_factor + 'x:')
    classifier = LogisticRegression(solver='liblinear').fit(X_trainFlat, y_train)
    test_performance(X_testFlat, y_test, classifier)
    
for factor in magnification_factors:
    Model_Performance('xception', factor)
    Model_Performance('vgg16', factor)
    Model_Performance('vgg19', factor)
    print("-------------------------------------------------------------------")

Performance of pre-trained CNN model xception for images at 40x:
              precision    recall  f1-score   support

      Benign       0.72      0.65      0.68       140
   Malignant       0.82      0.86      0.84       266

   micro avg       0.79      0.79      0.79       406
   macro avg       0.77      0.76      0.76       406
weighted avg       0.79      0.79      0.79       406

F1 Score: 0.8440366972477065


NameError: name 'metrics' is not defined

In [None]:
# Patient level accuracy    
def patient_level_accuracy(X_train, y_train, X_test, y_test):
    """Run LogisticRegression as Classifier"""   
    clf = model.fit(X_train, y_train)
    preds = clf.predict(X_test)
    print(classification_report(y_test, preds, target_names=labels))
    print("F1 Score: {}".format(f1_score(d, preds)))
    #print("acc: %.02f" % accuracy_score(d, preds))
    #kfold = model_selection.KFold(n_splits=10)
    #f1= model_selection.cross_val_score(model, c,d, cv=kfold, scoring='f1')
    #mean = f1.mean() 
    #stdev = f1.std()
    #print('F1 score: %s (%s)' % (mean, stdev))
    print('')

In [None]:
# Different Candidate Classification Algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB

SVM_parameters = {'kernel':('linear', 'rbf'), 
                  'C':(1, 10)}

LR_parameters = {'penalty': ('l1', 'l2'),
                 'C' : (0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000),
                 'class_weight' : ({1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}),
                 'solver' : ('liblinear', 'saga')}

classifiers = {'LR': (LogisticRegression(), LR_parameters),
               'LDA': LinearDiscriminantAnalysis(),
               'DTC': DecisionTreeClassifier(),
               'RF': RandomForestClassifier(),
               'GBC': GradientBoostingClassifier(),
               'KNN': KNeighborsClassifier(),
               'SVM': (SVC(), SVM_parameters),
               'LSVM': LinearSVC(),
               'GNB': GaussianNB()}
                      

# Tuning hyperparameter to get optimized classifier using GridSearchCV
def optimized_classifier(X_train, y_train, p_train, classifier):
    """Function that use logistic regression as classifier and return Cross-validated F1 Score"""
    # Hyperparameter Tuning using group k folds cross validation
    group_kfold = GroupKFold(n_splits=5)
    parameters = classifiers[classifier][1]
    model = classifiers[classifier][0]
    grid = GridSearchCV(estimator=model, 
                       param_grid=parameters,
                       cv=group_kfold,
                       scoring=['roc_auc','f1'],
                       verbose=1,
                       n_jobs=-1,
                       refit='f1')
    grid_result = grid.fit(X_train, y_train, p_train) 
    # summarize results
    print("Best F1 score: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))
    return grid_result.best_estimator_

In [None]:
# Tuning hyperparameter to get optimized classifier using RandomizedSearchCV
loss = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron']
penalty = ['l1', 'l2', 'elasticnet']
alpha = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
learning_rate = ['constant', 'optimal', 'invscaling', 'adaptive']
class_weight = [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}]
eta0 = [1, 10, 100]

param_distributions = dict(loss=loss,
                           penalty=penalty,
                           alpha=alpha,
                           learning_rate=learning_rate,
                           class_weight=class_weight,
                           eta0=eta0)

random = RandomizedSearchCV(estimator=sgd,
                            param_distributions=param_distributions,
                            scoring='roc_auc',
                            verbose=1, n_jobs=-1,
                            n_iter=1000)
random_result = random.fit(X_train, y_train)

print('Best Score: ', random_result.best_score_)
print('Best Params: ', random_result.best_params_)

# Using Logistic Regression as Classifier
def runLogisticRegression(a,b,c,d):
    """Run LogisticRegression as Classifier"""
    model = LogisticRegression(solver='liblinear')
    clf = model.fit(a,b)
    preds = clf.predict(c)
    print(classification_report(d, preds, target_names=labels))
    print("F1 Score: {}".format(f1_score(d, preds)))
    #print("acc: %.02f" % accuracy_score(d, preds))
    #kfold = model_selection.KFold(n_splits=10)
    #mean = f1.mean() 
    #stdev = f1.std()
    #print('F1 score: %s (%s)' % (mean, stdev))
    print('')
    
# Test the Function
X = np.load('../data/features_xception/40/X.npy')
X_Shape = X.shape[1]*X.shape[2]*X.shape[3]*X.shape[4]
X_Flat = X.reshape(X.shape[0], X_Shape)
y = np.load('../data/features_xception/40/y.npy')
p = np.load('../data/features_xception/40/p.npy')
optimized_classifier(X_Flat, y, p, 'LR' )

In [None]:
def defineClassifiers():
    """
    This function just defines each abbreviation used in the previous function (e.g. LR = Logistic Regression)
    """
    print('')
    print('LR = LogisticRegression')
    print('RF = RandomForestClassifier')
    print('KNN = KNeighborsClassifier')
    print('SVM = Support Vector Machine SVC')
    print('LSVM = LinearSVC')
    print('GNB = GaussianNB')
    print('DTC = DecisionTreeClassifier')
    #print('GBC = GradientBoostingClassifier')
    #print('LDA = LinearDiscriminantAnalysis')
    print('')
    return
defineClassifiers()
    
def compareABunchOfDifferentModelsAccuracy(a,b,c,d):
    """
    compare performance of classifiers on X_train, X_test, Y_train, Y_test
    http://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html#sklearn.metrics.accuracy_score
    http://scikit-learn.org/stable/modules/model_evaluation.html#accuracy-score
    """    
    print('')
    print('Compare Multiple Classifiers:')
    print('')
    print('K-Fold Cross-Validation Accuracy:')
    print('')      
    resultsAccuracy = []
    names = []
    for name, model in classifiers:
        model.fit(a, b)
        kfold = model_selection.KFold(n_splits=10)
        accuracy_results = model_selection.cross_val_score(model, c, d, cv=kfold, scoring='accuracy')
        resultsAccuracy.append(accuracy_results)
        names.append(name)
        accuracyMessage = "%s: %f (%f)" % (name, accuracy_results.mean(), accuracy_results.std())
        print(accuracyMessage)  
    # boxplot algorithm comparison
    fig = plt.figure()
    fig.suptitle('Algorithm Comparison: Accuracy')
    ax = fig.add_subplot(111)
    plt.boxplot(resultsAccuracy)
    ax.set_xticklabels(names)
    ax.set_ylabel('Cross-Validation: Accuracy Score')
    plt.show()
    return

compareABunchOfDifferentModelsAccuracy(X_trainFlat, Y_train, X_testFlat, Y_test)

In [None]:
for i in range(len(X)):
    x=GlobalAveragePooling2D()(x)
    x=Dense(1024,activation='relu')(x) #we add dense layers so that the model can learn more complex functions and classify for better results.
    x=Dense(1024,activation='relu')(x) #dense layer 2
    x=Dense(512,activation='relu')(x) #dense layer 3
    preds=Dense(2,activation='softmax')(x) #final layer with softmax activation

def classification_training(train_or_test, feature_extractor, factor):
    X = np.load('../data/features_' + model + '/'+ train_or_test + '/' + str(factor)+'/X.npy')
    y = np.load('../data/features_' + model + '/'+ train_or_test + '/' + str(factor)+'/y.npy')