In [None]:
from Tumor_Classifier_Utils import *

In [None]:
# Load the train / validation / test splits and their labels.
# NOTE(review): the argument 64 is presumably the image side length that comes
# back as N — confirm against load_data in Tumor_Classifier_Utils.
train, train_y, val, val_y, test, test_y, N, num_classes = load_data(64)

# Sanity check: one shape per line, images first and then labels.
print(
    *(split.shape for split in (train, val, test, train_y, val_y, test_y)),
    sep="\n",
)

In [None]:
# Flatten every image into a single feature row so PCA sees a 2-D
# (n_samples, n_features) matrix. The row count is taken from each array itself
# rather than hard-coded, so the cell keeps working if the split sizes change.
train_PCA = np.reshape(train, (train.shape[0], -1))
val_PCA = np.reshape(val, (val.shape[0], -1))
test_PCA = np.reshape(test, (test.shape[0], -1))

print(train_PCA.shape)
print(val_PCA.shape)
print(test_PCA.shape)

In [None]:
# Fit PCA with default settings on the flattened training images only;
# fit() hands back the estimator, so it can be bound in one step.
pca = decomposition.PCA().fit(train_PCA)
pca

In [None]:
# Show every tenth principal component (indices 0, 10, ..., 190) as an
# N x N image on a 4 x 5 grid.
fig = plt.figure(figsize=[20, 8])
for i in range(20):
    component_index = i * 10
    ax = fig.add_subplot(4, 5, i + 1)
    pc = np.reshape(pca.components_[component_index, :], (N, N))
    ax.imshow(pc)
    ax.set_title("PCA Components: " + str(component_index))
    # Hide ticks and frame on the subplot that was just drawn.
    ax.axis('off')

In [None]:
# Project all three splits onto the principal axes learned from the training set.
transformed_train, transformed_val, transformed_test = (
    pca.transform(flat_split) for flat_split in (train_PCA, val_PCA, test_PCA)
)

In [None]:
# Running total of the per-component explained-variance ratios.
cumulative_sum = np.cumsum(pca.explained_variance_ratio_, axis=0)

# Zero-based index of the first component at which the running total exceeds
# each threshold (so index + 1 components are needed to pass it).
top90, top95, top99 = (
    np.flatnonzero(cumulative_sum > threshold)[0]
    for threshold in (0.90, 0.95, 0.99)
)

In [None]:
# Hand the cumulative explained-variance array and the three threshold indices
# to the plotting helper. plot_cumulative_sum is defined outside this notebook
# section; presumably it draws the curve and marks the 90/95/99% cut-offs —
# confirm against its definition.
plot_cumulative_sum(cumulative_sum, top90, top95, top99)

In [None]:
# Keep the components up to and including index top95 and zero out the rest.
# This works on a copy: plain assignment would only alias transformed_train, and
# the in-place zeroing below would then wipe its trailing columns as well.
transformed_train_95 = transformed_train.copy()
transformed_train_95[:, top95 + 1:] = 0
print(transformed_train_95.shape)

# Map the truncated coefficients back to pixel space.
transform_top95_generate = pca.inverse_transform(transformed_train_95)
print(transform_top95_generate.shape)

# First 20 training images, each shown next to its reconstruction
# (odd grid slots hold the original, even slots the PCA version).
fig = plt.figure(figsize=[20, 8])
for i in range(20):
    ax = fig.add_subplot(4, 10, 2*i + 1)
    ax.imshow(np.reshape(train_PCA[i, :], (N, N)))
    ax.set_title('Original')
    ax.axis('off')
    ax = fig.add_subplot(4, 10, 2*i + 2)
    pc = np.reshape(transform_top95_generate[i, :], (N, N))
    ax.imshow(pc)
    ax.set_title('PCA')
    ax.axis('off')

In [None]:
# choose to use 95% recreation
# top95 is the zero-based index of the first component at which the cumulative
# explained variance exceeds 0.95, so the slice has to run to top95 + 1 to
# include that component; stopping at top95 would leave the kept variance at
# or below the 95% target.
transformed_train_95 = transformed_train[:, :top95 + 1]
transformed_val_95 = transformed_val[:, :top95 + 1]
transformed_test_95 = transformed_test[:, :top95 + 1]

print(transformed_train_95.shape)
print(train_y.shape)

In [None]:
# Fit LDA on the PCA-reduced training features and labels (both passed as
# fresh arrays), then project every split into the discriminant space.
lda = discriminant_analysis.LinearDiscriminantAnalysis().fit(
    np.array(transformed_train_95),
    np.array(train_y),
)

transformed_train_LDA, transformed_val_LDA, transformed_test_LDA = (
    lda.transform(features)
    for features in (transformed_train_95, transformed_val_95, transformed_test_95)
)

In [None]:
# Confirm the dimensionality of the LDA projection of the training split.
print(transformed_train_LDA.shape)

In [None]:
def search_hyperparams(model, params, X_train, Y_train, X_validate, Y_validate):
    """Exhaustively search a parameter grid and return the best combination.

    Each combination produced by ``ParameterGrid(params)`` is applied to
    ``model`` with ``set_params``; the model is then fitted on the training
    split and scored on the validation split with ``model.score``. The best
    and worst combinations and their scores are printed when the search ends.

    Args:
        model: Estimator exposing ``set_params``, ``fit`` and ``score``. The
            same object is reconfigured and refitted for every combination, so
            after the call it holds the last combination tried, not the best.
        params: Grid specification handed to ``ParameterGrid``.
        X_train: Training features.
        Y_train: Training targets.
        X_validate: Validation features used for scoring.
        Y_validate: Validation targets used for scoring.

    Returns:
        The parameter dict with the highest validation score. On ties the
        combination encountered first wins.
    """
    # Create list of all possible combinations
    param_list = list(ParameterGrid(params))

    # Infinite sentinels let the first candidate always register, whatever
    # range model.score uses (accuracy in [0, 1], or a possibly negative R^2).
    best_result = float("-inf")
    best_params = param_list[0]
    worst_result = float("inf")
    worst_params = param_list[0]

    # The loop variable is deliberately not called `params`, so the grid
    # argument is not shadowed while iterating.
    for candidate in param_list:
        model = model.set_params(**candidate)
        model.fit(X_train, Y_train)
        result = model.score(X_validate, Y_validate)

        # Strict comparisons keep the earliest candidate on ties.
        if result > best_result:
            best_result = result
            best_params = candidate
        if result < worst_result:
            worst_result = result
            worst_params = candidate

    # Report both extremes so the spread across the grid is visible.
    print(best_params)
    print("Validation Accuracy " + str(best_result))
    print(worst_params)
    print("Validation Accuracy " + str(worst_result))
    return best_params

In [None]:
# Candidate SVC settings to try, one list of values per hyperparameter.
param_grid = dict(
    class_weight=['balanced'],
    decision_function_shape=['ovo', 'ovr'],
    C=list(range(1, 30)),
    gamma=[0.00005, 0.0001, 0.0003, 0.0005],
    kernel=['rbf', 'sigmoid'],
)

# Find Optimal Hyperparameters and then create model
model = SVC()
best_params2 = search_hyperparams(
    model,
    param_grid,
    transformed_train_LDA,
    train_y,
    transformed_val_LDA,
    val_y,
)

# Re-apply the winning combination and refit on the training split.
svm = model.set_params(**best_params2)
svm.fit(transformed_train_LDA, train_y)

In [None]:
def eval_model(model, X_train, Y_train, X_test, Y_test, labels=None):
    """Plot train/test confusion matrices and print a test classification report.

    Draws two row-normalised confusion matrices side by side (training split on
    the left, test split on the right), each titled with ``model.score`` on
    that split, then prints ``classification_report`` for the test split.

    Args:
        model: Fitted estimator exposing ``predict`` and ``score``.
        X_train: Training features.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.
        labels: Display names for the classes. Defaults to the four tumour
            classes used in this notebook.

    Returns:
        None. The figure is drawn and the report is printed as side effects.
    """
    if labels is None:
        # NOTE(review): assumes the class encoding follows this order — confirm
        # against the label mapping produced by the data loader.
        labels = ['No Tumour', 'Glioma', 'Meningioma', 'Pituitary']

    fig = plt.figure(figsize=[25, 8])
    panels = (
        ('Training Set Performance: %1.3f', X_train, Y_train),
        ('Testing Set Performance: %1.3f', X_test, Y_test),
    )
    for position, (title, features, targets) in enumerate(panels, start=1):
        ax = fig.add_subplot(1, 2, position)
        ConfusionMatrixDisplay.from_estimator(
            model, features, targets, normalize='true', ax=ax, display_labels=labels
        )
        # Title each panel so the two matrices can be told apart at a glance.
        ax.set_title(title % model.score(features, targets))

    print(classification_report(Y_test, model.predict(X_test)))

In [None]:
# Evaluate the tuned SVM: training split for the left panel, test split for
# the right panel and the printed report.
eval_model(
    model=svm,
    X_train=transformed_train_LDA,
    Y_train=train_y,
    X_test=transformed_test_LDA,
    Y_test=test_y,
)