In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import tensorboard
from torch.utils.tensorboard import SummaryWriter
import os
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score, classification_report
from NN_MultiLayer import NeuralNetwork_MultiLayer


In [4]:
# Resolve data files relative to the directory the kernel was started in
WORK_DIR = os.getcwd()

# Read the training, validation and test spreadsheets (in that order) and keep
# them as NumPy arrays -- despite the "DF" suffix these names hold ndarrays
trainingDataDF, validationDataDF, testDataDF = (
    pd.read_excel(f'{WORK_DIR}/GeneExpressionCancer_{split}.xlsx').to_numpy()
    for split in ('training', 'validation', 'test')
)

In [5]:
# Split each array column-wise: the last column is taken as the label,
# every column before it as a feature
trainingFeatures, trainingLabels = trainingDataDF[:, :-1], trainingDataDF[:, -1]
validationFeatures, validationLabels = validationDataDF[:, :-1], validationDataDF[:, -1]
testFeatures, testLabels = testDataDF[:, :-1], testDataDF[:, -1]

In [6]:
# Standardize features: fit the scaler on the training set only, then apply
# that same fitted transform to the training, validation and test sets
scaler = StandardScaler().fit(trainingFeatures)
trainingData_scaled = scaler.transform(trainingFeatures)
validationData_scaled = scaler.transform(validationFeatures)
testData_scaled = scaler.transform(testFeatures)

# Neural Network

In [7]:
# Load the TensorBoard IPython extension so the %tensorboard magic can be used in this notebook
%load_ext tensorboard

In [8]:
# Instantiate NN, sized by the number of feature columns.
# Read the column count straight from the feature matrix instead of re-slicing
# the raw array and measuring its first row (which raises if there are no rows).
nn_multiLayer = NeuralNetwork_MultiLayer(nFeatures=trainingFeatures.shape[1])

In [11]:
# Train NN on the standardized features (scaler fitted on the training set), so
# the network and the logistic-regression baseline are fed the same inputs.
# NOTE(review): the result of trainModel is rebound to nn_multiLayer -- confirm
# that trainModel returns the trained model; if it returns None, this rebinding
# discards the network and the cell cannot be re-run.
nn_multiLayer = nn_multiLayer.trainModel(
    trainingFeatures=trainingData_scaled,
    trainingLabels=trainingLabels,
    validationFeatures=validationData_scaled,
    validationLabels=validationLabels)


AssertionError: Torch not compiled with CUDA enabled

In [None]:
# Fit the logistic regression baseline on the standardized training set
logisticReg = LogisticRegression()
logisticReg.fit(X=trainingData_scaled, y=trainingLabels)

In [None]:
# Validation set: second-column class probabilities (used as ROC scores), then hard labels
validation_yscore = logisticReg.predict_proba(X=validationData_scaled)[:, 1]
validation_ypred = logisticReg.predict(X=validationData_scaled)

In [None]:
# Test set: second-column class probabilities (used as ROC scores), then hard labels
test_yscore = logisticReg.predict_proba(X=testData_scaled)[:, 1]
test_ypred = logisticReg.predict(X=testData_scaled)

In [None]:
def logReg_performance_eval(y_true, y_pred, y_score, roc_label):
    """
    Report how well a binary classifier performed and add its ROC curve to the
    current matplotlib axes.

    Args:
        y_true: True target values.
        y_pred: Predicted target values produced by the trained model.
        y_score: Target scores (probability estimates of the positive class).
        roc_label: Prefix for this curve's legend entry; the AUC rounded to
            three decimals is appended to it.

    Returns:
        None. The classification report is printed, and the ROC curve is drawn
        on the current axes without opening a new figure or calling show, so
        successive calls overlay their curves on one plot.
    """
    # Text summary of the per-class metrics
    report = classification_report(y_true, y_pred)
    print(report)

    # ROC points and the area under the curve
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_score)
    auc_value = roc_auc_score(y_true, y_score)
    curve_label = f'{roc_label} (AUC = {round(auc_value, 3)})'

    # Draw onto whichever axes are current so curves can share a figure
    axes = plt.gca()
    axes.plot(false_pos_rate, true_pos_rate, marker='.', label=curve_label)
    axes.set_title('ROC Curve')
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    axes.legend(loc='lower right')
    

In [None]:
# Evaluate the validation split, then the test split; both calls run in this
# cell, so their ROC curves share one plot
for roc_name, split_true, split_pred, split_score in (
    ('Val', validationLabels, validation_ypred, validation_yscore),
    ('Test', testLabels, test_ypred, test_yscore),
):
    logReg_performance_eval(
        y_true=split_true, y_pred=split_pred, y_score=split_score, roc_label=roc_name
    )
