In [1]:
import tensorflow as tf
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, LSTM, SimpleRNN, Dropout, GaussianNoise
from keras.optimizers import Adam, SGD
from keras.utils import to_categorical 
from matplotlib import pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

Using TensorFlow backend.


In [2]:
# Read the spreadsheet and use its 'Name' column as the row index in one chained step.
df = pd.read_excel('Overall Colorectal Cancer Generated data.xlsx').set_index('Name')

In [3]:
# Separate the target column ('Marker') from the feature columns.
# drop() returns a new frame instead of mutating `df`, so this cell can be
# re-run safely: the original in-place drop raised KeyError on a second run
# because 'Marker' had already been removed from `df`.
df_outputs = df['Marker']
df_inputs = df.drop(columns='Marker')

In [4]:
#splitting the data to training and testing
# 70 % train / 30 % test, shuffled. The fixed random_state makes the shuffle
# (and therefore every metric computed downstream) repeatable across kernel restarts;
# without it each run evaluates on a different random test set.
train_inputs, test_inputs, train_outputs, test_outputs = train_test_split(
    df_inputs,
    df_outputs,
    test_size=0.3,
    shuffle=True,
    random_state=42)

In [5]:
# One-hot encode the class labels for the softmax output layer.
# num_classes is pinned so both splits always produce the same number of columns;
# when it is omitted, to_categorical infers the width from the largest label it sees,
# so a split that happened to miss a class would get a different shape.
# 2 matches the two output units of the network defined below.
train_outputs = to_categorical(train_outputs, num_classes=2)
test_outputs = to_categorical(test_outputs, num_classes=2)

In [6]:
# Parameters used in the network
iterations = 1000                        # passed to model.fit as the number of epochs
number_of_glycans = df_inputs.shape[1]   # one input feature per column (glycan) of df_inputs
PIs = 0                                  # not used by any active code visible in this notebook


In [7]:
# Build the network
# The #186 system (label kept from the original notebook; its meaning is not documented here)
#
# Fully connected classifier assembled layer by layer:
#   13-unit ReLU hidden layer -> 13-unit sigmoid hidden layer -> 2-unit softmax output.
# No input shape is declared, so the input width is not fixed at construction time.
# The optional GaussianNoise(0.1) input layer and the Dropout(0.2) layer after the
# first hidden layer are currently disabled.
model = Sequential()
model.add(Dense(13, activation='relu'))
model.add(Dense(13, activation='sigmoid'))
model.add(Dense(2, activation='softmax'))

In [8]:
# Configure training: Adam optimiser, categorical cross-entropy on the one-hot
# targets, and accuracy reported alongside the loss.
# `learning_rate` is the supported name of Adam's step-size argument; the older
# `lr` alias is deprecated and is rejected by newer Keras releases.
model.compile(
    optimizer=Adam(learning_rate=0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'])

In [9]:
# Train the network on the training split.
# Logging is silenced (verbose=0) and the sample order is kept fixed between
# epochs (shuffle=False). The call stays the last bare expression of the cell so
# the returned History object is still echoed as the cell output.
train_matrix = train_inputs.to_numpy()
model.fit(train_matrix, train_outputs, epochs=iterations, batch_size=32, verbose=0, shuffle=False)

<keras.callbacks.callbacks.History at 0x294dc6d9cc0>

In [10]:
# Score the trained network on the held-out split and print the result
# (the loss followed by the 'accuracy' metric requested in compile()).
# The features are converted with .to_numpy() so evaluation receives the same
# kind of input (a plain ndarray) that model.fit was trained on.
print(model.evaluate(
    test_inputs.to_numpy(),
    test_outputs,
    batch_size = 32,
    verbose = 1))

[0.28142722917766105, 0.9073171019554138]


In [11]:
#Calculate the labels for the test set
# One row of softmax class scores per test sample; later cells take the argmax
# of each row to get the predicted class. The features are passed as an ndarray,
# matching the input type used in model.fit.
predictions = model.predict(test_inputs.to_numpy())


In [13]:
# Confusion matrix and per-class report for the ANN.
# Both the one-hot targets and the softmax outputs are collapsed to class indices
# once, then reused for the two reports.
ann_true_labels = np.argmax(test_outputs, axis=1)
ann_pred_labels = np.argmax(predictions, axis=1)
print(confusion_matrix(ann_true_labels, ann_pred_labels))
print(classification_report(ann_true_labels, ann_pred_labels))

[[270  33]
 [ 24 288]]
              precision    recall  f1-score   support

           0       0.92      0.89      0.90       303
           1       0.90      0.92      0.91       312

    accuracy                           0.91       615
   macro avg       0.91      0.91      0.91       615
weighted avg       0.91      0.91      0.91       615



In [14]:
# Define the support-vector classifier and train it on the training split.
# The kernel is chosen here; 'rbf' and 'sigmoid' are the alternatives that can be
# swapped in for 'linear'.
svclassifier = SVC(kernel='linear')
# The SVC needs integer class labels, so the one-hot training targets are
# converted back to class indices first.
svc_train_labels = np.argmax(train_outputs, axis=1)
svclassifier.fit(train_inputs, svc_train_labels)

SVC(kernel='linear')

In [15]:
# Predict a class label for every sample of the held-out split with the trained SVC.
y_pred = svclassifier.predict(X=test_inputs)

In [16]:
# Confusion matrix and per-class report for the SVC classifier.
# The one-hot test targets are reduced to class indices once and shared by both reports.
svc_true_labels = np.argmax(test_outputs, axis=1)
print(confusion_matrix(svc_true_labels, y_pred))
print(classification_report(svc_true_labels, y_pred))

[[282  21]
 [ 33 279]]
              precision    recall  f1-score   support

           0       0.90      0.93      0.91       303
           1       0.93      0.89      0.91       312

    accuracy                           0.91       615
   macro avg       0.91      0.91      0.91       615
weighted avg       0.91      0.91      0.91       615



In [17]:
# Keep both confusion matrices as variables for the summary table.
# confusion_matrix(y_true, y_pred) puts the true class on the rows and the
# predicted class on the columns.
true_labels = np.argmax(test_outputs, axis=1)
ann_labels = np.argmax(predictions, axis=1)
matrix_ANN = confusion_matrix(true_labels, ann_labels)
matrix_lin = confusion_matrix(true_labels, y_pred)

In [18]:
# Prepare an all-zero (float) summary table to populate:
# one row per outcome type, one column per classifier.
outcome_rows = ['TruePos', 'TrueNeg', 'FalsePos', 'FalseNeg']
classifier_columns = ['ANN', 'LinearClassifier']
table = pd.DataFrame(0.0, index=outcome_rows, columns=classifier_columns)

In [19]:
#populate ANN column
# matrix_ANN has true classes on the rows and predicted classes on the columns,
# with class 1 treated as the positive class:
#   [1][1] true positives, [0][0] true negatives,
#   [0][1] false positives, [1][0] false negatives.
# .loc[row, column] writes directly into `table` by label. The previous chained
# form table['ANN'][0] = ... assigned through an intermediate Series using
# positional keys on a string index, which pandas deprecates and which does not
# update `table` at all when Copy-on-Write is enabled.
table.loc['TruePos', 'ANN'] = matrix_ANN[1][1]
table.loc['TrueNeg', 'ANN'] = matrix_ANN[0][0]
table.loc['FalsePos', 'ANN'] = matrix_ANN[0][1]
table.loc['FalseNeg', 'ANN'] = matrix_ANN[1][0]

In [20]:
#populate classifier column
# matrix_lin has true classes on the rows and predicted classes on the columns,
# with class 1 treated as the positive class:
#   [1][1] true positives, [0][0] true negatives,
#   [0][1] false positives, [1][0] false negatives.
# .loc[row, column] writes directly into `table` by label. The previous chained
# form table['LinearClassifier'][0] = ... assigned through an intermediate Series
# using positional keys on a string index, which pandas deprecates and which does
# not update `table` at all when Copy-on-Write is enabled.
table.loc['TruePos', 'LinearClassifier'] = matrix_lin[1][1]
table.loc['TrueNeg', 'LinearClassifier'] = matrix_lin[0][0]
table.loc['FalsePos', 'LinearClassifier'] = matrix_lin[0][1]
table.loc['FalseNeg', 'LinearClassifier'] = matrix_lin[1][0]

In [21]:
#print table detailing correct and incorrect predictions, by Positive/Negative
# Rows are the four outcome counts (TruePos, TrueNeg, FalsePos, FalseNeg);
# the columns put the ANN next to the linear-kernel SVC for comparison.
print(table)

            ANN  LinearClassifier
TruePos   288.0             279.0
TrueNeg   270.0             282.0
FalsePos   33.0              21.0
FalseNeg   24.0              33.0
