In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split  
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.svm import LinearSVC
from sklearn.naive_bayes import GaussianNB  
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA


In [2]:
data=pd.read_csv('../input/glaucoma/glaucoma.csv')

In [3]:
data.head()

Unnamed: 0,ag,at,as,an,ai,eag,eat,eas,ean,eai,...,tmt,tms,tmn,tmi,mr,rnf,mdic,emd,mv,Class
0,2.22,0.354,0.58,0.686,0.601,1.267,0.336,0.346,0.255,0.331,...,-0.018,-0.23,-0.51,-0.158,0.841,0.41,0.137,0.239,0.035,0
1,2.681,0.475,0.672,0.868,0.667,2.053,0.44,0.52,0.639,0.454,...,-0.014,-0.165,-0.317,-0.192,0.924,0.256,0.252,0.329,0.022,0
2,1.979,0.343,0.508,0.624,0.504,1.2,0.299,0.396,0.259,0.246,...,-0.097,-0.235,-0.337,-0.02,0.795,0.378,0.152,0.25,0.029,0
3,1.747,0.269,0.476,0.525,0.476,0.612,0.147,0.017,0.044,0.405,...,-0.035,-0.449,-0.217,-0.091,0.746,0.2,0.027,0.078,0.023,0
4,2.99,0.599,0.686,1.039,0.667,2.513,0.543,0.607,0.871,0.492,...,-0.105,0.084,-0.012,-0.054,0.977,0.193,0.297,0.354,0.034,0


**Class 0 represents no disease and class 1 represents glaucoma**

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196 entries, 0 to 195
Data columns (total 63 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ag      196 non-null    float64
 1   at      196 non-null    float64
 2   as      196 non-null    float64
 3   an      196 non-null    float64
 4   ai      196 non-null    float64
 5   eag     196 non-null    float64
 6   eat     196 non-null    float64
 7   eas     196 non-null    float64
 8   ean     196 non-null    float64
 9   eai     196 non-null    float64
 10  abrg    196 non-null    float64
 11  abrt    196 non-null    float64
 12  abrs    196 non-null    float64
 13  abrn    196 non-null    float64
 14  abri    196 non-null    float64
 15  hic     196 non-null    float64
 16  mhcg    196 non-null    float64
 17  mhct    196 non-null    float64
 18  mhcs    196 non-null    float64
 19  mhcn    196 non-null    float64
 20  mhci    196 non-null    float64
 21  phcg    196 non-null    float64
 22  ph

**There is no missing values and null values**

In [5]:
data.describe()

Unnamed: 0,ag,at,as,an,ai,eag,eat,eas,ean,eai,...,tmt,tms,tmn,tmi,mr,rnf,mdic,emd,mv,Class
count,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0,...,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0,196.0
mean,2.606531,0.45898,0.651781,0.835929,0.659995,1.874138,0.406372,0.486403,0.501214,0.480097,...,-0.004658,-0.039806,-0.147204,-0.03651,0.904985,0.182383,0.23127,0.308903,0.033541,0.5
std,0.659789,0.127013,0.158462,0.222166,0.161065,0.721218,0.125941,0.184486,0.286181,0.173977,...,0.128062,0.150077,0.134205,0.136423,0.112356,0.09408,0.123882,0.128246,0.02185,0.50128
min,1.312,0.201,0.345,0.397,0.369,0.415,0.137,0.017,0.008,0.098,...,-0.291,-0.449,-0.51,-0.405,0.647,-0.297,0.012,0.047,0.0,0.0
25%,2.13925,0.37075,0.5385,0.681,0.5505,1.30925,0.31575,0.38075,0.2805,0.3725,...,-0.101,-0.13525,-0.231,-0.1275,0.826,0.11975,0.144,0.2305,0.021,0.0
50%,2.533,0.4445,0.6305,0.8085,0.632,1.8435,0.4025,0.4685,0.5035,0.484,...,-0.0185,-0.0315,-0.1465,-0.036,0.8995,0.182,0.227,0.298,0.028,0.5
75%,2.9435,0.528,0.73825,0.952,0.74975,2.317,0.48325,0.6055,0.6895,0.59475,...,0.08775,0.068,-0.05625,0.0495,0.9685,0.237,0.29925,0.37925,0.03825,1.0
max,5.444,0.967,1.34,1.765,1.373,4.125,0.848,1.225,1.568,0.961,...,0.366,0.358,0.245,0.418,1.317,0.451,0.663,0.743,0.183,1.0


In [6]:
X=data.drop("Class",axis=1) #Defining X
Y=data["Class"] # Defining Y

In [7]:
X.head() # first five values of X

Unnamed: 0,ag,at,as,an,ai,eag,eat,eas,ean,eai,...,tmg,tmt,tms,tmn,tmi,mr,rnf,mdic,emd,mv
0,2.22,0.354,0.58,0.686,0.601,1.267,0.336,0.346,0.255,0.331,...,-0.236,-0.018,-0.23,-0.51,-0.158,0.841,0.41,0.137,0.239,0.035
1,2.681,0.475,0.672,0.868,0.667,2.053,0.44,0.52,0.639,0.454,...,-0.211,-0.014,-0.165,-0.317,-0.192,0.924,0.256,0.252,0.329,0.022
2,1.979,0.343,0.508,0.624,0.504,1.2,0.299,0.396,0.259,0.246,...,-0.185,-0.097,-0.235,-0.337,-0.02,0.795,0.378,0.152,0.25,0.029
3,1.747,0.269,0.476,0.525,0.476,0.612,0.147,0.017,0.044,0.405,...,-0.148,-0.035,-0.449,-0.217,-0.091,0.746,0.2,0.027,0.078,0.023
4,2.99,0.599,0.686,1.039,0.667,2.513,0.543,0.607,0.871,0.492,...,-0.052,-0.105,0.084,-0.012,-0.054,0.977,0.193,0.297,0.354,0.034


In [8]:
Y.head() # first five values of Y

0    0
1    0
2    0
3    0
4    0
Name: Class, dtype: int64

In [9]:
pca = PCA(n_components=0.99)
X = pca.fit_transform(X)

# Splitting Training and test data

In [10]:
X_train, X_test, Y_train, Y_test= train_test_split(X,Y,test_size=0.2, stratify=Y,random_state=42)

# Model training

# LDA classifier

In [11]:
model=LinearDiscriminantAnalysis()
model.fit(X_train,Y_train)

LinearDiscriminantAnalysis()

In [12]:
##ACCURACY
XtestPrediction=model.predict(X_test)
testDataAccuracy=accuracy_score(XtestPrediction,Y_test)
print("Accuracy of LDA = ",testDataAccuracy)
AUC_score=roc_auc_score(XtestPrediction,Y_test)
print("AUC score = ",AUC_score)


Accuracy of LDA =  0.75
AUC score =  0.7525252525252525


# Naives Bayes classifier

In [13]:
model=GaussianNB()
model.fit(X_train,Y_train)

GaussianNB()

In [14]:
##ACCURACY
XtestPrediction=model.predict(X_test)
testDataAccuracy=accuracy_score(XtestPrediction,Y_test)
print("Accuracy of Naives Bayes classifier = ",testDataAccuracy)
AUC_score=roc_auc_score(XtestPrediction,Y_test)
print("AUC score = ",AUC_score)


Accuracy of Naives Bayes classifier =  0.8
AUC score =  0.8125


# SVM Classifier

In [15]:
from sklearn import svm
model=svm.SVC()
model.fit(X_train,Y_train)

SVC()

In [16]:
##ACCURACY
XtestPrediction=model.predict(X_test)
testDataAccuracy=accuracy_score(XtestPrediction,Y_test)
print("Accuracy of SVC = ",testDataAccuracy)
AUC_score=roc_auc_score(XtestPrediction,Y_test)
print("AUC score = ",AUC_score)

Accuracy of SVC =  0.825
AUC score =  0.8258145363408521


# Building an Artificial Neural Network 

In [17]:
pca = PCA(n_components=0.95)
X = pca.fit_transform(X)

In [18]:
X_train, X_test, Y_train,Y_test=train_test_split(X,Y,test_size=0.2)

In [19]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense

2021-12-28 10:20:41.093526: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0


In [20]:
X_train=pd.DataFrame(X_train)

In [21]:
model=Sequential()
model.add(Dense(units=1500, activation='relu',input_dim=len(X_train.columns)))
model.add(Dense(units=1,activation='sigmoid'))

2021-12-28 10:20:45.003504: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-12-28 10:20:45.006900: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-12-28 10:20:45.047964: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-12-28 10:20:45.048599: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:00:04.0 name: Tesla P100-PCIE-16GB computeCapability: 6.0
coreClock: 1.3285GHz coreCount: 56 deviceMemorySize: 15.90GiB deviceMemoryBandwidth: 681.88GiB/s
2021-12-28 10:20:45.048655: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-12-28 10:20:45.074769: I tensorflow/stream_executor/platform/def

In [22]:
model.compile(loss='binary_crossentropy',optimizer='adam', metrics='accuracy')

In [23]:
model.fit(X_train,Y_train,epochs=20, batch_size=32)

2021-12-28 10:20:47.105265: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2021-12-28 10:20:47.116226: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 2000175000 Hz


Epoch 1/20


2021-12-28 10:20:47.671131: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
1/5 [=====>........................] - ETA: 0s - loss: 0.2951 - accuracy: 0.9688

2021-12-28 10:20:48.425370: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11


Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f9b54039b50>

**Accuracy of ANN Classifier**

In [24]:
#Train data accuracy
y_TrainAccuracy = model.predict(X_train)
y_TrainAccuracy = [0 if val < 0.5 else 1 for val in y_TrainAccuracy]
accuracy_score(Y_train, y_TrainAccuracy)

0.8846153846153846

In [25]:
#Test Data accuracy
y_TestAccuracy = model.predict(X_test)
y_TestAccuracy = [0 if val < 0.5 else 1 for val in y_TestAccuracy]
acc_score= accuracy_score(Y_test, y_TestAccuracy)
print("Accuracy = ",acc_score)
AUC_score=roc_auc_score(y_TestAccuracy,Y_test)
print("AUC score = ",AUC_score)

Accuracy =  0.8
AUC score =  0.8
