In [None]:
import numpy as np 
import pandas as pd
import torch.nn as nn
import matplotlib.pyplot as plt 
import  seaborn as sna
from google.colab import files 
# Ask for a file through the Colab `files` helper imported above; the returned
# value is not referenced again in the visible cells.
uploded = files.upload()
# Read the CSV by its hard-coded name, so the uploaded file must be called exactly this.
df= pd.read_csv('kidney_disease-1.csv')
# Preview the first ten rows.
df.head(10)

In [None]:
# (rows, columns) of the frame as loaded, before any cleaning.
df.shape

In [None]:
# Column dtypes before the text values are mapped to numbers.
df.dtypes

In [None]:
# Mapping the text to 1/0 and cleaning the dataset.
# The cell is written so that it can be re-run on an already cleaned frame:
# the label column is looked up under either of its names and the final
# rename/drop are no-ops when they have already been applied.
df[['htn','dm','cad','pe','ane']] = df[['htn','dm','cad','pe','ane']].replace(to_replace={'yes':1,'no':0})
df[['rbc','pc']] = df[['rbc','pc']].replace(to_replace={'abnormal':1,'normal':0})
df[['pcc','ba']] = df[['pcc','ba']].replace(to_replace={'present':1,'notpresent':0})
# For 'appet' a literal 'no' is mapped to NaN (missing), not to 0.
df[['appet']] = df[['appet']].replace(to_replace={'good':1,'poor':0,'no':np.nan})

# Target: 1.0 = ckd, 0.0 = notckd; 'ckd\t' is the same label with a stray tab.
label_col = 'classification' if 'classification' in df.columns else 'class'
df[label_col] = df[label_col].replace(to_replace={'ckd':1.0,'ckd\t':1.0,'notckd':0.0,'no':0.0})
df = df.rename(columns={'classification':'class'})  # silently skipped if already renamed

df['pe'] = df['pe'].replace(to_replace='good',value=0) # Not having pedal edema is good
# Whitespace-polluted variants of yes/no that the plain mappings above do not match.
df['cad'] = df['cad'].replace(to_replace='\tno',value=0)
df['dm'] = df['dm'].replace(to_replace={'\tno':0,'\tyes':1,' yes':1, '':np.nan})

# The row identifier carries no signal; errors='ignore' keeps a re-run from failing.
df = df.drop(columns='id', errors='ignore')

In [None]:
# Column dtypes after the text-to-number mapping above.
df.dtypes

In [None]:
# True when at least one cell anywhere in the frame is missing.
df.isna().values.any()

In [None]:
# Per-column non-null counts and dtypes.
df.info()

In [None]:
# Bar chart of how many rows carry each value of the 'class' column.
# The Series is passed as x= explicitly: newer seaborn releases treat the first
# positional argument as `data`, so a bare positional Series is no longer
# interpreted as the variable to count.
sna.countplot(x=df['class'])
plt.ylabel('Total numbers of 0 to 4 values')
plt.xlabel('Actual numbers in the database')

In [None]:
# Same class-count chart on a small figure with seaborn's dark grid theme.
# The figure is created before the theme is set, exactly as before.
plt.figure(figsize=(5, 4))
sna.set(style="darkgrid")
ax = sna.countplot(data=df, x="class")
ax.set_ylabel('Total numbers')
ax.set_xlabel('Convert them into 0 and 1')

In [None]:
# Number of missing values in each column, before imputation.
df.isna().sum()

In [None]:
from sklearn.impute import SimpleImputer

# Replace NaNs with the column mean, one positional column range at a time.
# Positions 15 and 17 (and the last column, 24) are deliberately left out here,
# as in the original three stanzas.
# NOTE(review): presumably those skipped columns are handled elsewhere or have
# no gaps - confirm against the isna() summary in the next cell.
# `verbose` is no longer passed: it was removed from SimpleImputer in newer
# scikit-learn releases and its old default matched the value used here.
for column_range in (slice(0, 15), slice(16, 17), slice(18, 24)):
    s_imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    df.iloc[:, column_range] = s_imputer.fit_transform(df.iloc[:, column_range])

In [None]:
# Missing values per column after imputation; columns outside the imputed
# position ranges (0-14, 16, 18-23) may still report gaps.
df.isna().sum()

In [None]:
# Pairwise correlations, computed once and reused for both the mask and the plot.
corr = df.corr()
# Upper triangle (diagonal included) is masked so each pair is drawn only once.
matrix = np.triu(corr)

plt.figure(figsize=(18, 18))
sna.heatmap(corr, mask=matrix, annot=True, cmap="magma")
plt.show()

In [None]:
import pandas as pd
from sklearn import preprocessing

# Rescale every column of the frame (features and the 0/1 label alike) with
# MinMaxScaler's default settings.
x = df.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
# Rebuild the frame with its original column names and index; previously both
# were dropped and the columns became anonymous integers.
df = pd.DataFrame(x_scaled, columns=df.columns, index=df.index)
df.head(10)

In [None]:
from sklearn.model_selection import train_test_split

# The last column is the target; every column before it is a feature.
X, Y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# 80/20 shuffled split with a fixed seed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.20, shuffle=True, random_state=42
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn mean/std on the training split only, then apply the same statistics to
# the test split. Calling fit_transform on the test split (as before) re-fitted
# the scaler on test data, so train and test ended up on different scales.
sc=StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# (samples, features) of the training split.
X_train.shape

In [None]:
# Number of labels in the test split.
Y_test.shape

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest
# Ten entropy-split trees, each allowed to consider up to 24 features per split;
# fit() returns the estimator itself, so construction and training are chained.
random_forest = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    max_features=24,
    random_state=42,
).fit(X_train, Y_train)

# Mean accuracy on the test split (displayed as the cell output).
random_forest.score(X_test, Y_test)

In [None]:
# Instantiates a second StandardScaler. The visible cells never call fit or
# transform on `scaler`; the features were already standardised with `sc` above.
scaler = StandardScaler()

In [None]:
#Reshaping the dataset to 3-D to pass it through CNN
# Row and feature counts are taken from the arrays themselves instead of the
# hard-coded 320/80 x 24, so the cell keeps working if the data or the split
# ratio changes, and it is safe to re-run on arrays that are already 3-D.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

## **CNN**

In [None]:
import tensorflow as tf 
import time
import pandas as pd  
import numpy as np 
import matplotlib.pyplot as plt 
import keras 
from keras import regularizers
from keras.models import Sequential 
from keras.layers import Dense, Flatten, Conv1D, Conv2D, MaxPooling2D, Dropout, Activation, BatchNormalization
from sklearn.metrics import confusion_matrix 
from keras.optimizers import Adam, RMSprop, SGD, Adamax, Nadam

#Preparing the CNN model
# Two Conv1D blocks (each followed by batch norm and light dropout), then two
# small dense layers and a single sigmoid unit for the binary target. Every
# trainable layer uses a normal initializer and L2(0.01) weight regularisation.
model = Sequential()
model.add(Conv1D(filters=16,kernel_size=2,kernel_initializer='normal', kernel_regularizer= regularizers.l2(0.01),activation='relu',input_shape=(24,1)))
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv1D(16,1,kernel_initializer='normal', kernel_regularizer= regularizers.l2(0.01),activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Flatten())
model.add(Dense(16,kernel_initializer='normal', kernel_regularizer= regularizers.l2(0.01),activation='relu'))
model.add(Dropout(0.1))

model.add(Dense(8,kernel_initializer='normal', kernel_regularizer= regularizers.l2(0.01),activation='relu'))
model.add(Dropout(0.15))

model.add(Dense(1,kernel_initializer='normal', kernel_regularizer= regularizers.l2(0.01),activation='sigmoid'))
# summary() prints the table itself; wrapping it in print() only added a stray "None".
model.summary()

# Compile once, with the configuration that is actually trained. The earlier
# mse/adam compile was immediately overwritten by this one, so it is dropped and
# the timer now measures the compile that matters.
start = time.time()
model.compile(optimizer=Adam(learning_rate=0.1),loss='binary_crossentropy',metrics=['accuracy'])
print('Compilation time: ', time.time() - start)

In [None]:
# Train the CNN for 8 epochs; the test split is also passed as the per-epoch
# validation set.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=8,
    verbose=1,
)

In [None]:
# Final evaluation of the model
# evaluate() returns [loss, accuracy] for this compile configuration; only the
# accuracy entry is reported, as a percentage.
scores = model.evaluate(X_test, Y_test, verbose=1)
cnn_accuracy = scores[1]
print("Accuracy: %.2f%%" % (cnn_accuracy*100))

In [None]:
#Plots of Accuracy and Loss
def plotLearningCurve(history,epochs=None):
  """Draw train-vs-validation curves for accuracy and then for loss.

  Two figures are shown one after the other, each with a 'Train' and a
  'Validation' line.

  Args:
    history: object exposing a ``history`` dict with the keys 'accuracy',
      'val_accuracy', 'loss' and 'val_loss' (this notebook passes the value
      returned by ``fit``).
    epochs: number of epochs to place on the x-axis. When omitted, the number
      of recorded accuracy values is used, so the call cannot drift out of
      sync with the training run.
  """
  if epochs is None:
    epochs = len(history.history['accuracy'])
  epochRange = range(1,epochs+1)

  # Same plotting recipe for both metrics; only the key and the label differ.
  for key, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(epochRange,history.history[key])
    plt.plot(epochRange,history.history['val_' + key])
    plt.title('Model ' + label)
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.legend(['Train','Validation'],loc='upper left')
    plt.show()

In [None]:
# Take the epoch count from the recorded history rather than repeating the
# literal 8 used in fit(), so the plot stays correct if the epoch count changes.
plotLearningCurve(history,len(history.history['loss']))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, log_loss, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn import metrics

# CNN outputs on the test split; ravel() turns the single-unit output column
# into a 1-D vector, and round() converts the sigmoid values to hard 0/1 labels.
y_true, y_pred = Y_test , model.predict(X_test).ravel()

print('Results on the test set:')
y_pred = y_pred.round()
print(classification_report(y_true, y_pred))

# scikit-learn lays the matrix out as rows = true class, columns = predicted
# class, with labels in the given order. For labels [0, 1] ravel() therefore
# yields TN, FP, FN, TP. The earlier indexing read TP from [0][0] and TN from
# [1][1], which swapped the two and mislabelled every derived rate.
cm7 = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm7.ravel()

var = ((TP + TN)/(TP + TN + FP + FN)) *100
print('Testing accuracy : ',var)
print('Sensitivity : ', TP/(TP+FN)*100)
print('Specificity : ', TN/(TN+FP)*100)
print('Precision : ', TP/(TP+FP)*100)
print('false positive rate : ', FP/(FP+TN)*100)
print('false negative rate : ', FN/(FN+TP)*100)
print('Negative Predictive Value : ', TN/(TN+FN)*100)
print('False Discovery rate : ', FP/(TP+FP)*100)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true, y_pred)*100)
print('Mean Squared Error:', metrics.mean_squared_error(y_true, y_pred)*100)
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_true, y_pred))*100)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# ROC/AUC need the continuous model scores. Feeding the already rounded 0/1
# labels (as before) collapses the curve to a single operating point and
# understates what the model can do at other thresholds.
y_score = model.predict(X_test).ravel()
fpr, tpr, _ = roc_curve(Y_test, y_score)
auc_value = roc_auc_score(Y_test, y_score)

fig, ax1 = plt.subplots(1,1, figsize = (5, 5), dpi = 100)
ax1.plot(fpr, tpr, 'b.-', label = 'CNN_Model (AUC:%2.2f)' % auc_value)
ax1.plot(fpr, fpr, 'k-', label = 'Random Guessing')
ax1.legend(loc = 4)
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate');
# NOTE(review): the other ROC cells in this notebook save to the same file
# name, so the last one executed overwrites this figure.
fig.savefig('roc.pdf')

## **ANN**

In [None]:
import tensorflow as tf 
import time
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import keras 
from keras.models import Sequential 
from keras.layers import Dense 
from sklearn.metrics import confusion_matrix 

def create_model():
    """Build and compile the fully connected (ANN) binary classifier.

    Architecture: Flatten -> Dense(8, relu) -> Dense(18, relu) -> Dense(1, sigmoid),
    all dense layers with a uniform kernel initializer. Compiled with the
    default 'adam' optimizer, binary cross-entropy loss and accuracy metric.

    Returns:
        The compiled Sequential model.
    """
    # Imported locally so the function does not rely on the CNN cell's imports.
    from keras.layers import Flatten

    classifier = Sequential()
    # The feature arrays were reshaped to (samples, 24, 1) for the CNN, which a
    # Dense layer declared with input_dim=24 does not accept. Flattening first
    # lets the ANN consume those arrays as they are.
    classifier.add(Flatten(input_shape = (24, 1)))
    classifier.add(Dense(activation = "relu", units = 8, kernel_initializer = "uniform"))
    classifier.add(Dense(activation = "relu", units = 18, kernel_initializer = "uniform"))
    classifier.add(Dense(activation = "sigmoid", units = 1, kernel_initializer = "uniform"))

    # Single compile. The earlier mse compile was overwritten straight away, and
    # the Adam(lr=0.09) object was never handed to compile(), so training always
    # used the default 'adam' settings - that behaviour is kept.
    start = time.time()
    classifier.compile(optimizer = 'adam' , loss = 'binary_crossentropy',
                   metrics = ['accuracy'] )
    print('compilation time : ', time.time() - start)

    return classifier
classifier = create_model()
# summary() prints the table itself; print() around it only added "None".
classifier.summary()

In [None]:
# Train the ANN for 8 epochs; the test split doubles as the validation set.
history = classifier.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=8,
    verbose=1,
)

In [None]:
# Final evaluation of the model
# Index 1 of the evaluate() result is the accuracy metric requested at compile time.
scores = classifier.evaluate(X_test, Y_test, verbose=1)
ann_accuracy = scores[1]
print("Accuracy: %.2f%%" % (ann_accuracy*100))

In [None]:
#Plots of Accuracy and Loss
def plotLearningCurve(history,epochs=None):
  """Draw train-vs-validation curves for accuracy and then for loss.

  Two figures are shown one after the other, each with a 'Train' and a
  'Validation' line.

  Args:
    history: object exposing a ``history`` dict with the keys 'accuracy',
      'val_accuracy', 'loss' and 'val_loss' (this notebook passes the value
      returned by ``fit``).
    epochs: number of epochs to place on the x-axis. When omitted, the number
      of recorded accuracy values is used, so the call cannot drift out of
      sync with the training run.
  """
  if epochs is None:
    epochs = len(history.history['accuracy'])
  epochRange = range(1,epochs+1)

  # Same plotting recipe for both metrics; only the key and the label differ.
  for key, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(epochRange,history.history[key])
    plt.plot(epochRange,history.history['val_' + key])
    plt.title('Model ' + label)
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.legend(['Train','Validation'],loc='upper left')
    plt.show()

In [None]:
# Take the epoch count from the recorded history rather than repeating the
# literal 8 used in fit(), so the plot stays correct if the epoch count changes.
plotLearningCurve(history,len(history.history['loss']))

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, log_loss, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Score the ANN (`classifier`). This cell previously called `model.predict`,
# i.e. it re-reported the CNN's results under the ANN heading.
y_true, y_pred = Y_test , classifier.predict(X_test).ravel()

print('Results on the test set:')
y_pred = y_pred.round()
print(classification_report(y_true, y_pred))

# scikit-learn lays the matrix out as rows = true class, columns = predicted
# class, with labels in the given order. For labels [0, 1] ravel() therefore
# yields TN, FP, FN, TP. The earlier indexing read TP from [0][0] and TN from
# [1][1], which swapped the two and mislabelled every derived rate.
cm7 = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm7.ravel()

var = ((TP + TN)/(TP + TN + FP + FN)) *100
print('Testing accuracy : ',var)
print('Sensitivity : ', TP/(TP+FN)*100)
print('Specificity : ', TN/(TN+FP)*100)
print('Precision : ', TP/(TP+FP)*100)
print('false positive rate : ', FP/(FP+TN)*100)
print('false negative rate : ', FN/(FN+TP)*100)
print('Negative Predictive Value : ', TN/(TN+FN)*100)
print('False Discovery rate : ', FP/(TP+FP)*100)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true, y_pred)*100)
print('Mean Squared Error:', metrics.mean_squared_error(y_true, y_pred)*100)
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_true, y_pred))*100)

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# Use the ANN's own continuous scores. The cell previously reused the rounded
# `y_pred` left behind by the metrics cell, which collapses the ROC curve to a
# single operating point.
y_score = classifier.predict(X_test).ravel()
fpr, tpr, _ = roc_curve(Y_test, y_score)
auc_value = roc_auc_score(Y_test, y_score)

fig, ax1 = plt.subplots(1,1, figsize = (5, 5), dpi = 100)
ax1.plot(fpr, tpr, 'b.-', label = 'ANN_Model (AUC:%2.2f)' % auc_value)
ax1.plot(fpr, fpr, 'k-', label = 'Random Guessing')
ax1.legend(loc = 4)
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate');
# NOTE(review): the other ROC cells in this notebook save to the same file
# name, so the last one executed overwrites this figure.
fig.savefig('roc.pdf')

## **LSTM Approach**

In [None]:
# LSTM is imported from the public `keras.layers` namespace (the same one the
# CNN cell uses for its layers); the private `keras.layers.recurrent` module
# path is not available in newer Keras releases.
from keras.layers import LSTM
from keras.models import Sequential

In [None]:
# Shape the features as (samples, features, 1) for the recurrent layers. Sizes
# come from the arrays themselves rather than the hard-coded 320/80 x 24, and
# the reshape is a no-op on arrays that are already 3-D.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [None]:
# The LSTM architecture
# Four stacked LSTM layers, each followed by dropout, feeding a single output unit.
regressor = Sequential()
# First LSTM layer with Dropout regularisation
regressor.add(LSTM(units=64, activation= 'relu', kernel_initializer= 'uniform', return_sequences = True, input_shape=(X_train.shape[1],X_train.shape[2])))
regressor.add(Dropout(0.5))
# Second LSTM layer
regressor.add(LSTM(units=32,activation= 'relu', return_sequences=True))
regressor.add(Dropout(0.1))
# Third LSTM layer
regressor.add(LSTM(units=24, return_sequences=True))
regressor.add(Dropout(0.08))
# Fourth LSTM layer
regressor.add(LSTM(units=64))
regressor.add(Dropout(0.8))
# The output layer
# Sigmoid keeps the output in (0, 1). The loss below is binary cross-entropy
# without from_logits, which expects probabilities; the previous linear output
# was unbounded.
regressor.add(Dense(units=1, activation='sigmoid'))

# Start the timer here: the old print used a `start` left over from the CNN
# cell, so it reported the time since that cell ran, not the compile time.
start = time.time()
regressor.compile(optimizer="adam",loss="binary_crossentropy", metrics=['accuracy'])
print('compilation time : ', time.time() - start)
# Fitting to the training set
history = regressor.fit(X_train,Y_train,batch_size=8, epochs=8,verbose=1,validation_data=(X_test,Y_test))

In [None]:
# Final evaluation of the model
# Index 1 of the evaluate() result is the accuracy metric requested at compile time.
scores = regressor.evaluate(X_test, Y_test, verbose=1)
lstm_accuracy = scores[1]
print("Accuracy: %.2f%%" % (lstm_accuracy*100))

In [None]:
#Plots of Accuracy and Loss
def plotLearningCurve(history,epochs=None):
  """Draw train-vs-validation curves for accuracy and then for loss.

  Two figures are shown one after the other, each with a 'Train' and a
  'Validation' line.

  Args:
    history: object exposing a ``history`` dict with the keys 'accuracy',
      'val_accuracy', 'loss' and 'val_loss' (this notebook passes the value
      returned by ``fit``).
    epochs: number of epochs to place on the x-axis. When omitted, the number
      of recorded accuracy values is used, so the call cannot drift out of
      sync with the training run.
  """
  if epochs is None:
    epochs = len(history.history['accuracy'])
  epochRange = range(1,epochs+1)

  # Same plotting recipe for both metrics; only the key and the label differ.
  for key, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(epochRange,history.history[key])
    plt.plot(epochRange,history.history['val_' + key])
    plt.title('Model ' + label)
    plt.xlabel('Epoch')
    plt.ylabel(label)
    plt.legend(['Train','Validation'],loc='upper left')
    plt.show()

In [None]:
# Take the epoch count from the recorded history rather than repeating the
# literal 8 used in fit(), so the plot stays correct if the epoch count changes.
plotLearningCurve(history,len(history.history['loss']))

In [None]:
from sklearn.metrics import roc_curve, roc_auc_score

# Score the LSTM (`regressor`) directly. This cell runs before any LSTM
# prediction has been made, so it used to plot whatever `y_true`/`y_pred` an
# earlier section left behind, under the LSTM label.
y_score = regressor.predict(X_test).ravel()
fpr, tpr, _ = roc_curve(Y_test, y_score)
auc_value = roc_auc_score(Y_test, y_score)

fig, ax1 = plt.subplots(1,1, figsize = (5, 5), dpi = 100)
ax1.plot(fpr, tpr, 'b.-', label = 'LSTM_Model (AUC:%2.2f)' % auc_value)
ax1.plot(fpr, fpr, 'k-', label = 'Random Guessing')
ax1.legend(loc = 4)
ax1.set_xlabel('False Positive Rate')
ax1.set_ylabel('True Positive Rate');
# NOTE(review): the other ROC cells in this notebook save to the same file
# name, so the last one executed overwrites this figure.
fig.savefig('roc.pdf')

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, log_loss, cohen_kappa_score
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Score the LSTM (`regressor`). This cell previously called `model.predict`,
# i.e. it re-reported the CNN's results under the LSTM heading.
y_true, y_pred = Y_test , regressor.predict(X_test).ravel()

print('Results on the test set:')
# Threshold at 0.5 instead of round(): identical for scores inside [0, 1], but
# it still yields strict 0/1 labels if the output layer is unbounded.
y_pred = (y_pred > 0.5).astype(float)
print(classification_report(y_true, y_pred))

# scikit-learn lays the matrix out as rows = true class, columns = predicted
# class, with labels in the given order. For labels [0, 1] ravel() therefore
# yields TN, FP, FN, TP. The earlier indexing read TP from [0][0] and TN from
# [1][1], which swapped the two and mislabelled every derived rate.
cm7 = confusion_matrix(y_true, y_pred, labels=[0, 1])
TN, FP, FN, TP = cm7.ravel()

var = ((TP + TN)/(TP + TN + FP + FN)) *100
print('Testing accuracy : ',var)
print('Sensitivity : ', TP/(TP+FN)*100)
print('Specificity : ', TN/(TN+FP)*100)
print('Precision : ', TP/(TP+FP)*100)
print('false positive rate : ', FP/(FP+TN)*100)
print('false negative rate : ', FN/(FN+TP)*100)
print('Negative Predictive Value : ', TN/(TN+FN)*100)
print('False Discovery rate : ', FP/(TP+FP)*100)
print('Mean Absolute Error:', metrics.mean_absolute_error(y_true, y_pred)*100)
print('Mean Squared Error:', metrics.mean_squared_error(y_true, y_pred)*100)
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_true, y_pred))*100)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Report the three scores in a fixed order using the `y_pred` labels produced
# by the preceding metrics cell.
for template, scorer in (
    ('Precision: %.3f', precision_score),
    ('Recall: %.3f', recall_score),
    ('F1 Score: %.3f', f1_score),
):
    print(template % scorer(Y_test, y_pred))