<a href="https://colab.research.google.com/github/Aravindh-dasari/chronicdisease/blob/Kidney/Kidney_Model_Code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn import ensemble
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

import joblib

In [2]:
# Read the raw kidney dataset. The path lives under /content/drive/MyDrive,
# so this presumably needs Google Drive to be mounted first -- TODO confirm.
kidney_csv_path = "/content/drive/MyDrive/Chronic_Project/kidney.csv"
df = pd.read_csv(kidney_csv_path)

In [3]:
# Encode the categorical columns as numbers, rename the target column to
# 'class', drop the 'id' column and discard every row that has a missing value.
#
# NOTE: this cell rewrites `df` (and renames 'classification'), so re-run the
# cell that loads the CSV before re-running this one.

# Applied in order; each entry is (columns, {raw value: encoded value}).
# The last three entries mop up stray raw values: 'good' in 'pe', and
# tab/space-padded yes/no plus the empty string in 'cad' / 'dm'.
# 'no' in 'appet' is treated as missing, so those rows are removed by the
# dropna below.
value_encodings = [
    (['htn', 'dm', 'cad', 'pe', 'ane'], {'yes': 1, 'no': 0}),
    (['rbc', 'pc'], {'abnormal': 1, 'normal': 0}),
    (['pcc', 'ba'], {'present': 1, 'notpresent': 0}),
    (['appet'], {'good': 1, 'poor': 0, 'no': np.nan}),
    (['classification'], {'ckd': 1.0, 'ckd\t': 1.0, 'notckd': 0.0, 'no': 0.0}),
    (['pe'], {'good': 0}),
    (['cad'], {'\tno': 0}),
    (['dm'], {'\tno': 0, '\tyes': 1, ' yes': 1, '': np.nan}),
]
for columns, mapping in value_encodings:
    df[columns] = df[columns].replace(to_replace=mapping)

df = (
    df.rename(columns={'classification': 'class'})
      .drop(columns='id')
      .dropna(axis=0)
      # `replace` used to downcast object columns to numeric silently; that
      # behaviour is deprecated in pandas. Convert explicitly so the encoded
      # columns (including the 'class' target) are numeric dtypes rather than
      # object. Columns that still hold strings are left unchanged.
      .infer_objects()
)

In [4]:
# Select the feature columns and the target, then make a stratified
# 67/33 train/test split with a fixed seed.
cols = ['bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc']
X, y = df[cols], df['class']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=44,
    stratify=y,
)

# Report the shapes of both partitions.
for message, features, target in (
    ('Shape training set: X:{}, y:{}', X_train, y_train),
    ('Shape test set: X:{}, y:{}', X_test, y_test),
):
    print(message.format(features.shape, target.shape))

Shape training set: X:(105, 7), y:(105,)
Shape test set: X:(53, 7), y:(53,)


In [5]:
# Candidate classifiers, all fitted on the same training split.
# Keys are left-padded with spaces so the printed names line up in a column;
# later cells look models up by these exact (padded) keys.

# Fixed seed for every estimator with a stochastic component, so that a fresh
# "Restart & Run All" reproduces the same fitted models and scores.
SEED = 44

# NOTE(review): the RBF SVC and the MLP are fed unscaled features; both are
# typically scale-sensitive -- consider wrapping them with a scaler. TODO confirm.
models = {
    "                   Logistic Regression": LogisticRegression(),
    "                   K-Nearest Neighbors": KNeighborsClassifier(),
    "                         Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Support Vector Machine (Linear Kernel)": LinearSVC(max_iter=1500, random_state=SEED),
    "   Support Vector Machine (RBF Kernel)": SVC(max_iter=1500),
    "                        Neural Network": MLPClassifier(max_iter=2500, random_state=SEED),
    "                         Random Forest": RandomForestClassifier(random_state=SEED),
    "                     Gradient Boosting": GradientBoostingClassifier(random_state=SEED)
}

# Fit each estimator in place on the training data.
for name, model in models.items():
    model.fit(X_train, y_train)
    print(name + " trained.")
print('--------------------------------')

                   Logistic Regression trained.
                   K-Nearest Neighbors trained.
                         Decision Tree trained.
Support Vector Machine (Linear Kernel) trained.
   Support Vector Machine (RBF Kernel) trained.
                        Neural Network trained.




                         Random Forest trained.
                     Gradient Boosting trained.
--------------------------------


In [6]:
# Print the held-out accuracy of every fitted model as a percentage.
for name, model in models.items():
    test_accuracy_pct = model.score(X_test, y_test) * 100
    print(name + ": {:.2f}%".format(test_accuracy_pct))
print("------------------------------")

                   Logistic Regression: 96.23%
                   K-Nearest Neighbors: 96.23%
                         Decision Tree: 98.11%
Support Vector Machine (Linear Kernel): 96.23%
   Support Vector Machine (RBF Kernel): 73.58%
                        Neural Network: 73.58%
                         Random Forest: 98.11%
                     Gradient Boosting: 98.11%
------------------------------


In [7]:
# Pick the fitted Random Forest out of `models`; the lookup key must match the
# space-padded dictionary key exactly.
random_forest_key = "                         Random Forest"
Random_Forest = models[random_forest_key]

In [9]:
# Evaluate the Random Forest on the held-out split: confusion matrix,
# overall accuracy, then the per-class precision/recall/F1 report.
pred = Random_Forest.predict(X_test)
cm = confusion_matrix(y_true=y_test, y_pred=pred)
test_accuracy = accuracy_score(y_true=y_test, y_pred=pred)
per_class_report = classification_report(y_true=y_test, y_pred=pred)

print(cm)

print("Accuracy:", test_accuracy)
print(per_class_report)

[[39  0]
 [ 1 13]]
Accuracy: 0.9811320754716981
              precision    recall  f1-score   support

         0.0       0.97      1.00      0.99        39
         1.0       1.00      0.93      0.96        14

    accuracy                           0.98        53
   macro avg       0.99      0.96      0.98        53
weighted avg       0.98      0.98      0.98        53



In [10]:
# Persist the Random Forest that was selected and evaluated.
# `model` is only the leftover loop variable from the
# `for name, model in models.items()` loops, i.e. the last entry of `models`
# (Gradient Boosting), so the estimator to save is named explicitly here.
joblib.dump(Random_Forest, r"/content/drive/MyDrive/Chrnoic_Models/kidney_model.pkl")

['/content/drive/MyDrive/Chrnoic_Models/kidney_model.pkl']