In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report,accuracy_score
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

def auc_scorer(clf, X, y, model):
    """Plot the ROC curve of a fitted classifier and return the curve data.

    Parameters
    ----------
    clf : fitted estimator
        Must provide ``predict_proba`` when ``model == 'RF'`` or
        ``decision_function`` when ``model == 'SVM'``.
    X : array-like
        Samples handed to the classifier's scoring method.
    y : array-like
        True labels handed to ``roc_curve`` together with the scores.
    model : str
        Either ``'RF'`` or ``'SVM'``; selects the scoring method and is
        shown in the plot legend.

    Returns
    -------
    tuple
        ``(fpr, tpr, roc_auc)`` as produced by ``roc_curve`` and ``auc``.

    Raises
    ------
    ValueError
        If ``model`` is neither ``'RF'`` nor ``'SVM'``.
    """
    # Validate first: without this an unknown model name fell through both
    # branches and crashed later with an unrelated "fpr is not defined" error.
    if model == 'RF':
        # Column 1 of predict_proba is used as the score passed to roc_curve.
        scores = clf.predict_proba(X)[:, 1]
    elif model == 'SVM':
        scores = clf.decision_function(X)
    else:
        raise ValueError(
            "model must be 'RF' or 'SVM', got %r" % (model,)
        )

    fpr, tpr, _ = roc_curve(y, scores)
    roc_auc = auc(fpr, tpr)

    plt.figure()    # Plot the ROC curve
    plt.plot(fpr, tpr, label='ROC curve from %s model (area = %0.3f)' % (model, roc_auc))
    plt.plot([0, 1], [0, 1], 'k--')  # diagonal reference line from (0, 0) to (1, 1)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend(loc="lower right")
    plt.show()

    return fpr, tpr, roc_auc

# from subprocess import check_output
# print(check_output(["ls", "../input"]).decode("utf8"))

In [13]:
# Load ckd.csv with pandas' default parsing (no missing-value markers declared).
df = pd.read_csv("ckd.csv")


In [14]:
# Show the frame as loaded; missing entries still appear as the literal "?" here.
df

Unnamed: 0,Age,Bp,Sg,Al,Su,Rbc,Pc,Pcc,Ba,Bgr,...,Pcv,Wbcc,Rbcc,Htn,Dm,Cad,Appet,pe,Ane,Class
0,48,80,1.02,1,0,?,normal,notpresent,notpresent,121,...,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,7,50,1.02,4,0,?,normal,notpresent,notpresent,?,...,38,6000,?,no,no,no,good,no,no,ckd
2,62,80,1.01,2,3,normal,normal,notpresent,notpresent,423,...,31,7500,?,no,yes,no,poor,no,yes,ckd
3,48,70,1.005,4,0,normal,abnormal,present,notpresent,117,...,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,51,80,1.01,2,0,normal,normal,notpresent,notpresent,106,...,35,7300,4.6,no,no,no,good,no,no,ckd
5,60,90,1.015,3,0,?,?,notpresent,notpresent,74,...,39,7800,4.4,yes,yes,no,good,yes,no,ckd
6,68,70,1.01,0,0,?,normal,notpresent,notpresent,100,...,36,?,?,no,no,no,good,no,no,ckd
7,24,?,1.015,2,4,normal,abnormal,notpresent,notpresent,410,...,44,6900,5,no,yes,no,good,yes,no,ckd
8,52,100,1.015,3,0,normal,abnormal,present,notpresent,138,...,33,9600,4,yes,yes,no,good,no,yes,ckd
9,53,90,1.02,2,0,abnormal,abnormal,present,notpresent,70,...,29,12100,3.7,yes,yes,no,poor,no,yes,ckd


In [18]:
import numpy as np
import pandas as pd

# Read dataset file ckd.csv. na_values="?" makes pandas parse every "?"
# placeholder as NaN while loading, so no separate replace("?", NaN) step is
# needed afterwards.
dataset = pd.read_csv("ckd.csv", header=0, na_values="?")

# Convert nominal values to binary values
cleanup = {"Rbc": {"normal": 1, "abnormal": 0},
           "Pc": {"normal": 1, "abnormal": 0},
           "Pcc": {"present": 1, "notpresent": 0},
           "Ba": {"present": 1, "notpresent": 0},
           "Htn": {"yes": 1, "no": 0},
           "Dm": {"yes": 1, "no": 0},
           "Cad": {"yes": 1, "no": 0},
           "Appet": {"good": 1, "poor": 0},
           "pe": {"yes": 1, "no": 0},
           "Ane": {"yes": 1, "no": 0}}

# Apply the mapping to the dataset. infer_objects() re-infers numeric dtypes
# for the mapped columns on pandas versions where replace() no longer
# downcasts object columns on its own; on older versions it changes nothing.
# NOTE(review): any label not listed in `cleanup` is left untouched and keeps
# its column as text - verify the raw file has no stray spellings.
dataset = dataset.replace(cleanup).infer_objects()

# Fill null values with the mean of the respective column, rounded to two
# decimals. numeric_only=True skips text columns such as "Class", which would
# otherwise make mean() raise TypeError on pandas >= 2.0.
# NOTE(review): mean-imputing the 0/1 columns produces fractional values
# (e.g. 0.81) - confirm this is intended rather than filling with the mode.
column_means = dataset.mean(numeric_only=True).round(2)
dataset = dataset.fillna(column_means)

# Save this dataset as final.csv for further prediction
dataset.to_csv("final.csv", sep=',', index=False)

In [19]:
# Reload the cleaned table that the preprocessing cell wrote to final.csv.
d = pd.read_csv("final.csv")

In [20]:
# Show the frame read back from final.csv.
d

Unnamed: 0,Age,Bp,Sg,Al,Su,Rbc,Pc,Pcc,Ba,Bgr,...,Pcv,Wbcc,Rbcc,Htn,Dm,Cad,Appet,pe,Ane,Class
0,48.0,80.00,1.020,1.00,0.00,0.81,1.00,0.0,0.0,121.00,...,44.00,7800.00,5.20,1.0,1,0.0,1.0,0.0,0.0,ckd
1,7.0,50.00,1.020,4.00,0.00,0.81,1.00,0.0,0.0,148.04,...,38.00,6000.00,4.71,0.0,0,0.0,1.0,0.0,0.0,ckd
2,62.0,80.00,1.010,2.00,3.00,1.00,1.00,0.0,0.0,423.00,...,31.00,7500.00,4.71,0.0,1,0.0,0.0,0.0,1.0,ckd
3,48.0,70.00,1.005,4.00,0.00,1.00,0.00,1.0,0.0,117.00,...,32.00,6700.00,3.90,1.0,0,0.0,0.0,1.0,1.0,ckd
4,51.0,80.00,1.010,2.00,0.00,1.00,1.00,0.0,0.0,106.00,...,35.00,7300.00,4.60,0.0,0,0.0,1.0,0.0,0.0,ckd
5,60.0,90.00,1.015,3.00,0.00,0.81,0.77,0.0,0.0,74.00,...,39.00,7800.00,4.40,1.0,1,0.0,1.0,1.0,0.0,ckd
6,68.0,70.00,1.010,0.00,0.00,0.81,1.00,0.0,0.0,100.00,...,36.00,8406.12,4.71,0.0,0,0.0,1.0,0.0,0.0,ckd
7,24.0,76.47,1.015,2.00,4.00,1.00,0.00,0.0,0.0,410.00,...,44.00,6900.00,5.00,0.0,1,0.0,1.0,1.0,0.0,ckd
8,52.0,100.00,1.015,3.00,0.00,1.00,0.00,1.0,0.0,138.00,...,33.00,9600.00,4.00,1.0,1,0.0,1.0,0.0,1.0,ckd
9,53.0,90.00,1.020,2.00,0.00,0.00,0.00,1.0,0.0,70.00,...,29.00,12100.00,3.70,1.0,1,0.0,0.0,0.0,1.0,ckd
