In [20]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np 
import pandas as pd
import os
from matplotlib import pyplot as ckvs
import seaborn as cksb
from IPython.display import display, HTML
from plotly import express
from sklearn import preprocessing, decomposition, feature_selection
from sklearn import tree, linear_model
from sklearn import neural_network
from sklearn import ensemble
from sklearn import preprocessing
from sklearn import pipeline
import sklearn

from sklearn import utils  
from sklearn import model_selection  
from sklearn import metrics  
from ucimlrepo import fetch_ucirepo 

ModuleNotFoundError: No module named 'plotly'

In [None]:
def DataRead():
    """Load the CKD dataset, downloading and caching it on first use.

    When ``CKD_Data.csv`` is not present in the current working directory the
    dataset is fetched with ``fetch_ucirepo(id=336)``, its feature and target
    frames are joined column-wise, and the result is cached to that file.
    Otherwise the cached file is read back.

    A short summary (row and column counts) is printed in both cases.

    Returns:
        pandas.DataFrame: the feature columns followed by the target column(s).
    """
    cache_file = "CKD_Data.csv"
    if not os.path.isfile(cache_file):
        data = fetch_ucirepo(id=336)
        dtck = pd.concat([data.data.features, data.data.targets], axis=1)
        # Keep the row index out of the cache so it does not come back as a
        # spurious 'Unnamed: 0' column on the next read.
        dtck.to_csv(cache_file, index=False)
    else:
        dtck = pd.read_csv(cache_file)
        # Cache files written with the index included still carry that column.
        if 'Unnamed: 0' in dtck.columns:
            dtck = dtck.drop('Unnamed: 0', axis=1)
    print("------------------------")
    print("     Records: {}\n     Features: {}".format(dtck.shape[0], dtck.shape[1]))
    print("------------------------")
    return dtck
# Load the dataset (fetched and cached by DataRead when no local copy exists)
# and preview its first rows.
KDS=DataRead()
KDS.head()

In [None]:
def InfoData(dtck):
    """Print a structural summary and render descriptive statistics.

    Args:
        dtck: the DataFrame to describe.

    Returns:
        The same ``dtck`` object, unchanged, so the call can be chained.
    """
    print("\nData Information\n")
    # DataFrame.info() writes its report itself and returns None; wrapping it
    # in print() would append a stray "None" to the output.
    dtck.info()
    print("\n")
    print("\nData Statistics\n")
    display(HTML(dtck.describe().to_html()))
    return dtck
# Print the summary; InfoData hands back the frame it was given.
KDS=InfoData(KDS)

In [None]:
def DataCleaning(dtck):
    """Drop incomplete rows and resample back to the original row count.

    If any value is missing, rows containing a missing value are removed and
    the remaining rows are bootstrap-resampled (with replacement, fixed seed)
    until the frame has as many rows as it started with. The index is reset
    afterwards. A frame without missing values is returned untouched.

    Note: because the resampling draws with replacement, the result contains
    repeated rows; a later train/test split can therefore place copies of the
    same record on both sides.

    Args:
        dtck: the DataFrame to clean.

    Returns:
        pandas.DataFrame: the cleaned frame (or ``dtck`` itself when nothing
        was missing).

    Raises:
        ValueError: if every row contains at least one missing value.
    """
    data_length_init = len(dtck)
    missing_per_column = dtck.isna().sum()
    if missing_per_column.sum() > 0:
        print("\nMissing Values Found!!\n")
        print(missing_per_column)
        print("\nCleaning.....\n")
        dtck = dtck.dropna()
        if len(dtck) == 0:
            raise ValueError("Every row contains a missing value; nothing is left to resample.")
        # Ask for the original row count directly; deriving it through a float
        # ratio can truncate to one row short.
        dtck = utils.resample(dtck, replace=True, n_samples=data_length_init, random_state=10)
        print("\nMissing Values Status after cleaning\n")
        print(dtck.isna().sum())
        # info() prints its own report and returns None, so it is not wrapped in print().
        dtck.info()
        dtck = dtck.reset_index(drop=True)
    return dtck
# Replace KDS with the cleaned frame and preview its first rows.
KDS=DataCleaning(KDS)
KDS.head()

In [None]:
def RectifyFet(dtck):
    """Normalise the one malformed label in the ``class`` column.

    The value ``"ckd\\t"`` (with a trailing tab) is rewritten to ``"ckd"``.
    The column is updated on the frame that was passed in, and that same
    frame is returned.
    """
    label_fixes = {"ckd\t": "ckd"}
    corrected = dtck['class'].replace(label_fixes)
    dtck['class'] = corrected
    return dtck
# Fix the malformed class label, then show the class balance and a preview.
KDS=RectifyFet(KDS)
print(KDS['class'].value_counts())
KDS.head()

In [None]:
def CatGraph(dtck,ft,nm):
    """Plot class counts per level of a categorical feature.

    Cross-tabulates ``dtck[ft]`` against ``dtck['class']`` and draws the
    counts as a horizontal bar chart.

    Args:
        dtck: DataFrame holding the feature and a ``class`` column.
        ft: name of the categorical column to tabulate.
        nm: human-readable feature name used in the chart title.
    """
    # The notebook's import cell binds pandas as `pd`.
    counts = pd.crosstab(dtck[ft], dtck['class'])
    counts.plot(kind='barh', figsize=(4,2), color=['m','g'],
                title="Kidney Disease by {}".format(nm))
# Categorical columns to chart, paired position-by-position with their titles.
cat_fets=["rbc","pc","pcc","ane","ba","appet"]
names=["Red blood cells", "Pus cell", "Clumps of Pus cell", "Anaemia Possibility","Bacteria Infection", "Level Of Appetite"]
for cat_column, cat_title in zip(cat_fets, names):
    CatGraph(KDS, cat_column, cat_title)

In [None]:
def NumGraph(dtck,ft, nm):
    """Draw a pie chart comparing a feature's mean across the two classes.

    Args:
        dtck: DataFrame holding the feature and a ``class`` column with the
            labels ``'ckd'`` and ``'notckd'``.
        ft: name of the numeric column to average.
        nm: human-readable feature name used in the chart title.
    """
    ckd_rows = dtck['class'] == 'ckd'
    healthy_rows = dtck['class'] == 'notckd'
    ckvs.figure(figsize=(4,4))
    ckvs.title("Kidney Disease by {}".format(nm))
    # One wedge per class, sized by that class's mean of the feature.
    wedge_sizes = [dtck.loc[ckd_rows, ft].mean(), dtck.loc[healthy_rows, ft].mean()]
    ckvs.pie(
        wedge_sizes,
        labels=["CKD","NOT CKD"],
        colors=cksb.color_palette('Set3'),
        autopct='%1.0f%%',
        pctdistance=0.5,
        labeldistance=0.2,
    )
    ckvs.show()
# Numeric columns to chart, paired position-by-position with their titles.
num_fets=["bgr","bu","sc","sod","hemo"]
names=["Amount of Glucose", "Amount of Urea", "Amount of Serum Creatinine", "Sodium Level", "Amount of Haemoglobin"]
for num_column, num_title in zip(num_fets, names):
    NumGraph(KDS, num_column, num_title)

In [None]:
def DataEncoding(dtck):
    """Integer-encode every text column except the ``class`` target.

    Each object/string feature column is replaced by integer codes assigned
    in order of first appearance (first distinct value -> 0, next -> 1, ...).
    Numeric columns and the ``class`` column are passed through unchanged,
    and ``class`` is placed last. The input frame is not modified.

    Missing values in an encoded column receive the code -1.

    Args:
        dtck: DataFrame containing a ``class`` column.

    Returns:
        pandas.DataFrame: encoded features followed by ``class``.
    """
    dtcktg = dtck['class']
    dtck1 = dtck.drop('class', axis=1)
    # Cover both object columns and the dedicated string dtype.
    dtckcat = [
        col for col in dtck1.columns
        if pd.api.types.is_object_dtype(dtck1[col].dtype)
        or pd.api.types.is_string_dtype(dtck1[col].dtype)
    ]
    for col in dtckcat:
        # factorize yields true integer codes; replace(list, list) depended on
        # pandas implicitly downcasting an object column, which is deprecated.
        dtck1[col] = pd.factorize(dtck1[col])[0]
    dtck2 = pd.concat([dtck1, dtcktg], axis=1)
    return dtck2
# Build the encoded frame (class column last) and preview its first rows.
ECKDS=DataEncoding(KDS)
ECKDS.head()

In [None]:
def PCACK(dtck,n,w,h,col,TX):
    """Fit a PCA and plot the explained-variance ratio of each component.

    The last column of ``dtck`` is excluded from the fit; every other column
    is used as a PCA input.

    Args:
        dtck: DataFrame whose last column is to be left out of the PCA.
        n: number of principal components.
        w: figure width.
        h: figure height.
        col: bar colour.
        TX: label inserted into the title before "Normalization Data".

    Returns:
        The fitted model's ``explained_variance_ratio_`` array.
    """
    # The notebook's import cell binds numpy as `np`.
    arrdtck = np.array(dtck.iloc[:, :-1])
    pcdtck = decomposition.PCA(n_components=n)
    pcdtck.fit(arrdtck)
    variance_ratio = pcdtck.explained_variance_ratio_
    pcdtckcm = ["Comp-{}".format(i+1) for i in range(len(variance_ratio))]
    ckvs.figure(figsize=(w,h))
    ckvs.title("Variance(PCA={})\n{} Normalization Data\nMaximum Variance Value: {}".format(n,TX,round(max(variance_ratio),8)),fontsize=18)
    ckvs.bar(pcdtckcm, variance_ratio.tolist(), width=0.5, color=col)
    ckvs.xlabel("PCA",fontsize=14)
    ckvs.ylabel("Variance",fontsize=14)
    ckvs.grid()
    ckvs.show()
    return variance_ratio

def DataScale(dtck):
    """Fit a ``StandardScaler`` on ``dtck`` and return the transformed data."""
    return preprocessing.StandardScaler().fit_transform(dtck)

In [None]:
pcvl=[]
# PCACK already leaves out the last column, so pass the frame with 'class'
# still in place; dropping 'class' here as well would silently exclude a
# real feature from the "Before" plot.
pcvl.append(PCACK(ECKDS,2,6,3,"#FF00FF","Before"))

# Record a flag for every component explaining more than 60% of the variance.
outvl=[]
for pv in pcvl:
    for p in pv:
        if p>0.6:
            outvl.append(True)

ck_features=ECKDS.drop('class',axis=1)
if outvl:
    # One component dominates: standardise the features.
    ScKDSdt=DataScale(ck_features)
else:
    # No dominant component: carry the features forward unscaled so the
    # cells below still have a frame to work with.
    ScKDSdt=ck_features.to_numpy()
ScKDS=pd.DataFrame(ScKDSdt,columns=ck_features.columns.tolist())
# Assign by position so a non-default index on ECKDS cannot misalign labels.
ScKDS['class']=ECKDS['class'].to_numpy()
PCACK(ScKDS,2,6,3,"#6AFB92","After")
ScKDS.head()

In [None]:
def CKF1(dtck, min_importance=0.002):
    """Select features by random-forest importance.

    The last column of ``dtck`` is used as the target and all other columns
    as predictors. A ``RandomForestClassifier`` (seeded with
    ``random_state=0``) is fitted, and features whose importance exceeds
    ``min_importance`` are kept, plotted, and listed.

    Args:
        dtck: DataFrame with the target in its last column.
        min_importance: importance a feature must exceed to be kept.

    Returns:
        list: names of the retained features.
    """
    target_name = dtck.columns.tolist()[-1]
    Xdtck = dtck.drop([target_name], axis=1)
    Ydtck = dtck[target_name]
    ckensm = ensemble.RandomForestClassifier(random_state=0)
    ckensm.fit(Xdtck, Ydtck)
    # The notebook's import cell binds pandas as `pd`.
    ftsckdf = pd.DataFrame({
        "Feature": Xdtck.columns.tolist(),
        "Importance": ckensm.feature_importances_,
    })
    ftsckdf1 = ftsckdf[ftsckdf['Importance'] > min_importance]
    ckvs.figure(figsize=(7,3))
    ckvs.bar(ftsckdf1['Feature'], ftsckdf1['Importance'])
    ckvs.title("Feature Importance",fontsize=20,color="b")
    ckvs.xlabel("Features",fontsize=17,color="b")
    ckvs.ylabel("Importance",fontsize=17,color="b")
    ckvs.xticks(rotation=90)
    ckvs.grid()
    ckvs.show()
    display(HTML(ftsckdf1.to_html()))
    print("Total Features Selecetd Using Ensemble: {}".format(len(ftsckdf1)))
    return ftsckdf1['Feature'].tolist()

In [None]:
def CKF2(dtck):
    """Select features with recursive feature elimination (RFE).

    The last column of ``dtck`` is used as the target and all other columns
    as predictors. The target labels are converted to integer codes, an RFE
    wrapper around ``LogisticRegression`` is fitted to keep 80% of the
    predictors (rounded down), and the features ranked 1 are reported.

    Args:
        dtck: DataFrame with the target in its last column.

    Returns:
        list: names of the features RFE ranked 1.
    """
    print("--------------------------------------------------------------")
    print("Feature Selection for the Data using RFE")
    print("--------------------------------------------------------------")
    target_name = dtck.columns.tolist()[-1]
    Xdtck = dtck.drop([target_name], axis=1)
    # factorize gives a genuine integer array (codes in order of first
    # appearance); replace(list, list) relied on deprecated implicit
    # downcasting and can leave an object-dtype target.
    Ydtck = pd.factorize(dtck[target_name])[0]
    M2 = feature_selection.RFE(
        estimator=linear_model.LogisticRegression(),
        n_features_to_select=int(len(Xdtck.columns)*0.8),
        step=0.7,
    )
    M2Trnd = M2.fit(Xdtck, Ydtck)
    print("--------------------------------------------------------------")
    ftrfe = pd.DataFrame({"Feature": Xdtck.columns, "Ranking": M2Trnd.ranking_})
    ftrfe2 = ftrfe[ftrfe['Ranking'] == 1]
    print("Total Features Selecetd Using RFE: {}".format(len(ftrfe2)))
    display(HTML(ftrfe2.to_html()))
    return ftrfe2['Feature'].tolist()

In [None]:
# Run both selectors, then keep the features chosen by both, in the order the
# ensemble selector returned them.
ensft=CKF1(ScKDS)
rfft=CKF2(ScKDS)
kdsfets=[feature for feature in ensft if feature in rfft]

In [None]:
# Model inputs: the jointly selected columns of the encoded frame, with the
# class column as the target.
X=ECKDS.drop('class',axis=1)[kdsfets]
y=ECKDS['class']
# 80/20 split with a fixed seed.
split_parts=model_selection.train_test_split(X,y, train_size=0.8, random_state=10)
x_train,x_test,y_train,y_test=split_parts
print(y_test.value_counts())

In [None]:
# Preview the selected feature matrix instead of dumping the whole frame.
X.head()

In [None]:
# Candidate classifiers and their display names, kept in matching order.
CkModelInit=[
    tree.DecisionTreeClassifier(min_weight_fraction_leaf=0.4),
    ensemble.RandomForestClassifier(min_weight_fraction_leaf=0.4),
]
ckmdnm=[
    "Decision Tree Classifier",
    "Random Forest",
]

# Every label seen in either split, so each confusion matrix has exactly one
# row and one column per class even when a model predicts a single class.
ck_labels=sorted(set(y_train)|set(y_test))

# Best-run accuracy, precision, recall and F1 per classifier (one list each).
MetCKInit=[[],[],[],[]]
print("_____________________________________________________________________________")
for ckmodel, ckname in zip(CkModelInit, ckmdnm):
    print("                            {} ".format(ckname))
    print("_____________________________________________________________________________")
    # Per-run accuracy, precision, recall, F1, confusion matrix, text report.
    ScMetLp=[[],[],[],[],[],[]]
    # NOTE(review): the estimators have no random_state, so each refit can
    # differ, and the run reported below is the one with the highest accuracy
    # on the test split -- an optimistic estimate. Confirm this is intended.
    for ts in range(10):
        ckmodel.fit(x_train,y_train)
        kidprd=ckmodel.predict(x_test)
        # Scale to percent first and round afterwards: consistent with the
        # accuracy figure and free of artefacts such as 56.99999999999999.
        ScMetLp[0].append(round(metrics.accuracy_score(y_test,kidprd)*100,2))
        ScMetLp[1].append(round(metrics.precision_score(y_test, kidprd, average='weighted')*100,2))
        ScMetLp[2].append(round(metrics.recall_score(y_test, kidprd, average='weighted')*100,2))
        ScMetLp[3].append(round(metrics.f1_score(y_test, kidprd, average='weighted')*100,2))
        cm=pd.DataFrame(
            metrics.confusion_matrix(y_test, kidprd, labels=ck_labels),
            index=pd.Index(ck_labels, name='True'),
            columns=pd.Index(ck_labels, name='Predicted'),
        )
        ScMetLp[4].append(cm)
        ScMetLp[5].append(metrics.classification_report(y_test, kidprd))
    # First run that reached the best accuracy.
    opt_idx=ScMetLp[0].index(max(ScMetLp[0]))
    for metric_pos in range(4):
        MetCKInit[metric_pos].append(ScMetLp[metric_pos][opt_idx])
    print("\nAccuracy: {}%\n".format(ScMetLp[0][opt_idx]))
    print("\nClassification Report for {} \n\n{}".format(ckname,ScMetLp[5][opt_idx]))
    print("\nConfusion Matrix for {} \n\n{}\n".format(ckname,ScMetLp[4][opt_idx]))
    print("_____________________________________________________________________________")

In [None]:
# One row per classifier with its best-run scores.
# The notebook's import cell binds pandas as `pd`.
ResInitCKD=pd.DataFrame({
    "Classifiers":ckmdnm,
    "Accuracy":MetCKInit[0],
    "Precision":MetCKInit[1],
    "Recall":MetCKInit[2],
    "F1-Score":MetCKInit[3],
})

# One bar chart per metric, classifiers ordered best-first for that metric.
# ResInitCKD is re-sorted on each pass, so it ends ordered by the last metric.
for metric_name in ResInitCKD.columns.tolist()[1:]:
    ResInitCKD=ResInitCKD.sort_values(by=metric_name,ascending=False)
    fig = express.bar(ResInitCKD, y=metric_name, x="Classifiers",text=metric_name,color="Classifiers",
                 title="Comparison of {}".format(metric_name),height=400,width=600)
    fig.update_layout(
        font=dict(
            family="Times New Roman, Bold",
            size=15,
            color="black"
        )
    )
    fig.show()

In [18]:
import warnings
warnings.filterwarnings("ignore")
import numpy 
import pandas
import os
from matplotlib import pyplot as ckvs
import seaborn as cksb
from IPython.display import display, HTML
from plotly import express
from sklearn import preprocessing, decomposition, feature_selection
from sklearn import tree, linear_model
from sklearn import neural_network
from sklearn import ensemble
from sklearn import preprocessing
from sklearn import pipeline
import sklearn

from sklearn import utils  
from sklearn import model_selection  
from sklearn import metrics  
from ucimlrepo import fetch_ucirepo 

ModuleNotFoundError: No module named 'plotly'