In [None]:
import warnings

warnings.filterwarnings('ignore')

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Read the raw records from disk and preview the first rows.
DATA_FILE = 'kidney_disease.csv'
data = pd.read_csv(DATA_FILE)
data.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,38,6000,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,31,7500,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,35,7300,4.6,no,no,no,good,no,no,ckd


In [None]:
# Show the dtype pandas inferred for each column when reading the CSV.
data.dtypes

id                  int64
age               float64
bp                float64
sg                float64
al                float64
su                float64
rbc                object
pc                 object
pcc                object
ba                 object
bgr               float64
bu                float64
sc                float64
sod               float64
pot               float64
hemo              float64
pcv                object
wc                 object
rc                 object
htn                object
dm                 object
cad                object
appet              object
pe                 object
ane                object
classification     object
dtype: object

In [None]:
# Remove the 'id' column (presumably just a row identifier, not a feature).
# errors='ignore' keeps this cell re-runnable: executing it a second time is a
# no-op instead of raising KeyError because the column is already gone.
data = data.drop(columns=['id'], errors='ignore')

In [None]:
# Mapping from the dataset's abbreviated headers to descriptive column names.
col = dict([
    ('age', 'age'),
    ('bp', 'blood_pressure'),
    ('sg', 'specific_gravity'),
    ('al', 'albumin'),
    ('su', 'sugar'),
    ('rbc', 'red_blood_cells'),
    ('pc', 'pus_cell'),
    ('pcc', 'pus_cell_clumps'),
    ('ba', 'bacteria'),
    ('bgr', 'blood_glucose_random'),
    ('bu', 'blood_urea'),
    ('sc', 'serum_creatinine'),
    ('sod', 'sodium'),
    ('pot', 'potassium'),
    ('hemo', 'hemoglobin'),
    ('pcv', 'packed_cell_volume'),
    ('wc', 'white_blood_cell_count'),
    ('rc', 'red_blood_cell_count'),
    ('htn', 'hypertension'),
    ('dm', 'diabetes_mellitus'),
    ('cad', 'coronary_artery_disease'),
    ('appet', 'appetite'),
    ('pe', 'pedal_edema'),
    ('ane', 'anemia'),
    ('classification', 'class'),
])

# Apply the new names and preview the result.
data = data.rename(columns=col)
data.head()

Unnamed: 0,age,blood_pressure,specific_gravity,albumin,sugar,red_blood_cells,pus_cell,pus_cell_clumps,bacteria,blood_glucose_random,...,packed_cell_volume,white_blood_cell_count,red_blood_cell_count,hypertension,diabetes_mellitus,coronary_artery_disease,appetite,pedal_edema,anemia,class
0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,121.0,...,44,7800,5.2,yes,yes,no,good,no,no,ckd
1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,,...,38,6000,,no,no,no,good,no,no,ckd
2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,423.0,...,31,7500,,no,yes,no,poor,no,yes,ckd
3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,117.0,...,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,106.0,...,35,7300,4.6,no,no,no,good,no,no,ckd


In [None]:
# These three columns were loaded with dtype object; convert them to numbers,
# turning any entry that cannot be parsed into NaN.
for lab_column in ('packed_cell_volume', 'white_blood_cell_count', 'red_blood_cell_count'):
    data[lab_column] = pd.to_numeric(data[lab_column], errors='coerce')

In [None]:
# Encode the target as 1 = ckd, 0 = notckd. Labels are stripped first so that
# any variant with stray whitespace (such as 'ckd\t') resolves to the same
# class instead of needing its own hard-coded dictionary entry.
class_codes = {'ckd': 1, 'notckd': 0}
encoded_class = data['class'].str.strip().map(class_codes)

# An unknown label would otherwise turn into a silent NaN target; fail loudly.
unmapped_labels = data.loc[encoded_class.isna() & data['class'].notna(), 'class'].unique()
if len(unmapped_labels):
    raise ValueError(f'Unexpected class labels: {unmapped_labels!r}')

data['class'] = encoded_class

In [None]:
# Number of rows per encoded target value (1 = ckd, 0 = notckd).
data['class'].value_counts()

class
1    250
0    150
Name: count, dtype: int64

In [None]:
# Partition the column names by dtype in a single pass:
# object-typed columns are treated as categorical, everything else as numeric.
categorial_cols = []
numeric_cols = []
for column_name, column_dtype in data.dtypes.items():
    if column_dtype == 'object':
        categorial_cols.append(column_name)
    else:
        numeric_cols.append(column_name)

In [None]:
# List the distinct raw values of every categorical column to spot dirty entries.
for name in categorial_cols:
    distinct_values = data[name].unique()
    print(f'{name} has {distinct_values} values\n')

red_blood_cells has [nan 'normal' 'abnormal'] values

pus_cell has ['normal' 'abnormal' nan] values

pus_cell_clumps has ['notpresent' 'present' nan] values

bacteria has ['notpresent' 'present' nan] values

hypertension has ['yes' 'no' nan] values

diabetes_mellitus has ['yes' 'no' ' yes' '\tno' '\tyes' nan] values

coronary_artery_disease has ['no' 'yes' '\tno' nan] values

appetite has ['good' 'poor' nan] values

pedal_edema has ['no' 'yes' nan] values

anemia has ['no' 'yes' nan] values



In [None]:
# Some answers carry stray whitespace (' yes', '\tno', '\tyes'); strip it so
# every entry collapses to plain 'yes' / 'no'. Missing values stay NaN.
#
# The cleaned column is assigned back rather than calling
# data[col].replace(..., inplace=True): that chained in-place form operates on
# a temporary Series and leaves `data` unchanged under pandas Copy-on-Write.
for noisy_column in ('diabetes_mellitus', 'coronary_artery_disease'):
    data[noisy_column] = data[noisy_column].str.strip()

In [None]:
def mean(feature):
    """Fill the missing entries of ``data[feature]`` in the global frame.

    Note: despite the function's name, the fill value is the column's
    *median*, not its arithmetic mean.
    """
    fill_value = data[feature].median()
    data[feature] = data[feature].fillna(fill_value)


# Impute every column that is not object-typed.
for numeric_name in numeric_cols:
    mean(numeric_name)

In [None]:
def mode(feature):
    """Fill the missing entries of ``data[feature]`` with its most frequent value.

    ``Series.mode()`` may return several values; the first one is used.
    """
    most_frequent = data[feature].mode()[0]
    data[feature] = data[feature].fillna(most_frequent)


# Impute every object-typed (categorical) column.
for categorical_name in categorial_cols:
    mode(categorical_name)

In [None]:
# Report how many distinct values each categorical column holds after cleaning.
for name in categorial_cols:
    n_categories = data[name].nunique()
    print(f"{name} - {n_categories} categories\n")

red_blood_cells - 2 categories

pus_cell - 2 categories

pus_cell_clumps - 2 categories

bacteria - 2 categories

hypertension - 2 categories

diabetes_mellitus - 2 categories

coronary_artery_disease - 2 categories

appetite - 2 categories

pedal_edema - 2 categories

anemia - 2 categories



In [None]:
# Replace the string values of each categorical column with integer codes.
# A single encoder object is re-fitted for every column, so after the loop it
# only remembers the classes of the last column processed.
label_encoder = LabelEncoder()
for name in categorial_cols:
    label_encoder.fit(data[name])
    data[name] = label_encoder.transform(data[name])

In [None]:
# Count the remaining missing values per column.
data.isna().sum()

age                        0
blood_pressure             0
specific_gravity           0
albumin                    0
sugar                      0
red_blood_cells            0
pus_cell                   0
pus_cell_clumps            0
bacteria                   0
blood_glucose_random       0
blood_urea                 0
serum_creatinine           0
sodium                     0
potassium                  0
hemoglobin                 0
packed_cell_volume         0
white_blood_cell_count     0
red_blood_cell_count       0
hypertension               0
diabetes_mellitus          0
coronary_artery_disease    0
appetite                   0
pedal_edema                0
anemia                     0
class                      0
dtype: int64

In [None]:
# Every column except the target is an independent variable.
ind_col = list(data.columns.drop('class'))

# Feature matrix and target vector.
X, y = data[ind_col], data['class']

In [None]:
# Hold out 20 % of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Number of test-split rows per target class.
y_test.value_counts()

class
1    52
0    28
Name: count, dtype: int64

# SVM

In [None]:
# Baseline SVM with default hyper-parameters.
# SVC's default RBF kernel works on distances between samples, so features
# measured on very different scales must be standardised first; otherwise the
# large-valued columns dominate the kernel. The scaler lives inside the
# pipeline, so it is fitted on the training split only and then applied
# unchanged to the test split.
svm = make_pipeline(StandardScaler(), SVC(class_weight='balanced'))
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

# Test-set metrics (positive class = 1).
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1score = f1_score(y_test, y_pred)
print(f"Confusion Matrix: \n{confusion_matrix(y_test, y_pred)}\n")
print(f"Accuracy is {accuracy} \n")
print(f"Precision is {precision} \n")
print(f"Recall is {recall} \n")
print(f"F1_score is {f1score} \n")
print(f"Classification Report: \n{classification_report(y_test, y_pred)}")

Confusion Matrix: 
[[17 11]
 [33 19]]

Accuracy is 0.45 

Precision is 0.6333333333333333 

Recall is 0.36538461538461536 

F1_score is 0.4634146341463415 

Classification Report: 
              precision    recall  f1-score   support

           0       0.34      0.61      0.44        28
           1       0.63      0.37      0.46        52

    accuracy                           0.45        80
   macro avg       0.49      0.49      0.45        80
weighted avg       0.53      0.45      0.45        80



# SVM Parameters

In [None]:
# Tune the SVM inside a scaling pipeline: the RBF kernel is distance based and
# needs standardised features, and keeping the scaler in the pipeline means it
# is re-fitted on the training part of every cross-validation fold, so no
# statistics leak from the validation rows.
svm = make_pipeline(StandardScaler(), SVC(class_weight='balanced'))

# make_pipeline names the SVC step 'svc', hence the 'svc__' prefix on the keys.
parameter = {
    'svc__gamma': [0.0001, 0.001, 0.01, 0.1],
    'svc__C': [0.01, 0.05, 0.1, 0.5, 1, 10, 15, 20],
}

# 5-fold cross-validated exhaustive search over the grid above.
grid_search = GridSearchCV(svm, parameter, cv=5)
grid_search.fit(X_train, y_train)

In [None]:
# Winning hyper-parameter combination and its mean cross-validated score.
print(grid_search.best_params_, grid_search.best_score_, sep='\n')

{'C': 1, 'gamma': 0.0001}
0.83125


In [None]:
# Take the winning model straight from the search instead of re-typing the
# printed hyper-parameters by hand: hard-coded values go stale as soon as the
# grid or the data change. With GridSearchCV's default refit=True,
# best_estimator_ is already fitted on the whole training split.
svm_best = grid_search.best_estimator_
svm_best

In [None]:
# Fit the tuned SVM on the training split and predict the held-out rows.
svm_best.fit(X_train, y_train)
y_pred = svm_best.predict(X_test)

# Compute everything first, then report.
conf_matrix = confusion_matrix(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
accuracy, precision, recall, f1score = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Confusion Matrix: \n{conf_matrix}\n")
print(f"Test Accuracy is {accuracy} \n")
print(f"Test Precision is {precision} \n")
print(f"Test Recall is {recall} \n")
print(f"Test F1_score is {f1score} \n")

print(f"Classification Report: \n{report_text}")

Confusion Matrix: 
[[25  3]
 [14 38]]

Test Accuracy is 0.7875 

Test Precision is 0.926829268292683 

Test Recall is 0.7307692307692307 

Test F1_score is 0.8172043010752689 

Classification Report: 
              precision    recall  f1-score   support

           0       0.64      0.89      0.75        28
           1       0.93      0.73      0.82        52

    accuracy                           0.79        80
   macro avg       0.78      0.81      0.78        80
weighted avg       0.83      0.79      0.79        80



# DecisionTreeClassifier

In [None]:
# Baseline decision tree with balanced class weights.
# random_state is fixed so the fitted tree (and any search that clones this
# estimator) gives the same result on every Restart & Run All; without it,
# tie-breaking between equally good splits is randomised.
dtc = DecisionTreeClassifier(class_weight='balanced', random_state=42)
dtc.fit(X_train, y_train)

In [None]:
# Score the untuned tree on the held-out split.
y_pred = dtc.predict(X_test)

accuracy, precision, recall, f1score = [
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
]
conf_matrix = confusion_matrix(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print(f"Confusion Matrix: \n{conf_matrix}\n")
print(f"Accuracy is {accuracy} \n")
print(f"Precision is {precision} \n")
print(f"Recall is {recall} \n")
print(f"F1_score is {f1score} \n")

print(f"Classification Report: \n{report_text}")

Confusion Matrix: 
[[28  0]
 [ 0 52]]

Accuracy is 1.0 

Precision is 1.0 

Recall is 1.0 

F1_score is 1.0 

Classification Report: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      1.00      1.00        52

    accuracy                           1.00        80
   macro avg       1.00      1.00      1.00        80
weighted avg       1.00      1.00      1.00        80



In [None]:
# Same metrics on the training split, for comparison with the test scores.
y_train_pred = dtc.predict(X_train)

train_accuracy, train_precision, train_recall, train_f1score = [
    score_fn(y_train, y_train_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
]

print("=== Training Data ===")
print(f"Training Accuracy is {train_accuracy} \n")
print(f"Training Precision is {train_precision} \n")
print(f"Training Recall is {train_recall} \n")
print(f"Training F1_score is {train_f1score} \n")

=== Training Data ===
Training Accuracy is 1.0 

Training Precision is 1.0 

Training Recall is 1.0 

Training F1_score is 1.0 



# DecisionTreeClassifier Parameters

In [None]:
# Search space for the decision tree.
# min_samples_split starts at 2: scikit-learn only accepts an integer >= 2 (or
# a float fraction) for it, so a value of 1 makes every candidate that uses it
# fail to fit and score NaN, wasting a fifth of the grid.
GRID_PARAMETER = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 7, 10],
    'splitter': ['best', 'random'],
    'min_samples_leaf': [1, 2, 3, 5, 7],
    'min_samples_split': [2, 3, 5, 7],
}

# 5-fold cross-validated exhaustive search, starting from the baseline tree's settings.
grid_search_dtc = GridSearchCV(dtc, GRID_PARAMETER, cv=5)
grid_search_dtc.fit(X_train, y_train)

In [None]:
# Winning tree configuration and its mean cross-validated score.
print(grid_search_dtc.best_params_, grid_search_dtc.best_score_, sep='\n')

{'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 3, 'min_samples_split': 7, 'splitter': 'random'}
0.9875


In [None]:
# Evaluate the tree selected by the grid search on the held-out split.
dtc_best = grid_search_dtc.best_estimator_
y_pred = dtc_best.predict(X_test)

conf_matrix = confusion_matrix(y_test, y_pred)
report_text = classification_report(y_test, y_pred)
accuracy, precision, recall, f1score = (
    score_fn(y_test, y_pred)
    for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Confusion Matrix: \n{conf_matrix}\n")
print(f"Accuracy is {accuracy} \n")
print(f"Precision is {precision} \n")
print(f"Recall is {recall} \n")
print(f"F1_score is {f1score} \n")
print(f"Classification Report: \n{report_text}")

Confusion Matrix: 
[[28  0]
 [ 1 51]]

Accuracy is 0.9875 

Precision is 1.0 

Recall is 0.9807692307692307 

F1_score is 0.9902912621359223 

Classification Report: 
              precision    recall  f1-score   support

           0       0.97      1.00      0.98        28
           1       1.00      0.98      0.99        52

    accuracy                           0.99        80
   macro avg       0.98      0.99      0.99        80
weighted avg       0.99      0.99      0.99        80

