In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
Than_df = pd.read_csv('/content/drive/MyDrive/dulieu_medical/kidney_disease.csv')

In [None]:
Than_df.head

In [None]:
plt.hist(Than_df['age'])
plt.xlabel('Age')
plt.ylabel('Count')
plt.show()

In [None]:
import seaborn as sns

fig, axes = plt.subplots(12, 2, figsize=(20,60))
axes = axes.flatten()

columns = ["age","bp","sg","al","su","rbc","pc","pcc","ba","bgr","bu","sc","sod","pot","hemo","pcv","wc","rc","htn","dm","cad","appet","pe","ane"]

for i, cols in enumerate(columns):
  sns.histplot(x=Than_df[cols], hue=Than_df["classification"], kde=True, palette="magma", ax=axes[i])
  axes[i].set_xlabel(cols, fontsize=16)
  axes[i].set_ylabel("Count", fontsize=16)
  axes[i].set_title(f"Histogram of {cols}", fontsize=18)
  axes[i].legend(fontsize=14)

fig.suptitle("Distribution of Various Test Results by classification", fontsize=25)

plt.subplots_adjust(hspace=0.5, wspace=0.3)

In [6]:
Than_df.columns = ['ID', 'age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hemo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad', 'appet', 'pe', 'ane', 'classification']

In [None]:
Than_df.head(20)

In [8]:
Than_df = Than_df.drop('ID', axis=1)

In [9]:
Than_df['rbc'] = Than_df.rbc.replace(['normal','abnormal'], ['1', '0'])
Than_df['pc'] = Than_df.pc.replace(['normal','abnormal'], ['1', '0'])
Than_df['pcc'] = Than_df.pcc.replace(['present','notpresent'], ['1', '0'])
Than_df['ba'] = Than_df.ba.replace(['present','notpresent'], ['1', '0'])
Than_df['htn'] = Than_df.htn.replace(['yes','no'], ['1', '0'])
Than_df['dm'] = Than_df.dm.replace(['yes','no'], ['1', '0'])
Than_df['cad'] = Than_df.cad.replace(['yes','no'], ['1', '0'])
Than_df['appet'] = Than_df.appet.replace(['good','poor'], ['1', '0'])
Than_df['pe'] = Than_df.pe.replace(['yes','no'], ['1', '0'])
Than_df['ane'] = Than_df.ane.replace(['yes','no'], ['1', '0'])
Than_df['classification'] = Than_df.classification.replace(['ckd', 'notckd'], ['1', '0'])

In [None]:
Than_df

In [None]:
Than_df = Than_df.drop_duplicates()

print(Than_df.isnull().sum())

In [14]:
Than_df.age = pd.to_numeric(Than_df.age, errors='coerce')
Than_df.bp = pd.to_numeric(Than_df.bp, errors='coerce')
Than_df.sg = pd.to_numeric(Than_df.sg, errors='coerce')
Than_df.al = pd.to_numeric(Than_df.al, errors='coerce')
Than_df.su = pd.to_numeric(Than_df.su, errors='coerce')
Than_df.rbc = pd.to_numeric(Than_df.rbc, errors='coerce')
Than_df.pc = pd.to_numeric(Than_df.pc, errors='coerce')
Than_df.pcc = pd.to_numeric(Than_df.pcc, errors='coerce')
Than_df.ba = pd.to_numeric(Than_df.ba, errors='coerce')
Than_df.bgr = pd.to_numeric(Than_df.bgr, errors='coerce')
Than_df.bu = pd.to_numeric(Than_df.bu, errors='coerce')
Than_df.sc = pd.to_numeric(Than_df.sc, errors='coerce')
Than_df.sod = pd.to_numeric(Than_df.sod, errors='coerce')
Than_df.pot = pd.to_numeric(Than_df.pot, errors='coerce')
Than_df.hemo = pd.to_numeric(Than_df.hemo, errors='coerce')
Than_df.pcv = pd.to_numeric(Than_df.pcv, errors='coerce')
Than_df.wc = pd.to_numeric(Than_df.wc, errors='coerce')
Than_df.rc = pd.to_numeric(Than_df.rc, errors='coerce')
Than_df.htn = pd.to_numeric(Than_df.htn, errors='coerce')
Than_df.dm = pd.to_numeric(Than_df.dm, errors='coerce')
Than_df.cad = pd.to_numeric(Than_df.cad, errors='coerce')
Than_df.appet = pd.to_numeric(Than_df.appet, errors='coerce')
Than_df.pe = pd.to_numeric(Than_df.pe, errors='coerce')
Than_df.ane = pd.to_numeric(Than_df.ane, errors='coerce')
Than_df.classification = pd.to_numeric(Than_df.classification, errors='coerce')

In [17]:
Than_df['age'].fillna(Than_df['age'].mean(), inplace=True)
Than_df['bp'].fillna(Than_df['bp'].mean(), inplace=True)
Than_df['sg'].fillna(Than_df['sg'].mean(), inplace=True)
Than_df['al'].fillna(Than_df['al'].mean(), inplace=True)
Than_df['su'].fillna(Than_df['su'].mean(), inplace=True)
Than_df['rbc'].fillna(Than_df['rbc'].mean(), inplace=True)
Than_df['pc'].fillna(Than_df['pc'].mean(), inplace=True)
Than_df['pcc'].fillna(Than_df['pcc'].mean(), inplace=True)
Than_df['ba'].fillna(Than_df['ba'].mean(), inplace=True)
Than_df['bgr'].fillna(Than_df['bgr'].mean(), inplace=True)
Than_df['bu'].fillna(Than_df['bu'].mean(), inplace=True)
Than_df['sc'].fillna(Than_df['sc'].mean(), inplace=True)
Than_df['sod'].fillna(Than_df['sod'].mean(), inplace=True)
Than_df['pot'].fillna(Than_df['pot'].mean(), inplace=True)
Than_df['hemo'].fillna(Than_df['hemo'].mean(), inplace=True)
Than_df['pcv'].fillna(Than_df['pcv'].mean(), inplace=True)
Than_df['wc'].fillna(Than_df['wc'].mean(), inplace=True)
Than_df['rc'].fillna(Than_df['rc'].mean(), inplace=True)
Than_df['htn'].fillna(Than_df['htn'].mean(), inplace=True)
Than_df['dm'].fillna(Than_df['dm'].mean(), inplace=True)
Than_df['cad'].fillna(Than_df['cad'].mean(), inplace=True)
Than_df['appet'].fillna(Than_df['appet'].mean(), inplace=True)
Than_df['pe'].fillna(Than_df['pe'].mean(), inplace=True)
Than_df['ane'].fillna(Than_df['ane'].mean(), inplace=True)
Than_df['classification'].fillna(Than_df['classification'].mean(), inplace=True)

In [None]:
Than_df.isnull().sum()

In [19]:
Than_df.shape

(400, 25)

In [None]:
Than_df['classification'].value_counts()

In [21]:
X = Than_df.drop(columns='classification', axis=1)
Y = Than_df['classification']

In [None]:
print(X)
print(Y)

In [23]:
scaler = StandardScaler()

In [None]:
scaler.fit(X)

In [None]:
standardized_data = scaler.transform(X)

print(standardized_data)

In [None]:
X = standardized_data
Y = Than_df['classification']

print(X)
print(Y)

In [27]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [28]:
print(X.shape, X_train.shape, X_test.shape)

(400, 24) (320, 24) (80, 24)


In [31]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [32]:
regressor = LinearRegression()

regressor.fit(X_train, Y_train)

Y_train_prediction = regressor.predict(X_train)

training_data_r2_score = r2_score(Y_train, Y_train_prediction)

In [33]:
print('accuracy = ', training_data_r2_score)

accuracy =  0.7480127935172851


In [34]:
Y_test_prediction = regressor.predict(X_test)

test_data_r2_score = r2_score(Y_test, Y_test_prediction)

In [35]:
print('accurancy data testing = ', test_data_r2_score)

accurancy data testing =  0.7297216654950983


In [39]:
input = (30.0, 80.0, 1.02, 4.0, 0.0, 1, 0, 1, 1, 117.0, 56.0, 3.8, 111.0, 2.5, 11.2, 32, 6700, 3.9, 1, 0, 0, 1, 1, 1)

input_np_array = np.array(input)

input_reshape = input_np_array.reshape(1, -1)

std_data = scaler.transform(input_reshape)
print(std_data)

prediction = regressor.predict(std_data)
print(prediction)

if(prediction[0] == 0):
  print('Chuẩn đoán: bệnh nhân không bị ckd')
else:
  print('Chuẩn đoán: bệnh nhân bị ckd')

[[-1.26717664  0.26233836  0.48335471  2.3475161  -0.4377969   0.61412257
  -2.01720798  2.91782599  4.14387707 -0.4155428  -0.02896381  0.12967657
  -2.88583096 -0.75534462 -0.48895959 -0.84566942 -0.67701553 -0.96207588
   1.309986   -0.72799686 -0.30801196  0.50923805  2.0641346   2.37994966]]
[0.64441894]
Chuẩn đoán: bệnh nhân bị ckd




In [41]:
input = (58.0, 80.0, 1.025, 0.0, 0.0, 0, 0, 1, 1, 131.0, 18.0, 1.1, 141.0, 3.5, 15.8, 53, 6800, 6.1, 1, 1, 1, 1, 1, 1)
#58.0,80.0,1.025,0.0,0.0,normal,normal,notpresent,notpresent,131.0,18.0,1.1,141.0,3.5,15.8,53,6800,6.1,no,no,no,good,no,no,notckd

input_np_array = np.array(input)

input_reshape = input_np_array.reshape(1, -1)

std_data = scaler.transform(input_reshape)
print(std_data)

prediction = regressor.predict(std_data)
print(prediction)

if(prediction[0] == 0):
  print('Chuẩn đoán: bệnh nhân không bị ckd')
else:
  print('Chuẩn đoán: bệnh nhân bị ckd')

[[ 0.38437691  0.26233836  1.41572747 -0.80028958 -0.4377969  -2.62635395
  -2.01720798  2.91782599  4.14387707 -0.22809911 -0.80094118 -0.35156706
   0.37760647 -0.40026323  1.20672175  1.73390234 -0.637334    1.65927083
   1.309986    1.4016656   3.27942142  0.50923805  2.0641346   2.37994966]]
[0.05390601]
Chuẩn đoán: bệnh nhân bị ckd


