<a href="https://colab.research.google.com/github/Akahaybasutkar/LiverCirrhosis/blob/main/Cirrhosis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing all the libraries

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Ignoring all the warning

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('mode.chained_assignment', None)

# Importing the dataset

In [None]:
path = "/content/cirrhosis.csv"
data = pd.read_csv(path)
# print(data)
print(data.info())
# print(data.shape)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   ID             418 non-null    int64  
 1   N_Days         418 non-null    int64  
 2   Status         418 non-null    object 
 3   Drug           312 non-null    object 
 4   Age            418 non-null    int64  
 5   Sex            418 non-null    object 
 6   Ascites        312 non-null    object 
 7   Hepatomegaly   312 non-null    object 
 8   Spiders        312 non-null    object 
 9   Edema          418 non-null    object 
 10  Bilirubin      418 non-null    float64
 11  Cholesterol    284 non-null    float64
 12  Albumin        418 non-null    float64
 13  Copper         310 non-null    float64
 14  Alk_Phos       312 non-null    float64
 15  SGOT           312 non-null    float64
 16  Tryglicerides  282 non-null    float64
 17  Platelets      407 non-null    float64
 18  Prothrombi

# Preprocessing the DataSet

Finding the Null values in the Dataset

In [None]:
data.isnull().sum()

ID                 0
N_Days             0
Status             0
Drug             106
Age                0
Sex                0
Ascites          106
Hepatomegaly     106
Spiders          106
Edema              0
Bilirubin          0
Cholesterol      134
Albumin            0
Copper           108
Alk_Phos         106
SGOT             106
Tryglicerides    136
Platelets         11
Prothrombin        2
Stage              6
dtype: int64

Filling the Null values with mean and median


In [None]:
for col in data.columns:
  if data[col].dtype in ['int64', 'float64']:
    data[col].fillna(data[col].mean(), inplace = True)
  elif data[col].dtype == 'object':
    data[col].fillna(data[col].mode(), inplace = True)

In [None]:
data.isnull().sum()

# Encoding the dataset categorical data columns into continous data

In [None]:
labelEncoder = LabelEncoder()
for col in data.columns:
  if data[col].dtype == 'object':
    data[col] = labelEncoder.fit_transform(data[col])

data.head()

Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,1,400,2,0,21464,0,1,1,1,2,14.5,261.0,2.6,156.0,1718.0,137.95,172.0,190.0,12.2,4.0
1,2,4500,0,0,20617,0,0,1,1,0,1.1,302.0,4.14,54.0,7394.8,113.52,88.0,221.0,10.6,3.0
2,3,1012,2,0,25594,1,0,0,0,1,1.4,176.0,3.48,210.0,516.0,96.1,55.0,151.0,12.0,4.0
3,4,1925,2,0,19994,0,0,1,1,1,1.8,244.0,2.54,64.0,6121.8,60.63,92.0,183.0,10.3,4.0
4,5,1504,1,1,13918,0,0,1,1,0,3.4,279.0,3.53,143.0,671.0,113.15,72.0,136.0,10.9,3.0


# Performing Feature Scaling

In [None]:
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
data = pd.DataFrame(scaled_data, columns= data.columns)
data['Stage'] = data['Stage']
data.head()

Unnamed: 0,ID,N_Days,Status,Drug,Age,Sex,Ascites,Hepatomegaly,Spiders,Edema,Bilirubin,Cholesterol,Albumin,Copper,Alk_Phos,SGOT,Tryglicerides,Platelets,Prothrombin,Stage
0,0.0,0.075515,1.0,0.0,0.622822,0.0,0.5,0.5,0.5,1.0,0.512635,0.085196,0.238806,0.260274,0.105279,0.258993,0.246018,0.194234,0.355556,1.0
1,0.002398,0.937947,0.0,0.0,0.578364,0.0,0.0,0.5,0.5,0.0,0.028881,0.10997,0.813433,0.085616,0.523509,0.202298,0.097345,0.241275,0.177778,0.666667
2,0.004796,0.204249,1.0,0.0,0.839597,1.0,0.0,0.0,0.0,0.5,0.039711,0.033837,0.567164,0.35274,0.016724,0.161871,0.038938,0.135053,0.333333,1.0
3,0.007194,0.396298,1.0,0.0,0.545664,0.0,0.0,0.5,0.5,0.5,0.054152,0.074924,0.216418,0.10274,0.429723,0.079554,0.104425,0.183612,0.144444,1.0
4,0.009592,0.307741,0.5,0.5,0.226748,0.0,0.0,0.5,0.5,0.0,0.111913,0.096073,0.585821,0.238014,0.028143,0.201439,0.069027,0.112291,0.211111,0.666667


In [None]:
data['Stage'] = data["Stage"].astype(int)

# Dividing the data into inputs and outputs

In [None]:
inputs = data.drop(['ID', 'Stage'],axis=1)
output = data.drop(['ID', 'N_Days', 'Status', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema', 'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin'], axis=1)
print(inputs.head())
print(output.head())

     N_Days  Status  Drug       Age  Sex  Ascites  Hepatomegaly  Spiders  \
0  0.075515     1.0   0.0  0.622822  0.0      0.5           0.5      0.5   
1  0.937947     0.0   0.0  0.578364  0.0      0.0           0.5      0.5   
2  0.204249     1.0   0.0  0.839597  1.0      0.0           0.0      0.0   
3  0.396298     1.0   0.0  0.545664  0.0      0.0           0.5      0.5   
4  0.307741     0.5   0.5  0.226748  0.0      0.0           0.5      0.5   

   Edema  Bilirubin  Cholesterol   Albumin    Copper  Alk_Phos      SGOT  \
0    1.0   0.512635     0.085196  0.238806  0.260274  0.105279  0.258993   
1    0.0   0.028881     0.109970  0.813433  0.085616  0.523509  0.202298   
2    0.5   0.039711     0.033837  0.567164  0.352740  0.016724  0.161871   
3    0.5   0.054152     0.074924  0.216418  0.102740  0.429723  0.079554   
4    0.0   0.111913     0.096073  0.585821  0.238014  0.028143  0.201439   

   Tryglicerides  Platelets  Prothrombin  
0       0.246018   0.194234     0.355556  


# Splitting the data into testing and training data

In [None]:
x_test, x_train, y_test, y_train = train_test_split(inputs, output, train_size=0.9)

# Creating a ML model and fitting the data in the model

In [None]:
model = KNeighborsClassifier(n_neighbors=23)
model.fit(x_train, y_train)

# Testing/Predicting the outputs of the Model

In [None]:
y_pred = model.predict(x_test)
y_test
y_pred

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,

# Calculating the accuracy of the Model

In [None]:
accuracy = (accuracy_score(y_test, y_pred)) * 100
print(f"The accuracy of the model is: {accuracy}%")

The accuracy of the model is: 66.48936170212765%
