#  Liver Cirrhosis Disease prediction


In [1]:
# 1. Import Libraries
# =======================

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [2]:
# 2. Load Dataset
# =======================
url = '/content/indian_liver_patient.csv'
df = pd.read_csv(url)
df.rename(columns={'Dataset': 'Target'}, inplace=True)
df.head()

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Target
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,1


In [3]:
# 3. Handle Missing Data
# =======================
df.isnull().sum()
df['Albumin_and_Globulin_Ratio'].fillna(df['Albumin_and_Globulin_Ratio'].mean(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Albumin_and_Globulin_Ratio'].fillna(df['Albumin_and_Globulin_Ratio'].mean(), inplace=True)


In [4]:
# 4. Encode Categorical Data
# =======================
df['Gender'] = df['Gender'].map({'Male': 1, 'Female': 0})

In [5]:
# 5. Train-Test Split
# =======================
X = df.drop('Target', axis=1)
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
# 6. Feature Scaling
# =======================
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# 6. Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.7435897435897436
Classification Report:
               precision    recall  f1-score   support

           1       0.81      0.86      0.83        87
           2       0.50      0.40      0.44        30

    accuracy                           0.74       117
   macro avg       0.65      0.63      0.64       117
weighted avg       0.73      0.74      0.73       117



In [11]:
# 6.1 Support Vector Machine (SVM)
from sklearn.svm import SVC

svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train, y_train)

y_pred_svm = svm_model.predict(X_test)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Classification Report:\n", classification_report(y_test, y_pred_svm))


SVM Accuracy: 0.7435897435897436
Classification Report:
               precision    recall  f1-score   support

           1       0.74      1.00      0.85        87
           2       0.00      0.00      0.00        30

    accuracy                           0.74       117
   macro avg       0.37      0.50      0.43       117
weighted avg       0.55      0.74      0.63       117



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
# 6.2 K-Nearest Neighbors (KNN)
from sklearn.neighbors import KNeighborsClassifier

knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)

y_pred_knn = knn_model.predict(X_test)

print("KNN Accuracy:", accuracy_score(y_test, y_pred_knn))
print("Classification Report:\n", classification_report(y_test, y_pred_knn))


KNN Accuracy: 0.6837606837606838
Classification Report:
               precision    recall  f1-score   support

           1       0.78      0.79      0.79        87
           2       0.38      0.37      0.37        30

    accuracy                           0.68       117
   macro avg       0.58      0.58      0.58       117
weighted avg       0.68      0.68      0.68       117



In [13]:
# 6.3 Gradient Boosting Classifier
from sklearn.ensemble import GradientBoostingClassifier

gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)

y_pred_gb = gb_model.predict(X_test)

print("Gradient Boosting Accuracy:", accuracy_score(y_test, y_pred_gb))
print("Classification Report:\n", classification_report(y_test, y_pred_gb))


Gradient Boosting Accuracy: 0.717948717948718
Classification Report:
               precision    recall  f1-score   support

           1       0.78      0.86      0.82        87
           2       0.43      0.30      0.35        30

    accuracy                           0.72       117
   macro avg       0.60      0.58      0.59       117
weighted avg       0.69      0.72      0.70       117



In [14]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Adjust target variable to be 0 or 1
y_train_adjusted = y_train - 1
y_test_adjusted = y_test - 1

xgb_model = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb_model.fit(X_train, y_train_adjusted)

y_pred_xgb = xgb_model.predict(X_test)

print("XGBoost Accuracy:", accuracy_score(y_test_adjusted, y_pred_xgb))
print("Classification Report:\n", classification_report(y_test_adjusted, y_pred_xgb))

XGBoost Accuracy: 0.717948717948718
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.83      0.81        87
           1       0.44      0.40      0.42        30

    accuracy                           0.72       117
   macro avg       0.62      0.61      0.62       117
weighted avg       0.71      0.72      0.71       117



In [15]:
# 6.5 Logistic Regression
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

y_pred_lr = lr_model.predict(X_test)

print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print("Classification Report:\n", classification_report(y_test, y_pred_lr))


Logistic Regression Accuracy: 0.7521367521367521
Classification Report:
               precision    recall  f1-score   support

           1       0.78      0.92      0.85        87
           2       0.53      0.27      0.36        30

    accuracy                           0.75       117
   macro avg       0.66      0.59      0.60       117
weighted avg       0.72      0.75      0.72       117



In [16]:
# 7. Model Training
# =======================
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)

In [17]:
# 7.1 Model Training
# =======================
rf = SVC(random_state=42)
rf.fit(X_train_scaled, y_train)

In [18]:
# 7.2 Model Training
# =======================
rf = KNeighborsClassifier()
rf.fit(X_train_scaled, y_train)

In [19]:
# 7.3 Model Training
# =======================
rf = GradientBoostingClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)

In [20]:
# 7.4 Model Training
# =======================
rf = XGBClassifier(random_state=42)
rf.fit(X_train_scaled, y_train - 1)

In [21]:
# 7.5 Model Training
# =======================
rf = LogisticRegression(random_state=42)
rf.fit(X_train_scaled, y_train)

In [22]:
# 8. Model Evaluation
# =======================
y_pred = rf.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.7606837606837606

Classification Report:
               precision    recall  f1-score   support

           1       0.79      0.92      0.85        87
           2       0.56      0.30      0.39        30

    accuracy                           0.76       117
   macro avg       0.68      0.61      0.62       117
weighted avg       0.73      0.76      0.73       117


Confusion Matrix:
 [[80  7]
 [21  9]]


In [23]:
# 9. Save the Model and Scaler
# =======================

joblib.dump(scaler, 'normalizer.pkl')

['normalizer.pkl']

In [24]:
joblib.dump(rf, 'rf_acc_68.pkl')

['rf_acc_68.pkl']