In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('indian_liver_patient.csv')
df.head()

Unnamed: 0,Age,Gender,Total_Bilirubin,Direct_Bilirubin,Alkaline_Phosphotase,Alamine_Aminotransferase,Aspartate_Aminotransferase,Total_Protiens,Albumin,Albumin_and_Globulin_Ratio,Dataset
0,65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
1,62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
2,62,Male,7.3,4.1,490,60,68,7.0,3.3,0.89,1
3,58,Male,1.0,0.4,182,14,20,6.8,3.4,1.0,1
4,72,Male,3.9,2.0,195,27,59,7.3,2.4,0.4,1


In [3]:
df.columns

Index(['Age', 'Gender', 'Total_Bilirubin', 'Direct_Bilirubin',
       'Alkaline_Phosphotase', 'Alamine_Aminotransferase',
       'Aspartate_Aminotransferase', 'Total_Protiens', 'Albumin',
       'Albumin_and_Globulin_Ratio', 'Dataset'],
      dtype='object')

In [4]:
df.Albumin_and_Globulin_Ratio.value_counts()

Albumin_and_Globulin_Ratio
1.00    106
0.80     65
0.90     59
0.70     53
1.10     46
       ... 
1.72      1
0.46      1
0.39      1
1.02      1
0.37      1
Name: count, Length: 69, dtype: int64

In [5]:
df.isnull().sum()

Age                           0
Gender                        0
Total_Bilirubin               0
Direct_Bilirubin              0
Alkaline_Phosphotase          0
Alamine_Aminotransferase      0
Aspartate_Aminotransferase    0
Total_Protiens                0
Albumin                       0
Albumin_and_Globulin_Ratio    4
Dataset                       0
dtype: int64

In [6]:
df.Dataset.value_counts()

Dataset
1    416
2    167
Name: count, dtype: int64

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Load dataset
df = pd.read_csv("indian_liver_patient.csv")
df['Dataset'] = df['Dataset'].map({1: 1, 2: 0})
df = df.dropna()

# Encode Gender
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])  # Male=1, Female=0

# Features and Target
X = df.drop('Dataset', axis=1)
y = df['Dataset']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train the model
model = RandomForestClassifier(class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

# Optional: Show evaluation
print(classification_report(y_test, model.predict(X_test)))

# Predict using user input
def get_user_input():
    print("\nEnter Patient Details:")
    age = float(input("Age: "))
    gender = input("Gender (Male/Female): ").strip().lower()
    gender_encoded = 1 if gender == "male" else 0
    tb = float(input("Total Bilirubin: "))
    db = float(input("Direct Bilirubin: "))
    alk = float(input("Alkaline Phosphotase: "))
    alt = float(input("Alamine Aminotransferase: "))
    ast = float(input("Aspartate Aminotransferase: "))
    tp = float(input("Total Proteins: "))
    alb = float(input("Albumin: "))
    agr = float(input("Albumin and Globulin Ratio: "))

    features = [[age, gender_encoded, tb, db, alk, alt, ast, tp, alb, agr]]
    return features

def predict_from_input(features):
    prediction = model.predict(features)[0]
    probability = model.predict_proba(features)[0][1]  # class 1 = Liver disease
    print("\n🔍 Prediction Result:")
    print(f"→ Liver Disease Probability: {probability * 100:.2f}%")
    print(f"→ Final Diagnosis: {'Liver Disease' if prediction == 1 else 'No Liver Disease'}")

# Run
user_features = get_user_input()
predict_from_input(user_features)


              precision    recall  f1-score   support

           0       0.53      0.23      0.32        43
           1       0.66      0.88      0.75        73

    accuracy                           0.64       116
   macro avg       0.59      0.55      0.54       116
weighted avg       0.61      0.64      0.59       116


Enter Patient Details:


Age:  34
Gender (Male/Female):  male
Total Bilirubin:  3
Direct Bilirubin:  3
Alkaline Phosphotase:  4
Alamine Aminotransferase:  3
Aspartate Aminotransferase:  4
Total Proteins:  3
Albumin:  3
Albumin and Globulin Ratio:  4



🔍 Prediction Result:
→ Liver Disease Probability: 54.00%
→ Final Diagnosis: Liver Disease




In [8]:
import joblib

joblib.dump(model, 'Liver_cancer_model.pkl')

['Liver_cancer_model.pkl']

In [10]:
joblib.dump(le, 'liver_scaler.joblib')

['liver_scaler.joblib']