In [2]:
# Shell 1: Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
# Shell 2: Loading the dataset
# NOTE(review): this is an absolute, machine-specific Windows path, so the cell
# only runs where this exact file exists. Consider a path relative to a
# configurable data directory.
file_path = r'C:\Users\SIDDHESH\Desktop\Liver\Indian Liver Patient Dataset (ILPD).csv'
# Read the CSV into a DataFrame; later cells use its 'gender' and 'is_patient' columns.
data = pd.read_csv(file_path)

In [4]:

# Shell 3: Preprocessing the data
# Encoding categorical variable 'gender' using LabelEncoder
# The 'gender' column is overwritten in place with the encoder's integer codes,
# so the original label strings are no longer present in `data` afterwards.
# The fitted encoder stays available as `label_encoder`.
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])

In [5]:
# Separate the label column from the predictors.
# 'is_patient' is the target; every remaining column of `data` is a feature.
y = data['is_patient']
X = data.drop(columns='is_patient')


In [6]:

# Splitting the data into training and testing sets
# test_size=0.2 holds out 20% of the rows for testing; the fixed random_state
# makes the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Shell 4: Training the logistic regression model
# max_iter=1000 sets the solver's iteration limit.
# NOTE: as the recorded output below shows, this fit raises
# "ValueError: Input X contains NaN" because X_train still contains missing
# values at this point; they have to be imputed or dropped before fitting.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


ValueError: Input X contains NaN.
LogisticRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [8]:
# Shell 1: Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Shell 2: Loading the dataset
# NOTE(review): absolute, machine-specific Windows path; consider a path relative
# to a configurable data directory.
file_path = r'C:\Users\SIDDHESH\Desktop\Liver\Indian Liver Patient Dataset (ILPD).csv'
data = pd.read_csv(file_path)

# Shell 3: Preprocessing the data
# Encoding categorical variable 'gender' using LabelEncoder
# (the column is overwritten in place with the integer codes).
label_encoder = LabelEncoder()
data['gender'] = label_encoder.fit_transform(data['gender'])

# Splitting the data into features and target
X = data.drop('is_patient', axis=1)
y = data['is_patient']

# Splitting the data into training and testing sets
# The split happens BEFORE imputation so that no statistic computed from the
# held-out rows can leak into the training data. The row partition itself is
# determined only by the number of rows and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Handling missing values
# Fit the mean imputer on the training rows only, then apply those training
# means to both partitions.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train_raw)
X_test = imputer.transform(X_test_raw)
# Kept so that any cell still referring to `X_imputed` keeps working; it is the
# full feature matrix filled with the training-set means.
X_imputed = imputer.transform(X)

# Shell 4: Training the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Shell 5: Evaluating the model
# Accuracy is the fraction of held-out rows whose label is predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Shell 6: Generating classification report and confusion matrix
# The report lists precision/recall/F1 per class; the matrix has true labels as
# rows and predicted labels as columns.
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.7521367521367521
Classification Report:
              precision    recall  f1-score   support

           1       0.78      0.92      0.85        87
           2       0.53      0.27      0.36        30

    accuracy                           0.75       117
   macro avg       0.66      0.59      0.60       117
weighted avg       0.72      0.75      0.72       117

Confusion Matrix:
[[80  7]
 [22  8]]


In [9]:

# Shell 5: Evaluating the model
# Predict labels for the held-out rows and print the fraction predicted correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.7521367521367521


In [11]:
# Shell 6: Saving the model
import joblib  # Import joblib module for saving the model
# Only the fitted classifier is written, to 'liver_patient_model.pkl' relative to
# the current working directory.
# NOTE(review): the LabelEncoder and SimpleImputer fitted during training are not
# saved with it, so any consumer of this file has to reproduce that
# preprocessing on its own.
joblib.dump(model, 'liver_patient_model.pkl')
print("Model saved successfully.")

Model saved successfully.


In [14]:
import tkinter as tk
from tkinter import messagebox
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
import joblib

# Load the trained model
# Reads the classifier from 'liver_patient_model.pkl' relative to the current
# working directory.
# NOTE(review): joblib files are pickle-based; only load files from a trusted source.
model = joblib.load('liver_patient_model.pkl')

# Create Tkinter window
# `root` is the top-level window that the form widgets are attached to.
root = tk.Tk()
root.title("Liver Patient Prediction")

# Function to preprocess input data
def preprocess_input(age, gender, tot_bilirubin, direct_bilirubin, alkphos, sgpt, sgot, tot_proteins, albumin, ag_ratio):
    """Turn one patient's form values into a single-row feature matrix.

    The previous implementation fitted a brand-new LabelEncoder on the one
    entered gender, which always yields code 0 whatever was typed, and fitted a
    mean imputer on the single row, which cannot fill anything in. This version
    encodes gender with a fixed mapping and validates the numbers instead.

    Args:
        age: Patient age (number or numeric string).
        gender: 'Male' or 'Female'; case and surrounding whitespace are ignored.
        tot_bilirubin, direct_bilirubin, alkphos, sgpt, sgot, tot_proteins,
        albumin, ag_ratio: Laboratory values (numbers or numeric strings).

    Returns:
        A float numpy array of shape (1, 10) with the columns in the order
        age, gender code, then the eight laboratory values as listed above.

    Raises:
        ValueError: If gender is not recognised, or any numeric value cannot be
            converted to a finite float.
    """
    import math

    import numpy as np

    # Codes a LabelEncoder produces when fitted on the labels 'Female' and 'Male'
    # (it sorts its classes, so Female -> 0 and Male -> 1).
    # NOTE(review): assumes the training CSV used exactly those two labels — confirm.
    gender_codes = {'female': 0, 'male': 1}
    gender_key = str(gender).strip().lower()
    if gender_key not in gender_codes:
        raise ValueError(f"Unknown gender {gender!r}; expected 'Male' or 'Female'.")

    row = [
        float(age),
        float(gender_codes[gender_key]),
        float(tot_bilirubin),
        float(direct_bilirubin),
        float(alkphos),
        float(sgpt),
        float(sgot),
        float(tot_proteins),
        float(albumin),
        float(ag_ratio),
    ]

    # A single row cannot be imputed from itself, so missing or infinite values
    # are rejected up front rather than passed on to the model.
    if not all(math.isfinite(value) for value in row):
        raise ValueError("All numeric inputs must be finite numbers.")

    return np.array([row], dtype=float)

# Function to predict and display result
def predict():
    """Read the form, run the loaded model on it and show the outcome in a dialog.

    Every field except gender is parsed as a float. The values are passed
    through preprocess_input and then to the model. A predicted label of 1 is
    reported as liver disease, any other label as healthy. A ValueError raised
    anywhere along the way is reported in an error dialog instead.
    """
    def read_number(widget):
        # Parse the widget's current text as a float (ValueError if it is not numeric).
        return float(widget.get())

    try:
        # Collect the form values in the order preprocess_input expects them.
        age = read_number(entry_age)
        gender = entry_gender.get()
        lab_values = [
            read_number(widget)
            for widget in (
                entry_tot_bilirubin,
                entry_direct_bilirubin,
                entry_alkphos,
                entry_sgpt,
                entry_sgot,
                entry_tot_proteins,
                entry_albumin,
                entry_ag_ratio,
            )
        ]

        # Build the single-row feature matrix and take the model's label for it.
        features = preprocess_input(age, gender, *lab_values)
        predicted_label = model.predict(features)[0]

        # Pick the message for the predicted label and show it.
        if predicted_label == 1:
            verdict = "The patient is likely to have liver disease."
        else:
            verdict = "The patient is likely to be healthy."
        messagebox.showinfo("Prediction", verdict)

    except ValueError:
        messagebox.showerror("Error", "Invalid input! Please enter numerical values for all fields.")

# Create input fields and labels
def _add_field(row, label_text):
    """Place a caption and a text entry side by side on grid row `row`; return the entry."""
    tk.Label(root, text=label_text).grid(row=row, column=0, padx=5, pady=5)
    entry = tk.Entry(root)
    entry.grid(row=row, column=1, padx=5, pady=5)
    return entry


# One row per model input, top to bottom. The entry_* names are read by predict().
# Captions use the corrected spellings "Phosphatase", "Alanine" and "Proteins".
entry_age = _add_field(0, "Age:")
entry_gender = _add_field(1, "Gender (Male/Female):")
entry_tot_bilirubin = _add_field(2, "Total Bilirubin:")
entry_direct_bilirubin = _add_field(3, "Direct Bilirubin:")
entry_alkphos = _add_field(4, "Alkaline Phosphatase:")
entry_sgpt = _add_field(5, "Alanine Aminotransferase:")
entry_sgot = _add_field(6, "Aspartate Aminotransferase:")
entry_tot_proteins = _add_field(7, "Total Proteins:")
entry_albumin = _add_field(8, "Albumin:")
entry_ag_ratio = _add_field(9, "Albumin and Globulin Ratio:")

# Create predict button
# Spans both columns below the last input row and calls predict() when clicked.
predict_button = tk.Button(root, text="Predict", command=predict)
predict_button.grid(row=10, column=0, columnspan=2, padx=5, pady=10)

# Run the Tkinter event loop
# Blocks until the window is closed.
root.mainloop()
