In [None]:
# Importing the Dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

In [None]:
# Read the liver-disease CSV into a pandas DataFrame.
# The path is hard-coded; change it to wherever the dataset file actually lives.
liver_data = pd.read_csv(filepath_or_buffer='/content/liver_disease_data.csv')

In [None]:
# Preprocess: encode the two categorical columns as 0/1 integers.
# Gender: 'Male' -> 1, anything else (including missing values) -> 0.
# Encode only while the column is still non-numeric, so that re-running this
# cell does not compare the already-encoded 0/1 values against 'Male' and
# silently turn the whole column into zeros.
if not pd.api.types.is_numeric_dtype(liver_data['Gender']):
    liver_data['Gender'] = liver_data['Gender'].eq('Male').astype('int64')
# Dataset (the target): 1 stays 1, every other value becomes 0.
# This mapping is already safe to re-run because 0/1 map onto themselves.
liver_data['Dataset'] = liver_data['Dataset'].eq(1).astype('int64')

In [None]:
# Split the frame into the target and the feature matrix.
# Y holds the 0/1 'Dataset' labels; X holds every remaining column.
Y = liver_data['Dataset']
X = liver_data.drop(columns=['Dataset'])

In [None]:
# Handle missing values: fill each feature's gaps with that column's mean.
# NOTE(review): the means are computed on the full dataset, before the
# train/test split, so test rows influence the imputation — consider
# computing them on the training split only.
X = X.fillna(value=X.mean())

In [None]:
# Hold out 20% of the rows for testing. Stratifying on Y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [None]:
# Hyperparameter tuning: exhaustive grid search over the regularisation
# strength C and the penalty type, scored by 5-fold cross-validated accuracy.
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],  # inverse of regularisation strength
    'penalty': ['l1', 'l2'],
    'solver': ['liblinear'],  # liblinear supports both the l1 and l2 penalties
}

# The liblinear solver shuffles the data internally, so the estimator is
# seeded (same seed as the train/test split) to keep the search repeatable.
grid_search = GridSearchCV(
    estimator=LogisticRegression(random_state=2),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, Y_train)

In [None]:
# Pull the winning configuration out of the finished search:
# best_model is the estimator the grid search selected,
# best_params is the parameter combination that produced it.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

In [None]:
# Model Evaluation
# accuracy_score expects (y_true, y_pred): ground-truth labels first,
# predictions second. Accuracy is symmetric, so the numbers are unchanged,
# but the documented order keeps the calls correct if the metric is swapped.

# Accuracy Score on the training data
X_train_prediction = best_model.predict(X_train)
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data : ', training_data_accuracy)

# Accuracy Score on the test data (rows the model never saw during fitting)
X_test_prediction = best_model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on Test data : ', test_data_accuracy)

Accuracy on Training data :  0.7446351931330472
Accuracy on Test data :  0.6410256410256411


In [None]:
# Sample record for the Logistic Regression model (replace with appropriate liver disease data).
# The ten values must be in the same order as the feature columns the model was trained on.
# Built as a single row of shape (1, n_features) because predict() expects a 2-D array.
input_data_reshaped = np.array([[65, 0, 0.7, 0.1, 187, 16, 18, 6.8, 3.3, 0.9]])

In [None]:
# Building a Predictive System
# Classify the sample row with the tuned Logistic Regression model;
# predict() returns one label per input row, so the answer is element 0.
prediction = best_model.predict(input_data_reshaped)
# Label 0 is reported as "no liver disease"; any other label as disease.
print(
    'The Person does not have Liver Disease'
    if prediction[0] == 0
    else 'The Person has Liver Disease'
)

The Person has Liver Disease




In [None]:
# Saving the best model using pickle
filename = 'liver_disease_detection_model.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises; a bare open() inside the call would leave it dangling.
with open(filename, 'wb') as model_file:
    pickle.dump(best_model, model_file)