In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import joblib

# Load the dataset
file_path = 'Liver_data.csv'
data = pd.read_csv(file_path)

# Handle missing values by imputing with the median value of the column using .loc to avoid chained assignment warnings
data.loc[:, 'Albumin_and_Globulin_Ratio'] = data['Albumin_and_Globulin_Ratio'].fillna(data['Albumin_and_Globulin_Ratio'].median())

# Encode the 'Gender' column using one-hot encoding
data = pd.get_dummies(data, columns=['Gender'], drop_first=True)

# Remap the target variable: original values [1, 2] are converted to [0, 1]
data['output'] = data['output'] - 1

# Define features and target variable
X = data.drop('output', axis=1)
y = data['output']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an XGBoost model
model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
print(report)

# Save the trained model to a file
model_path = 'liver_disease_model_xgboost.pkl'
joblib.dump(model, model_path)

print(f"Model saved to {model_path}")


{'0': {'precision': 0.8172043010752689, 'recall': 0.8735632183908046, 'f1-score': 0.8444444444444444, 'support': 87.0}, '1': {'precision': 0.5416666666666666, 'recall': 0.43333333333333335, 'f1-score': 0.48148148148148145, 'support': 30.0}, 'accuracy': 0.7606837606837606, 'macro avg': {'precision': 0.6794354838709677, 'recall': 0.653448275862069, 'f1-score': 0.662962962962963, 'support': 117.0}, 'weighted avg': {'precision': 0.7465536255858838, 'recall': 0.7606837606837606, 'f1-score': 0.7513770180436847, 'support': 117.0}}
Model saved to liver_disease_model_xgboost.pkl


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
