**Rounding the values**

In [None]:
import pandas as pd
from google.colab import files

# Upload CSV file
uploaded = files.upload()

# Get file name
filename = list(uploaded.keys())[0]

# Load dataset
df = pd.read_csv(filename)

# Define columns for rounding
round_2_decimals = ["Total Protiens", "ALB Albumin", "A/G Ratio Albumin and Globulin Ratio",
                     "Total Bilirubin", "Direct Bilirubin"]

round_to_nearest = ["Age of the patient","Alkphos Alkaline Phosphotase", "Sgpt Alamine Aminotransferase", "Sgot Aspartate Aminotransferase"]

# Apply rounding
df[round_2_decimals] = df[round_2_decimals].round(2)  # Round selected columns to 2 decimal places
df[round_to_nearest] = df[round_to_nearest].round()  # Round selected columns to the nearest whole number

# Save the modified dataset
output_filename = "holidays.csv"
df.to_csv(output_filename, index=False)

# Download the updated file
files.download(output_filename)

print("File processing complete. Download started.")


Saving Processed_Dataset_Winsorized (5).csv to Processed_Dataset_Winsorized (5).csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

File processing complete. Download started.


**user defined values**

In [None]:
import pandas as pd
import numpy as np
import time
import joblib  # For saving and loading models
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler
from google.colab import files
import io

# Step 1: Upload Dataset
print("📂 Please upload your dataset...")
uploaded = files.upload()
file_name = list(uploaded.keys())[0]
df = pd.read_csv(io.BytesIO(uploaded[file_name]), encoding="ISO-8859-1")

# Define normal ranges (from medical references)
normal_ranges = {
    "Total Bilirubin": (0.1, 1.2),
    "Direct Bilirubin": (0.0, 0.3),
    "Alkaline Phosphatase": (44, 147),
    "SGPT": (7, 56),
    "SGOT": (10, 40),
    "Total Proteins": (6.0, 8.3),
    "ALB": (3.5, 5.0),
    "A/G Ratio": (1.1, 2.5)
}

# Step 2: Add Binary Features for Normal Ranges
for feature, (low, high) in normal_ranges.items():
    if feature in df.columns:
        df[f"{feature}_Abnormal"] = ((df[feature] < low) | (df[feature] > high)).astype(int)

# Step 3: Prepare Data
target_column = "Result"
X = df.drop(columns=[target_column])
y = df[target_column]

# Standardize Numeric Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 4: Split Data (70% Training, 30% Testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# Step 5: Define Models
models = {
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
    "Random Forest": RandomForestClassifier(),
    "LightGBM": LGBMClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Bagged Decision Tree": BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Gaussian Naïve Bayes": GaussianNB()
}

# Step 6: Train & Evaluate Models
best_model = None
best_accuracy = 0
for name, model in models.items():
    print(f"🔄 Training {name}...")
    start_time = time.time()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred) * 100

    if acc > best_accuracy:
        best_accuracy = acc
        best_model = model  # Save the best performing model

# Save the Best Model
joblib.dump(best_model, "Best_Liver_Disease_Model.pkl")
files.download("Best_Liver_Disease_Model.pkl")
print(f"✅ Best Model ({type(best_model).__name__}) saved with accuracy: {best_accuracy:.2f}%")

# Step 7: Function to Test New Patient Data
def predict_liver_disease():
    print("\n📌 Enter patient details below:")
    patient_data = []

    for feature in X.columns:
        value = float(input(f"Enter {feature}: "))
        patient_data.append(value)

    # Convert input data to array and standardize
    patient_array = np.array(patient_data).reshape(1, -1)
    patient_scaled = scaler.transform(patient_array)

    # Load the best model
    best_model = joblib.load("Best_Liver_Disease_Model.pkl")

    # Make Prediction
    prediction = best_model.predict(patient_scaled)[0]
    result = "Liver Disease Detected (1)" if prediction == 1 else "Healthy (0)"

    print(f"\n🔍 **Prediction:** {result}")

# Step 8: Run User Input Function
predict_liver_disease()


📂 Please upload your dataset...


Saving Training_Dataset_70_30 (1).csv to Training_Dataset_70_30 (1).csv
🔄 Training XGBoost...


Parameters: { "use_label_encoder" } are not used.



🔄 Training Random Forest...
🔄 Training LightGBM...
[LightGBM] [Info] Number of positive: 10603, number of negative: 10602
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002434 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1125
[LightGBM] [Info] Number of data points in the train set: 21205, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500024 -> initscore=0.000094
[LightGBM] [Info] Start training from score 0.000094




🔄 Training Extra Trees...
🔄 Training Gradient Boosting...
🔄 Training Bagged Decision Tree...
🔄 Training Decision Tree...
🔄 Training Logistic Regression...
🔄 Training K-Nearest Neighbors...
🔄 Training Gaussian Naïve Bayes...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Best Model (RandomForestClassifier) saved with accuracy: 99.66%

📌 Enter patient details below:
Enter Age of the patient: -0.953230801
Enter Gender: 0.495063232
Enter Total Bilirubin: -0.642555554
Enter Direct Bilirubin: -0.542082829
Enter Alkphos Alkaline Phosphotase: -0.403203339
Enter Sgpt Alamine Aminotransferase: 2.169244172
Enter Sgot Aspartate Aminotransferase: 0.275259843
Enter Total Protiens: 0.167026416
Enter ALB Albumin: 0.489290622
Enter A/G Ratio Albumin and Globulin Ratio: 0.890427526
Enter Total Bilirubin_Abnormal: -0.642555554
Enter Direct Bilirubin_Abnormal: -0.542082829

🔍 **Prediction:** Healthy (0)




In [None]:
import pandas as pd
import numpy as np
import time
import joblib  # For saving and loading models
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler
from google.colab import files
import io

# Step 1: Upload Dataset
print("📂 Please upload your dataset...")
uploaded = files.upload()
file_name = list(uploaded.keys())[0]
df = pd.read_csv(io.BytesIO(uploaded[file_name]), encoding="ISO-8859-1")

# Define normal ranges (from medical references)
normal_ranges = {
    "Total Bilirubin": (0.1, 1.2),
    "Direct Bilirubin": (0.0, 0.3),
    "Alkaline Phosphatase": (44, 147),
    "SGPT": (7, 56),
    "SGOT": (10, 40),
    "Total Proteins": (6.0, 8.3),
    "ALB": (3.5, 5.0),
    "A/G Ratio": (1.1, 2.5)
}

# Step 2: Add Binary Features for Normal Ranges
for feature, (low, high) in normal_ranges.items():
    if feature in df.columns:
        df[f"{feature}_Abnormal"] = ((df[feature] < low) | (df[feature] > high)).astype(int)

# Step 3: Prepare Data
target_column = "Result"
X = df.drop(columns=[target_column])
y = df[target_column]

# Standardize Numeric Features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 4: Split Data (70% Training, 30% Testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42, stratify=y)

# Step 5: Define Models
models = {
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss"),
    "Random Forest": RandomForestClassifier(),
    "LightGBM": LGBMClassifier(),
    "Extra Trees": ExtraTreesClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "Bagged Decision Tree": BaggingClassifier(estimator=DecisionTreeClassifier(), n_estimators=10),
    "Decision Tree": DecisionTreeClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Gaussian Naïve Bayes": GaussianNB()
}

# Step 6: Train & Evaluate Models
best_model = None
best_accuracy = 0
for name, model in models.items():
    print(f"🔄 Training {name}...")
    start_time = time.time()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred) * 100

    if acc > best_accuracy:
        best_accuracy = acc
        best_model = model  # Save the best performing model

# Save the Best Model
joblib.dump(best_model, "Best_Liver_Disease_Model.pkl")
files.download("Best_Liver_Disease_Model.pkl")
print(f"✅ Best Model ({type(best_model).__name__}) saved with accuracy: {best_accuracy:.2f}%")

# Step 7: Function to Test New Patient Data
def predict_liver_disease():
    print("\n📌 Enter patient details below:")
    patient_data = []

    for feature in X.columns:
        value = float(input(f"Enter {feature}: "))
        patient_data.append(value)

    # Convert input data to array and standardize
    patient_array = np.array(patient_data).reshape(1, -1)
    patient_scaled = scaler.transform(patient_array)

    # Load the best model
    best_model = joblib.load("Best_Liver_Disease_Model.pkl")

    # Make Prediction
    prediction = best_model.predict(patient_scaled)[0]
    result = "Liver Disease Detected (1)" if prediction == 1 else "Healthy (0)"

    print(f"\n🔍 **Prediction:** {result}")

# Step 8: Run User Input Function
predict_liver_disease()


📂 Please upload your dataset...


Saving Training_Dataset_70_30 (1).csv to Training_Dataset_70_30 (1) (1).csv
🔄 Training XGBoost...


Parameters: { "use_label_encoder" } are not used.



🔄 Training Random Forest...
🔄 Training LightGBM...
[LightGBM] [Info] Number of positive: 10603, number of negative: 10602
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002502 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1125
[LightGBM] [Info] Number of data points in the train set: 21205, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500024 -> initscore=0.000094
[LightGBM] [Info] Start training from score 0.000094




🔄 Training Extra Trees...
🔄 Training Gradient Boosting...
🔄 Training Bagged Decision Tree...
🔄 Training Decision Tree...
🔄 Training Logistic Regression...
🔄 Training K-Nearest Neighbors...
🔄 Training Gaussian Naïve Bayes...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Best Model (RandomForestClassifier) saved with accuracy: 99.56%

📌 Enter patient details below:
Enter Age of the patient: -1.016323217
Enter Gender: 0.495063232
Enter Total Bilirubin: 0.128753635
Enter Direct Bilirubin: 1.615311467
Enter Alkphos Alkaline Phosphotase: -0.716293664
Enter Sgpt Alamine Aminotransferase: 0.408178815
Enter Sgot Aspartate Aminotransferase: -0.185920792
Enter Total Protiens: -0.956270562
Enter ALB Albumin: -0.931682314
Enter A/G Ratio Albumin and Globulin Ratio: -0.512762422
Enter Total Bilirubin_Abnormal: 0.128753635
Enter Direct Bilirubin_Abnormal: 1.615311467

🔍 **Prediction:** Liver Disease Detected (1)


