In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the liver dataset from a CSV file in the working directory.
# NOTE(review): the file name suggests SMOTE oversampling was already applied
# to the whole table — confirm this does not leak synthetic rows into the
# test split created later.
df = pd.read_csv("liver_dataset_smote.csv")

In [None]:
from sklearn.linear_model import LogisticRegression,Lasso,Ridge
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.svm import SVC
from sklearn.metrics import f1_score,confusion_matrix,ConfusionMatrixDisplay,classification_report,roc_auc_score,recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB   
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Rename the target column "Dataset" to "Output" and recode its labels
# from {1, 2} to {0, 1}.
#
# The work is guarded so the cell is safe to re-run: once "Dataset" has been
# renamed, a second execution only prints the already-recoded labels instead
# of raising KeyError.
if "Dataset" in df.columns:
    print(df["Dataset"].unique())

    label_map = {1: 0, 2: 1}

    # Series.map() turns every value missing from the mapping into NaN, which
    # would only surface later as an obscure training error. Fail loudly here.
    unexpected_labels = set(df["Dataset"].dropna().unique()) - set(label_map)
    if unexpected_labels:
        raise ValueError(
            f"Unexpected labels in 'Dataset' column: {sorted(unexpected_labels, key=str)}"
        )

    df = df.rename(columns={"Dataset": "Output"})
    df["Output"] = df["Output"].map(label_map)

print(df["Output"].unique())

In [None]:
# Features are every column except the last one; the last column is the target.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out a test set using train_test_split's default split size, with a
# fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=4)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Class counts on the training labels. They drive XGBoost's scale_pos_weight
# (ratio of negative to positive samples); previously `neg` and `pos` were
# referenced without ever being defined, so this cell failed with NameError
# on a fresh kernel.
neg = int((y_train == 0).sum())
pos = int((y_train == 1).sum())
# Fall back to a neutral weight if either class is missing from the split,
# which avoids a ZeroDivisionError / a zero weight.
xgb_pos_weight = (neg / pos) if (neg and pos) else 1.0

# Candidate classifiers, keyed by the display name used in the training loop,
# the hyper-parameter grids and the comparison table.
models = {
    "Logistic Regression": LogisticRegression(class_weight='balanced',max_iter=1000),
    "Lasso (L1)": LogisticRegression(penalty="l1", solver="liblinear", max_iter=1000),
    "Ridge (L2)": LogisticRegression(penalty="l2", solver="lbfgs", max_iter=1000),
    "ElasticNet (L1+L2)": LogisticRegression(penalty="elasticnet", solver="saga", l1_ratio=0.5, max_iter=1000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(class_weight='balanced'),
    "KNN": KNeighborsClassifier(),
    # probability=True is required so that SVC exposes predict_proba
    "SVM": SVC(class_weight='balanced',probability=True),
    "Gaussian NB": GaussianNB(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "XGBoost": XGBClassifier(scale_pos_weight=xgb_pos_weight, use_label_encoder=False, eval_metric="logloss"),
    "AdaBoost":AdaBoostClassifier()
}

In [None]:
# Hyper-parameter candidates, keyed by the same display names used for the
# classifiers. A classifier with no entry here is trained without a search.
param_grids = {}
param_grids["Random Forest"] = dict(n_estimators=[100, 200], max_depth=[1, 5, 10])
param_grids["SVM"] = dict(C=[0.1, 1, 10], kernel=["linear", "rbf"])
param_grids["KNN"] = dict(n_neighbors=[3, 5, 7, 9])
param_grids["Logistic Regression"] = dict(C=[0.01, 0.1, 1, 10], penalty=["l2"])

# Accumulator for per-model evaluation rows.
results = list()

In [None]:
import pickle  # needed by the export cell that writes model.pkl

# Probability cut-off for predicting the positive class. It is lower than the
# usual 0.5, which trades precision for recall.
# NOTE(review): the exported model's own .predict() still uses its default
# cut-off, so consumers must apply this threshold themselves to reproduce the
# recall reported here.
DECISION_THRESHOLD = 0.3

# Running "best so far" state. These are kept separate from the per-iteration
# estimator: previously `best_model` doubled as the loop's working variable,
# so after the loop it always referred to the last model trained, not to the
# one with the highest recall.
best_model = None
best_recall = -1.0  # below any possible recall so the first model is always taken
best_name = ""

for name, model in models.items():
    print(f"\nTraining {name}...")

    if name in param_grids:
        # The grids are small explicit lists, so search them exhaustively.
        # (RandomizedSearchCV with its default n_iter=10 would warn that the
        # space is smaller than n_iter and evaluate every combination anyway.)
        # NOTE(review): tuning optimises accuracy while selection below uses
        # recall — confirm this is intended.
        search = GridSearchCV(model, param_grids[name], cv=5, scoring="accuracy", n_jobs=-1)
        search.fit(X_train, y_train)
        fitted_model = search.best_estimator_
        print("Best Params:", search.best_params_)
    else:
        fitted_model = model
        fitted_model.fit(X_train, y_train)

    if hasattr(fitted_model, "predict_proba"):
        # Threshold the positive-class probability instead of using .predict()
        y_proba = fitted_model.predict_proba(X_test)[:, 1]
        y_pred = (y_proba >= DECISION_THRESHOLD).astype(int)
    else:
        # No probabilities available: use hard predictions, and raw decision
        # scores for ROC-AUC.
        y_pred = fitted_model.predict(X_test)
        y_proba = fitted_model.decision_function(X_test)

    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc = roc_auc_score(y_test, y_proba)

    if recall > best_recall:
        best_recall = recall
        # Keep the *fitted* estimator (the tuned one when a search was run),
        # not the untouched template from `models`.
        best_model = fitted_model
        best_name = name

    print("recall_score:", recall)
    print("ROC-AUC:", roc)
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

    cm = confusion_matrix(y_test, y_pred)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")
    plt.title(f"{name} - Confusion Matrix")
    plt.show()

    # Row layout: [model name, F1 score, ROC-AUC]
    results.append([name, f1, roc])

print(f"\nSelected model: {best_name} (recall={best_recall:.3f} at threshold {DECISION_THRESHOLD})")


In [None]:
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score

# Build a fresh comparison table: every estimator in `models` is refitted as-is
# and scored on the test split using its plain .predict() output.
results = []

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Positive-class probabilities are only available on some estimators.
    if hasattr(model, "predict_proba"):
        y_proba = model.predict_proba(X_test)[:, 1]
    else:
        y_proba = None

    row = {
        "Model": name,
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred),
        "Recall": recall_score(y_test, y_pred),
        "F1-Score": f1_score(y_test, y_pred),
    }
    # ROC-AUC needs scores; leave it empty when none could be produced.
    row["ROC-AUC"] = None if y_proba is None else roc_auc_score(y_test, y_proba)
    results.append(row)

# One row per model, one column per metric.
results_df = pd.DataFrame(results)
print("\n📊 Model Comparison Table:\n")
print(results_df)

# Report the model whose row has the highest recall.
top_row = results_df["Recall"].idxmax()
best_model_name = results_df.loc[top_row, "Model"]
print(f"\n✅ Best model based on Recall Score: {best_model_name}")



In [None]:
# Persist the fitted scaler and the estimator currently held in `best_model`
# as a single (scaler, model) tuple, so both can be restored together.
with open("model.pkl", "wb") as model_file:
    pickle.dump((scaler, best_model), model_file)