In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report  # <-- this was missing

# Step 1: Load the dataset
df = pd.read_excel("Final_dataset.xlsx")

# Step 2: Fill missing values with mode
for col in df.columns:
    df[col] = df[col].fillna(df[col].mode()[0])

# Step 3: Split into features and target
X = df.drop(columns=["ID", "Disease_Risk"])
y = df["Disease_Risk"]

# Step 4: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Parameter grid
param_grid = {
    "max_depth": [3, 5, 10, None],
    "min_samples_split": [2, 5, 10],
    "min_samples_leaf": [1, 2, 4]
}

# Step 6: RandomizedSearchCV
search = RandomizedSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_distributions=param_grid,
    n_iter=5,
    cv=3,
    random_state=42,
    n_jobs=-1
)

search.fit(X_train, y_train)

# Step 7: Show results
print("Best parameters:", search.best_params_)
print("\nTest evaluation:\n")
print(classification_report(y_test, search.best_estimator_.predict(X_test)))


Best parameters: {'min_samples_split': 10, 'min_samples_leaf': 4, 'max_depth': None}

Test evaluation:

              precision    recall  f1-score   support

         0.0       0.63      1.00      0.77        74
         1.0       1.00      0.86      0.92       311

    accuracy                           0.89       385
   macro avg       0.81      0.93      0.85       385
weighted avg       0.93      0.89      0.89       385



In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Importing classifiers
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
try:
    from catboost import CatBoostClassifier
    catboost_available = True
except ImportError:
    catboost_available = False

# Step 1: Load the dataset
df = pd.read_excel("Final_dataset.xlsx")

# Step 2: Fill missing values with mode (most common value)
for col in df.columns:
    df[col] = df[col].fillna(df[col].mode()[0])

# Step 3: Split into features and target
X = df.drop(columns=["ID", "Disease_Risk"])
y = df["Disease_Risk"]

# Step 4: Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Define models
models = {
    "SVM": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
    "MLP": MLPClassifier(max_iter=500),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss")
}
if catboost_available:
    models["CatBoost"] = CatBoostClassifier(verbose=0)

# Step 6: Train each model and collect results
results = []

for name, model in models.items():
    # Training the model
    model.fit(X_train, y_train)

    # Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Collect metrics
    results.append({
        "Model": name,
        "Train Accuracy": accuracy_score(y_train, y_train_pred),
        "Test Accuracy": accuracy_score(y_test, y_test_pred),
        "Test Precision": precision_score(y_test, y_test_pred, zero_division=0),
        "Test Recall": recall_score(y_test, y_test_pred, zero_division=0),
        "Test F1": f1_score(y_test, y_test_pred, zero_division=0)
    })

# Step 7: Make a table
results_df = pd.DataFrame(results)
print("Results of different classifiers:\n")
display(results_df)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Results of different classifiers:



Unnamed: 0,Model,Train Accuracy,Test Accuracy,Test Precision,Test Recall,Test F1
0,SVM,0.787109,0.807792,0.807792,1.0,0.893678
1,Decision Tree,0.890625,0.885714,1.0,0.858521,0.923875
2,Random Forest,0.890625,0.885714,1.0,0.858521,0.923875
3,AdaBoost,0.890625,0.885714,1.0,0.858521,0.923875
4,Naive Bayes,0.890625,0.885714,1.0,0.858521,0.923875
5,MLP,0.890625,0.885714,1.0,0.858521,0.923875
6,XGBoost,0.890625,0.885714,1.0,0.858521,0.923875
7,CatBoost,0.890625,0.885714,1.0,0.858521,0.923875
