Training: XGBoost

In [4]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = XGBClassifier(eval_metric="logloss",n_estimators=250)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ XGBoost Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "xgboost_model.pkl")


✅ XGBoost Accuracy: 0.5294
🕒 Training Time: 0.18 seconds
              precision    recall  f1-score   support

           0       0.67      0.73      0.70        11
           1       0.92      0.92      0.92        12
           2       0.44      0.44      0.44        16
           3       0.29      0.45      0.36        11
           4       0.45      0.28      0.34        18

    accuracy                           0.53        68
   macro avg       0.55      0.56      0.55        68
weighted avg       0.54      0.53      0.53        68



['xgboost_model.pkl']

Training: Random Forest

In [None]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = RandomForestClassifier(n_estimators=250)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ RandomForest Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "randomforest_model.pkl")


✅ RandomForest Accuracy: 0.5294
🕒 Training Time: 0.29 seconds
              precision    recall  f1-score   support

           0       0.67      0.91      0.77        11
           1       0.85      0.92      0.88        12
           2       0.42      0.31      0.36        16
           3       0.38      0.55      0.44        11
           4       0.33      0.22      0.27        18

    accuracy                           0.53        68
   macro avg       0.53      0.58      0.54        68
weighted avg       0.50      0.53      0.51        68



['randomforest_model.pkl']

Training: Extra Trees

In [7]:
import pandas as pd
import time
import joblib
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = lgb.LGBMClassifier(n_estimators=300)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ LightGBM Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "lightgbm_model.pkl")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 90
[LightGBM] [Info] Number of data points in the train set: 270, number of used features: 3
[LightGBM] [Info] Start training from score -1.504077
[LightGBM] [Info] Start training from score -1.537979
[LightGBM] [Info] Start training from score -1.686399
[LightGBM] [Info] Start training from score -1.591089
[LightGBM] [Info] Start training from score -1.748274
✅ LightGBM Accuracy: 0.6176
🕒 Training Time: 0.18 seconds
              precision    recall  f1-score   support

           0       0.75      0.82      0.78        11
           1       0.85      0.92      0.88        12
           2       0.59      0.62      0.61        16
           3       0.38      0.45      0.42        11
           4       0.54      0.39      0.45        18

    accuracy                           0.62        68
   macro

['lightgbm_model.pkl']

Training: Extra Trees

In [8]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = ExtraTreesClassifier(n_estimators=250, random_state=42)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ ExtraTrees Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "extratrees_model.pkl")


✅ ExtraTrees Accuracy: 0.4706
🕒 Training Time: 0.24 seconds
              precision    recall  f1-score   support

           0       0.62      0.91      0.74        11
           1       0.85      0.92      0.88        12
           2       0.25      0.19      0.21        16
           3       0.36      0.45      0.40        11
           4       0.23      0.17      0.19        18

    accuracy                           0.47        68
   macro avg       0.46      0.53      0.49        68
weighted avg       0.43      0.47      0.44        68



['extratrees_model.pkl']

Training: SVM

In [32]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = SVC(kernel="rbf", probability=True)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ SVM Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "svm_model.pkl")


✅ SVM Accuracy: 0.3676
🕒 Training Time: 0.02 seconds
              precision    recall  f1-score   support

           0       0.24      0.82      0.37        11
           1       1.00      0.92      0.96        12
           2       0.20      0.12      0.15        16
           3       0.50      0.09      0.15        11
           4       0.29      0.11      0.16        18

    accuracy                           0.37        68
   macro avg       0.44      0.41      0.36        68
weighted avg       0.42      0.37      0.33        68



['svm_model.pkl']

LightGBM with Optuna

In [None]:
import optuna
import lightgbm as lgb
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd

df = pd.read_csv("transformed_land_mines.csv")
# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define objective function
def objective(trial):
    params = {
        'objective': 'multiclass',
        'num_class': len(set(y)),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.03, 0.3),
        'num_leaves': trial.suggest_int('num_leaves', 50, 200),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
    }
    
    model = lgb.LGBMClassifier(**params)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    return cross_val_score(model, X_train, y_train, cv=kf, scoring='accuracy').mean()

# Run Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Train final model
best_params = study.best_params
model = lgb.LGBMClassifier(**best_params)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(f"Final Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Best Parameters:", best_params)


XGBoost with Optuna

In [1]:
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load dataset
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define objective function
def objective(trial):
    params = {
        'objective': 'multi:softmax',
        'num_class': len(set(y)),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'lambda': trial.suggest_loguniform('lambda', 1e-8, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-8, 10.0),
    }
    
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', **params)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    return cross_val_score(model, X_train, y_train, cv=kf, scoring='accuracy').mean()

# Run Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Train final model
best_params = study.best_params
model = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', **best_params)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(f"Final Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Best Parameters:", best_params)


[I 2025-03-23 14:54:09,492] A new study created in memory with name: no-name-11236871-8e43-4890-9aaa-21e3744e7481
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
  'subsample': trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'lambda': trial.suggest_loguniform('lambda', 1e-8, 10.0),
  'alpha': trial.suggest_loguniform('alpha', 1e-8, 10.0),
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
[I 2025-03-23 14:54:09,674] Trial 0 finished with value: 0.9166666666666666 and para

Final Accuracy: 1.0000
Best Parameters: {'learning_rate': 0.023874766876701002, 'max_depth': 6, 'min_child_weight': 5, 'subsample': 0.8276324160849453, 'colsample_bytree': 0.766476796013486, 'lambda': 1.4160332371260775e-05, 'alpha': 0.011953030118598672}


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


RandomTreeClassifer with Optuna

In [2]:
import optuna
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load dataset
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 20),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 5),
        'max_features': trial.suggest_uniform('max_features', 0.5, 1.0),
    }
    
    model = ExtraTreesClassifier(**params)
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    return cross_val_score(model, X_train, y_train, cv=kf, scoring='accuracy').mean()

# Run Optuna
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Train final model
best_params = study.best_params
model = ExtraTreesClassifier(**best_params)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print(f"Final Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("Best Parameters:", best_params)


[I 2025-03-23 15:04:19,261] A new study created in memory with name: no-name-8a42addc-f5af-41a2-880b-5720b9603487
  'max_features': trial.suggest_uniform('max_features', 0.5, 1.0),
[I 2025-03-23 15:04:20,464] Trial 0 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 294, 'max_depth': 6, 'min_samples_split': 6, 'min_samples_leaf': 5, 'max_features': 0.8040915273918618}. Best is trial 0 with value: 0.9416666666666668.
  'max_features': trial.suggest_uniform('max_features', 0.5, 1.0),
[I 2025-03-23 15:04:21,914] Trial 1 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 322, 'max_depth': 20, 'min_samples_split': 7, 'min_samples_leaf': 2, 'max_features': 0.7420802434791295}. Best is trial 0 with value: 0.9416666666666668.
  'max_features': trial.suggest_uniform('max_features', 0.5, 1.0),
[I 2025-03-23 15:04:23,142] Trial 2 finished with value: 0.9416666666666668 and parameters: {'n_estimators': 241, 'max_depth': 19, 'min_samples_split': 7, 'min_

Final Accuracy: 1.0000
Best Parameters: {'n_estimators': 155, 'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 5, 'max_features': 0.7081082985415836}
