Training: XGBoost

In [2]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = XGBClassifier(use_label_encoder=False, eval_metric="logloss")

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f" XGBoost Accuracy: {accuracy:.4f}")
print(f" Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "xgboost_model.pkl")


 XGBoost Accuracy: 0.5441
 Training Time: 0.12 seconds
              precision    recall  f1-score   support

           0       0.67      0.73      0.70        11
           1       0.85      0.92      0.88        12
           2       0.47      0.44      0.45        16
           3       0.33      0.45      0.38        11
           4       0.46      0.33      0.39        18

    accuracy                           0.54        68
   macro avg       0.55      0.57      0.56        68
weighted avg       0.54      0.54      0.54        68



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['xgboost_model.pkl']

Training: Random Forest

In [3]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ RandomForest Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "randomforest_model.pkl")


✅ RandomForest Accuracy: 0.5000
🕒 Training Time: 0.15 seconds
              precision    recall  f1-score   support

           0       0.62      0.73      0.67        11
           1       0.85      0.92      0.88        12
           2       0.44      0.50      0.47        16
           3       0.31      0.36      0.33        11
           4       0.27      0.17      0.21        18

    accuracy                           0.50        68
   macro avg       0.50      0.53      0.51        68
weighted avg       0.48      0.50      0.48        68



['randomforest_model.pkl']

4️⃣ Training: Extra Trees

In [4]:
import pandas as pd
import time
import joblib
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = lgb.LGBMClassifier()

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ LightGBM Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "lightgbm_model.pkl")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 162
[LightGBM] [Info] Number of data points in the train set: 270, number of used features: 4
[LightGBM] [Info] Start training from score -1.504077
[LightGBM] [Info] Start training from score -1.537979
[LightGBM] [Info] Start training from score -1.686399
[LightGBM] [Info] Start training from score -1.591089
[LightGBM] [Info] Start training from score -1.748274
✅ LightGBM Accuracy: 0.6029
🕒 Training Time: 0.08 seconds
              precision    recall  f1-score   support

           0       0.75      0.82      0.78        11
           1       0.85      0.92      0.88        12
           2       0.60      0.56      0.58        16
           3       0.36      0.45      0.40        11
           4       0.50      0.39      0.44        18

    accuracy                           0.60        68
   macr

['lightgbm_model.pkl']

4️⃣ Training: Extra Trees

In [5]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = ExtraTreesClassifier(n_estimators=100, random_state=42)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ ExtraTrees Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "extratrees_model.pkl")


✅ ExtraTrees Accuracy: 0.5294
🕒 Training Time: 0.10 seconds
              precision    recall  f1-score   support

           0       0.64      0.82      0.72        11
           1       0.73      0.92      0.81        12
           2       0.50      0.56      0.53        16
           3       0.38      0.45      0.42        11
           4       0.25      0.11      0.15        18

    accuracy                           0.53        68
   macro avg       0.50      0.57      0.53        68
weighted avg       0.48      0.53      0.49        68



['extratrees_model.pkl']

5️⃣ Training: SVM

In [6]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = SVC(kernel="rbf", probability=True)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ SVM Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "svm_model.pkl")


✅ SVM Accuracy: 0.4559
🕒 Training Time: 0.02 seconds
              precision    recall  f1-score   support

           0       0.36      0.82      0.50        11
           1       1.00      0.92      0.96        12
           2       0.33      0.25      0.29        16
           3       0.27      0.27      0.27        11
           4       0.44      0.22      0.30        18

    accuracy                           0.46        68
   macro avg       0.48      0.50      0.46        68
weighted avg       0.47      0.46      0.44        68



['svm_model.pkl']

1️⃣ LightGBM with Optuna

In [6]:
import optuna
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load dataset from CSV
df = pd.read_csv("transformed_land_mines.csv")  # Replace with your actual file path

# Assuming the last column is the target variable
X = df.iloc[:, :-1]  # Features (all columns except the last)
y = df.iloc[:, -1]   # Target (last column)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

def objective(trial):
    params = {
        'num_leaves': trial.suggest_int('num_leaves', 20, 50),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'min_child_samples': trial.suggest_int('min_child_samples', 20, 50),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
    }

    model = lgb.LGBMClassifier(**params)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    return accuracy_score(y_test, preds)

# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)

print(f"Best Trial: {study.best_trial.params}")


[I 2025-03-22 19:06:43,328] A new study created in memory with name: no-name-643ae442-7dd9-4e5a-84a4-bd90176425ab
[W 2025-03-22 19:06:43,331] Trial 0 failed with parameters: {'num_leaves': 29, 'max_depth': 7, 'learning_rate': 0.03333732963739545, 'min_child_samples': 21, 'lambda_l1': 0.03761272875698343, 'lambda_l2': 3.0044250320199217} because of the following error: ValueError('Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.').
Traceback (most recent call last):
  File "/workspaces/FDS_MiniProject/FDS/lib/python3.12/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "/tmp/ipykernel_45148/2183431811.py", line 31, in objective
    model.fit(X_train, y_train)
  File "/workspaces/FDS_MiniProject/FDS/lib/python3.12/site-packages/lightgbm/sklearn.py", line 1513, in fit
    _LGBMCheckClassificationTa

ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.

In [10]:
import optuna
import xgboost as xgb
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder

# Load the dataset (update file path if needed)
df = pd.read_csv("transformed_land_mines.csv")

# Define feature columns and target column
feature_columns = ["V", "H", "S"]  # Features
target_column = "M"  # Update if your target column has a different name

# Extract features and target
X = df[feature_columns].values
y = df[target_column].values

# Ensure class labels start from 0
y = y - y.min()

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Define Optuna objective function
def objective(trial):
    params = {
        "max_depth": trial.suggest_int("max_depth", 3, 8),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "min_child_weight": trial.suggest_int("min_child_weight", 3, 10),
        "lambda": trial.suggest_float("lambda", 1e-8, 10.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 10.0, log=True),
        "objective": "multi:softmax",
        "num_class": len(np.unique(y)),  # Ensure num_class is correct
        "eval_metric": "mlogloss",
        "n_jobs": -1,  # Use all available CPU cores
    }

    # Cross-validation
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    accuracy_scores = []

    for train_idx, val_idx in kf.split(X_train, y_train):
        X_train_fold, X_val_fold = X_train[train_idx], X_train[val_idx]
        y_train_fold, y_val_fold = y_train[train_idx], y_train[val_idx]

        model = xgb.XGBClassifier(**params, verbosity=0)
        model.fit(X_train_fold, y_train_fold)
        y_pred = model.predict(X_val_fold)
        accuracy_scores.append(accuracy_score(y_val_fold, y_pred))

    return np.mean(accuracy_scores)

# Run Optuna optimization
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=30)

# Train final model with best parameters
best_params = study.best_params
model = xgb.XGBClassifier(**best_params, verbosity=0)
model.fit(X_train, y_train)

# Evaluate on test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ XGBoost Accuracy: {accuracy:.4f}")

# Save the best model (optional)
model.save_model("best_xgboost_model.json")


[I 2025-03-22 19:11:31,992] A new study created in memory with name: no-name-396c2ad9-b31c-43f9-af3b-c4be1608408e
[I 2025-03-22 19:11:34,971] Trial 0 finished with value: 0.5296296296296296 and parameters: {'max_depth': 4, 'learning_rate': 0.0871664246882117, 'min_child_weight': 10, 'lambda': 0.05988638830569042, 'alpha': 0.006688256538912946}. Best is trial 0 with value: 0.5296296296296296.
[I 2025-03-22 19:11:35,271] Trial 1 finished with value: 0.5037037037037038 and parameters: {'max_depth': 4, 'learning_rate': 0.05523525597057152, 'min_child_weight': 10, 'lambda': 0.011764944567053003, 'alpha': 7.205186918134593e-08}. Best is trial 0 with value: 0.5296296296296296.
[I 2025-03-22 19:11:35,522] Trial 2 finished with value: 0.47777777777777775 and parameters: {'max_depth': 3, 'learning_rate': 0.019766596112075895, 'min_child_weight': 4, 'lambda': 1.4903987954967152e-05, 'alpha': 4.226036072910565e-07}. Best is trial 0 with value: 0.5296296296296296.
[I 2025-03-22 19:11:35,873] Trial 

✅ XGBoost Accuracy: 0.5588
