### Model Building and Evaluation

Training: XGBoost

In [1]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBClassifier(
    eval_metric="logloss",  
    booster="dart",  # Dropout-based boosting  
    n_estimators=1000,  
    max_depth=10,  
    learning_rate=0.7,  
    subsample=0.8,  
    colsample_bytree=0.8,  
    reg_lambda=18,
    reg_alpha=0.6,  # L2 regularization term (adjust this value as needed)
)



# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ XGBoost Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "xgboost_model.pkl")


✅ XGBoost Accuracy: 0.5294
🕒 Training Time: 52.80 seconds
              precision    recall  f1-score   support

           0       0.58      0.64      0.61        11
           1       0.92      0.92      0.92        12
           2       0.54      0.44      0.48        16
           3       0.30      0.55      0.39        11
           4       0.45      0.28      0.34        18

    accuracy                           0.53        68
   macro avg       0.56      0.56      0.55        68
weighted avg       0.55      0.53      0.53        68



['xgboost_model.pkl']

In [9]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M", "V"])
y = df["M"] - 1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize AdaBoost model with a base estimator
base_estimator = DecisionTreeClassifier(max_depth=7)  # Base weak learner
model = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=500,
    learning_rate=0.8
)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ AdaBoost Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "adaboost_model.pkl")


✅ AdaBoost Accuracy: 0.5735
🕒 Training Time: 0.95 seconds
              precision    recall  f1-score   support

           0       0.67      0.91      0.77        11
           1       0.92      0.92      0.92        12
           2       0.47      0.44      0.45        16
           3       0.35      0.55      0.43        11
           4       0.56      0.28      0.37        18

    accuracy                           0.57        68
   macro avg       0.59      0.62      0.59        68
weighted avg       0.58      0.57      0.56        68



['adaboost_model.pkl']

CatBoost

In [1]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from catboost import CatBoostClassifier

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M", "V"])
y = df["M"] - 1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = CatBoostClassifier(iterations=250, eval_metric="Accuracy", verbose=50)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ CatBoost Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "catboost_model.pkl")


Learning rate set to 0.238816
0:	learn: 0.5592593	total: 53.4ms	remaining: 13.3s
50:	learn: 0.8592593	total: 111ms	remaining: 432ms
100:	learn: 0.9703704	total: 157ms	remaining: 231ms
150:	learn: 0.9925926	total: 207ms	remaining: 135ms
200:	learn: 1.0000000	total: 254ms	remaining: 62ms
249:	learn: 1.0000000	total: 298ms	remaining: 0us
✅ CatBoost Accuracy: 0.5882
🕒 Training Time: 0.38 seconds
              precision    recall  f1-score   support

           0       0.71      0.91      0.80        11
           1       0.85      0.92      0.88        12
           2       0.53      0.50      0.52        16
           3       0.40      0.55      0.46        11
           4       0.45      0.28      0.34        18

    accuracy                           0.59        68
   macro avg       0.59      0.63      0.60        68
weighted avg       0.58      0.59      0.57        68



['catboost_model.pkl']

Logistic Regression

In [2]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M", "V_log"])
y = df["M"] - 1  # Adjust target variable if necessary

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize Logistic Regression model
model = LogisticRegression(max_iter=1000, solver="lbfgs", multi_class="auto")

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ Logistic Regression Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "logistic_regression_model.pkl")
print("🎉 Model saved as 'logistic_regression_model.pkl'")


✅ Logistic Regression Accuracy: 0.4412
🕒 Training Time: 0.01 seconds
              precision    recall  f1-score   support

           0       0.52      0.87      0.65        15
           1       0.88      1.00      0.93        14
           2       0.25      0.15      0.19        13
           3       0.00      0.00      0.00        13
           4       0.08      0.08      0.08        13

    accuracy                           0.44        68
   macro avg       0.34      0.42      0.37        68
weighted avg       0.36      0.44      0.39        68

🎉 Model saved as 'logistic_regression_model.pkl'




Training: Random Forest

In [None]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = RandomForestClassifier(n_estimators=250)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ RandomForest Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "randomforest_model.pkl")


✅ RandomForest Accuracy: 0.5294
🕒 Training Time: 0.29 seconds
              precision    recall  f1-score   support

           0       0.67      0.91      0.77        11
           1       0.85      0.92      0.88        12
           2       0.42      0.31      0.36        16
           3       0.38      0.55      0.44        11
           4       0.33      0.22      0.27        18

    accuracy                           0.53        68
   macro avg       0.53      0.58      0.54        68
weighted avg       0.50      0.53      0.51        68



['randomforest_model.pkl']

Training: Extra Trees

In [8]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = ExtraTreesClassifier(n_estimators=250, random_state=42)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ ExtraTrees Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "extratrees_model.pkl")


✅ ExtraTrees Accuracy: 0.4706
🕒 Training Time: 0.24 seconds
              precision    recall  f1-score   support

           0       0.62      0.91      0.74        11
           1       0.85      0.92      0.88        12
           2       0.25      0.19      0.21        16
           3       0.36      0.45      0.40        11
           4       0.23      0.17      0.19        18

    accuracy                           0.47        68
   macro avg       0.46      0.53      0.49        68
weighted avg       0.43      0.47      0.44        68



['extratrees_model.pkl']

Training: SVM

In [32]:
import pandas as pd
import time
import joblib
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("transformed_land_mines.csv")

# Prepare data
X = df.drop(columns=["M","V"])
y = df["M"]-1

# Split into train-test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize model
model = SVC(kernel="rbf", probability=True)

# Train
start_time = time.time()
model.fit(X_train, y_train)
end_time = time.time()

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"✅ SVM Accuracy: {accuracy:.4f}")
print(f"🕒 Training Time: {end_time - start_time:.2f} seconds")
print(classification_report(y_test, y_pred))

# Save model
joblib.dump(model, "svm_model.pkl")


✅ SVM Accuracy: 0.3676
🕒 Training Time: 0.02 seconds
              precision    recall  f1-score   support

           0       0.24      0.82      0.37        11
           1       1.00      0.92      0.96        12
           2       0.20      0.12      0.15        16
           3       0.50      0.09      0.15        11
           4       0.29      0.11      0.16        18

    accuracy                           0.37        68
   macro avg       0.44      0.41      0.36        68
weighted avg       0.42      0.37      0.33        68



['svm_model.pkl']