# Experiments: Classical ML

## Basic Data

In [1]:
# Import all dependencies
import pandas as pd

# Load data
data= pd.read_csv("../data/tier2_data.csv")

train_df = pd.read_csv("../data/processed_data/engineered_data/train.csv").drop(columns=["Unnamed: 0","url"], errors="ignore")
val_df   = pd.read_csv("../data/processed_data/engineered_data/val.csv").drop(columns=["Unnamed: 0","url"], errors="ignore")
testA_df = pd.read_csv("../data/processed_data/engineered_data/testA.csv").drop(columns=["Unnamed: 0","url"], errors="ignore")
testB_df = pd.read_csv("../data/processed_data/engineered_data/testB.csv").drop(columns=["Unnamed: 0","url"], errors="ignore")

## Models with engineered features

In [2]:
val_df["label"].value_counts()

label
0    25000
1     5000
Name: count, dtype: int64

In [3]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression


X_train = train_df.drop(columns=["label"], errors="ignore")
X_val = val_df.drop(columns=["label"], errors="ignore")
X_testA = testA_df.drop(columns=["label"], errors="ignore")
X_testB = testB_df.drop(columns=["label"], errors="ignore")

y_train =train_df["label"]
y_val =val_df["label"]
y_testA =testA_df["label"]
y_testB = testB_df["label"]



scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)
X_testA_scaled = scaler.transform(X_testA)
X_testB_scaled = scaler.transform(X_testB)

In [4]:
X_train_scaled.shape


(93838, 17)

In [5]:
# GridSearch for LR, SVM, RandomForest with correct scaling
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

import pandas as pd

# Dictionary to store results
results = []

models = {
    "logistic_regression": {
        "model": LogisticRegression(max_iter=1000, class_weight="balanced"),
        "params": {
            "C": [0.01, 0.1, 1]
        },
        "use_scaled": True
    },

    "svm": {
        "model": SVC(probability=True, class_weight="balanced"),
        "params": {
            "C": [0.01, 0.1, 1],
            "kernel": ["linear", "rbf"]
        },
        "use_scaled": True
    },

    "random_forest": {
        "model": RandomForestClassifier(class_weight="balanced"),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [None, 10, 20]
        },
        "use_scaled": False
    }
}

for name, cfg in models.items():

    print(f"\n================ {name.upper()} ================\n")

    # Select correct train/val matrices
    X_train_used = X_train_scaled if cfg["use_scaled"] else X_train
    X_val_used   = X_val_scaled   if cfg["use_scaled"] else X_val

    clf = GridSearchCV(
        cfg["model"],
        cfg["params"],
        cv=3,
        n_jobs=-1,
        scoring="accuracy",
        verbose=1
    )

    clf.fit(X_train_used, y_train)

    best_model = clf.best_estimator_
    print("Best Params:", clf.best_params_)

    # Evaluate on validation set
    y_pred = best_model.predict(X_val_used)

    accuracy  = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall    = recall_score(y_val, y_pred)
    f1        = f1_score(y_val, y_pred)

    print("\nValidation Classification Report:\n")
    print(classification_report(y_val, y_pred))

    results.append({
        "model": name,
        "best_params": clf.best_params_,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    })


# Save results
results_df = pd.DataFrame(results)
results_df.to_csv("gridsearch_engineered_results.csv", index=False)

print("\n=========== FINAL GRIDSEARCH RESULTS ===========\n")
print(results_df)




Fitting 3 folds for each of 3 candidates, totalling 9 fits


  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


Best Params: {'C': 1}

Validation Classification Report:

              precision    recall  f1-score   support

           0       0.97      0.90      0.93     25000
           1       0.63      0.84      0.72      5000

    accuracy                           0.89     30000
   macro avg       0.80      0.87      0.83     30000
weighted avg       0.91      0.89      0.90     30000



Fitting 3 folds for each of 6 candidates, totalling 18 fits


  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


Best Params: {'C': 1, 'kernel': 'rbf'}

Validation Classification Report:

              precision    recall  f1-score   support

           0       0.98      0.96      0.97     25000
           1       0.84      0.92      0.88      5000

    accuracy                           0.96     30000
   macro avg       0.91      0.94      0.93     30000
weighted avg       0.96      0.96      0.96     30000



Fitting 3 folds for each of 6 candidates, totalling 18 fits


  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


Best Params: {'max_depth': 20, 'n_estimators': 50}

Validation Classification Report:

              precision    recall  f1-score   support

           0       0.99      0.98      0.99     25000
           1       0.91      0.97      0.94      5000

    accuracy                           0.98     30000
   macro avg       0.95      0.97      0.96     30000
weighted avg       0.98      0.98      0.98     30000



                 model                            best_params  accuracy  \
0  logistic_regression                               {'C': 1}  0.892400   
1                  svm              {'C': 1, 'kernel': 'rbf'}  0.957500   
2        random_forest  {'max_depth': 20, 'n_estimators': 50}  0.978367   

   precision  recall        f1  
0   0.633675  0.8400  0.722394  
1   0.838821  0.9222  0.878537  
2   0.908391  0.9678  0.937155  


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

def evaluate(model, X, y, name="Validation"):
    preds = model.predict(X)
    probas = model.predict_proba(X)[:,1] if hasattr(model, "predict_proba") else None

    print(f"=== {name} RESULTS ===")
    print("Accuracy:", accuracy_score(y, preds))
    print("Precision:", precision_score(y, preds))
    print("Recall:", recall_score(y, preds))
    print("F1:", f1_score(y, preds))
    print(classification_report(y, preds))

lr = clf.best_estimator_


evaluate(lr, X_val_scaled,  y_val,   "LR Validation")
evaluate(lr, X_testA_scaled, y_testA, "LR Test A")
evaluate(lr, X_testB_scaled, y_testB, "LR Test B")


=== LR Validation RESULTS ===
Accuracy: 0.8345333333333333
Precision: 1.0
Recall: 0.0072
F1: 0.014297061159650517
              precision    recall  f1-score   support

           0       0.83      1.00      0.91     25000
           1       1.00      0.01      0.01      5000

    accuracy                           0.83     30000
   macro avg       0.92      0.50      0.46     30000
weighted avg       0.86      0.83      0.76     30000

=== LR Test A RESULTS ===
Accuracy: 0.9097454545454545
Precision: 1.0
Recall: 0.0072
F1: 0.014297061159650517
              precision    recall  f1-score   support

           0       0.91      1.00      0.95     50000
           1       1.00      0.01      0.01      5000

    accuracy                           0.91     55000
   macro avg       0.95      0.50      0.48     55000
weighted avg       0.92      0.91      0.87     55000





=== LR Test B RESULTS ===
Accuracy: 0.9527142857142857
Precision: 1.0
Recall: 0.007
F1: 0.013902681231380337
              precision    recall  f1-score   support

           0       0.95      1.00      0.98    100000
           1       1.00      0.01      0.01      5000

    accuracy                           0.95    105000
   macro avg       0.98      0.50      0.49    105000
weighted avg       0.95      0.95      0.93    105000



