# Experiments: Classical ML

## Basic Data

In [6]:
# Import all dependencies
import pandas as pd

# Load data
data = pd.read_csv("../data/tier2_data.csv")


def _read_engineered(csv_path):
    """Read one engineered-feature CSV and drop the "Unnamed: 0" and "url" columns when present."""
    frame = pd.read_csv(csv_path)
    return frame.drop(columns=["Unnamed: 0", "url"], errors="ignore")


# Engineered-feature splits used by the models below.
train_df = _read_engineered("../data/processed_data/engineered_data/train.csv")
val_df = _read_engineered("../data/processed_data/engineered_data/val.csv")
testA_df = _read_engineered("../data/processed_data/engineered_data/testA.csv")
#testB_df = _read_engineered("../data/processed_data/engineered_data/more_data.csv")

## Models with engineered features

In [7]:
# Class balance of the validation split: number of rows per distinct "label" value.
val_df["label"].value_counts()

label
0    23455
1     4691
Name: count, dtype: int64

In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression


def _features_and_target(frame):
    """Return (frame without the "label" column, the "label" column) for one split."""
    return frame.drop(columns=["label"], errors="ignore"), frame["label"]


X_train, y_train = _features_and_target(train_df)
X_val, y_val = _features_and_target(val_df)
X_testA, y_testA = _features_and_target(testA_df)
#X_testB, y_testB = _features_and_target(testB_df)

# Scaling statistics are learned from the training features only and then
# re-applied unchanged to the other splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_testA_scaled = (scaler.transform(part) for part in (X_val, X_testA))
#X_testB_scaled = scaler.transform(X_testB)

In [9]:
# Sanity check: dimensions of the scaled training matrix produced by `scaler.fit_transform`.
X_train_scaled.shape


(294274, 19)

In [10]:
# GridSearch for LR, SVM, RandomForest with correct scaling
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

import pandas as pd

# Metric used by GridSearchCV to rank candidates. The label distribution is
# imbalanced (roughly 5:1 in the validation split) and every model is trained
# with class_weight="balanced", so candidates are ranked by positive-class F1
# rather than accuracy, which a majority-class predictor can score well on.
SCORING = "f1"

# Fixed seed so the stochastic estimator (random forest) is reproducible.
RANDOM_STATE = 42

# One row of validation metrics per model family.
results = []

# Fitted GridSearchCV object per model family, keyed by the names in `models`,
# so later cells can pick a specific model instead of "whatever ran last".
fitted_searches = {}

# Model families to tune. "use_scaled" selects between the standardised and
# the raw feature matrices prepared earlier in the notebook.
models = {
    "logistic_regression": {
        "model": LogisticRegression(max_iter=1000, class_weight="balanced"),
        "params": {
            "C": [0.01, 0.1, 1]
        },
        "use_scaled": True
    },

    "svm": {
        # probability=True is deliberately not set: it adds an internal
        # cross-validated calibration to every fit, while this cell only ever
        # calls .predict(). Re-enable it if predict_proba is needed later.
        "model": SVC(class_weight="balanced"),
        "params": {
            "C": [0.01, 0.1, 1],
            "kernel": ["linear", "rbf"]
        },
        "use_scaled": True
    },

    "random_forest": {
        "model": RandomForestClassifier(class_weight="balanced", random_state=RANDOM_STATE),
        "params": {
            "n_estimators": [50, 100],
            "max_depth": [None, 10, 20]
        },
        "use_scaled": False
    }
}

for name, cfg in models.items():

    print(f"\n================ {name.upper()} ================\n")

    # Select correct train/val matrices.
    # NOTE: the scaled matrices were standardised with statistics from the
    # whole training set, so CV folds share scaling statistics.
    X_train_used = X_train_scaled if cfg["use_scaled"] else X_train
    X_val_used   = X_val_scaled   if cfg["use_scaled"] else X_val

    search = GridSearchCV(
        cfg["model"],
        cfg["params"],
        cv=3,
        n_jobs=-1,
        scoring=SCORING,
        verbose=1
    )

    search.fit(X_train_used, y_train)
    fitted_searches[name] = search

    best_model = search.best_estimator_
    print("Best Params:", search.best_params_)

    # Evaluate the refitted best candidate on the held-out validation set
    y_pred = best_model.predict(X_val_used)

    accuracy  = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall    = recall_score(y_val, y_pred)
    f1        = f1_score(y_val, y_pred)

    print("\nValidation Classification Report:\n")
    print(classification_report(y_val, y_pred))

    results.append({
        "model": name,
        "best_params": search.best_params_,
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1
    })


# A later cell reads `clf.best_estimator_` and treats it as the logistic
# regression model. Bind `clf` to that search explicitly instead of leaving
# it pointing at whichever search happened to be fitted last.
clf = fitted_searches["logistic_regression"]

# Save results
results_df = pd.DataFrame(results)
results_df.to_csv("gridsearch_engineered_results.csv", index=False)

print("\n=========== FINAL GRIDSEARCH RESULTS ===========\n")
print(results_df)




Fitting 3 folds for each of 3 candidates, totalling 9 fits


  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


Best Params: {'C': 0.01}

Validation Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.86      0.91     23455
           1       0.54      0.79      0.64      4691

    accuracy                           0.85     28146
   macro avg       0.75      0.83      0.77     28146
weighted avg       0.88      0.85      0.86     28146



Fitting 3 folds for each of 6 candidates, totalling 18 fits


  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)
  return _ForkingPickler.loads(res)


KeyboardInterrupt: 

In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

def evaluate(model, X, y, name="Validation"):
    """Print classification metrics for ``model`` on one labelled split.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : feature matrix passed to ``model.predict``.
    y : true labels aligned with the rows of ``X``.
    name : label used in the printed header.

    Prints accuracy, precision, recall and F1 (scikit-learn's default binary
    settings) followed by the full classification report. Returns ``None``.
    """
    # Only hard predictions are needed for the metrics below, so no
    # predict_proba pass is made.
    preds = model.predict(X)

    print(f"=== {name} RESULTS ===")
    print("Accuracy:", accuracy_score(y, preds))
    print("Precision:", precision_score(y, preds))
    print("Recall:", recall_score(y, preds))
    print("F1:", f1_score(y, preds))
    print(classification_report(y, preds))

# `clf` is assigned by the grid-search cell; fail loudly if it does not hold
# the logistic-regression search, rather than reporting another model's
# numbers under an "LR" heading.
lr = clf.best_estimator_
if not isinstance(lr, LogisticRegression):
    raise TypeError(
        f"clf.best_estimator_ is a {type(lr).__name__}, not LogisticRegression; "
        "retrieve the logistic-regression grid search before running this cell."
    )


evaluate(lr, X_val_scaled,  y_val,   "LR Validation")
evaluate(lr, X_testA_scaled, y_testA, "LR Test A")
#evaluate(lr, X_testB_scaled, y_testB, "LR Test B")


=== LR Validation RESULTS ===
Accuracy: 0.7554744525547445
Precision: 1.0
Recall: 0.0218
F1: 0.04266979839498924
              precision    recall  f1-score   support

           0       0.75      1.00      0.86     15002
           1       1.00      0.02      0.04      5000

    accuracy                           0.76     20002
   macro avg       0.88      0.51      0.45     20002
weighted avg       0.82      0.76      0.66     20002

=== LR Test A RESULTS ===
Accuracy: 0.7548277072779255
Precision: 1.0
Recall: 0.0224
F1: 0.04381846635367762
              precision    recall  f1-score   support

           0       0.75      1.00      0.86     14937
           1       1.00      0.02      0.04      5000

    accuracy                           0.75     19937
   macro avg       0.88      0.51      0.45     19937
weighted avg       0.82      0.75      0.65     19937





NameError: name 'X_testB_scaled' is not defined