In [113]:
import os
import time

import polars as pl
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer, StandardScaler

In [114]:
# Seed passed as random_state to the LogisticRegression built in the training cell.
SEED = 462

In [115]:
def load_data(filename):
    """Read one processed CSV split and separate features from labels.

    The file is looked up under ``data/tabular``. Every column except the
    last is treated as a feature; the last column is treated as the label.

    Args:
        filename: Name of the CSV file inside ``data/tabular``.

    Returns:
        A ``(X, y)`` tuple where ``X`` is all but the last column cast to
        float and ``y`` is the last column, returned as read.
    """
    csv_path = os.path.join("data", "tabular", filename)
    table = pl.read_csv(csv_path).to_numpy()
    features, labels = table[:, :-1], table[:, -1]
    return features.astype(float), labels

In [116]:
# Load the three pre-split datasets.
X_train, y_train = load_data("train_processed.csv")
X_val, y_val = load_data("validation_processed.csv")
X_test, y_test = load_data("test_processed.csv")

# Learn the scaling statistics from the training features only, then apply
# that same fitted scaler to every split so no validation/test information
# influences the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (
    scaler.transform(part) for part in (X_train, X_val, X_test)
)

In [117]:
print("Starting Training...")
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
start_time = time.perf_counter()

# One-vs-rest: a separate binary logistic regression is fitted per class.
# NOTE(review): per the scikit-learn docs random_state only affects the
# sag/saga/liblinear solvers, so with lbfgs it is kept for safety only - confirm.
model = OneVsRestClassifier(LogisticRegression(solver="lbfgs", max_iter=10000, random_state=SEED))
model.fit(X_train, y_train)

end_time = time.perf_counter()
print(f"Total Training Time: {end_time - start_time:.4f} seconds\n")

Starting Training...
Total Training Time: 8.2222 seconds



In [118]:
def print_metrics(X, y, name, estimator=None):
    """Print classification metrics for one data split.

    Reports accuracy, support-weighted precision/recall/F1, and one ROC AUC
    value per class known to the classifier.

    Args:
        X: Feature matrix to score.
        y: True labels for ``X``.
        name: Split name shown in the report header.
        estimator: Fitted classifier exposing ``predict``, ``predict_proba``
            and ``classes_``. When omitted, the notebook-level ``model`` is
            used, which keeps existing three-argument calls working.
    """
    clf = model if estimator is None else estimator

    pred = clf.predict(X)
    probs = clf.predict_proba(X)

    acc = accuracy_score(y, pred)
    prec = precision_score(y, pred, average="weighted")
    rec = recall_score(y, pred, average="weighted")
    f1 = f1_score(y, pred, average="weighted")

    # Binarize against the classifier's own class list instead of the global
    # y_train: the indicator columns then follow the same class order used for
    # the labels printed below, and the function no longer depends on y_train
    # being present in the kernel.
    lb = LabelBinarizer()
    lb.fit(clf.classes_)
    y_bin = lb.transform(y)
    # average=None returns one AUC per indicator column rather than a mean.
    auc_scores = roc_auc_score(y_bin, probs, multi_class='ovr', average=None)

    print(f"--- {name} Metrics ---")
    print(f"Accuracy:  {acc * 100:.2f}%")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    for cls, auc in zip(clf.classes_, auc_scores):
        print(f"AUC for {cls}: {auc:.4f}")

In [119]:
# Report the same metric set for every split: training, validation, then test.
for split_X, split_y, split_name in (
    (X_train, y_train, "Training"),
    (X_val, y_val, "Validation"),
    (X_test, y_test, "Test"),
):
    print_metrics(split_X, split_y, split_name)

--- Training Metrics ---
Accuracy:  99.42%
Precision: 0.9942
Recall:    0.9942
F1 Score:  0.9942
AUC for banana: 1.0000
AUC for carrot: 0.9998
AUC for cucumber: 0.9999
AUC for mandarin: 0.9999
AUC for tomato: 1.0000
--- Validation Metrics ---
Accuracy:  98.04%
Precision: 0.9804
Recall:    0.9804
F1 Score:  0.9804
AUC for banana: 0.9998
AUC for carrot: 0.9990
AUC for cucumber: 0.9991
AUC for mandarin: 1.0000
AUC for tomato: 1.0000
--- Test Metrics ---
Accuracy:  89.70%
Precision: 0.9186
Recall:    0.8970
F1 Score:  0.8951
AUC for banana: 0.9971
AUC for carrot: 0.9935
AUC for cucumber: 0.9885
AUC for mandarin: 0.9989
AUC for tomato: 0.9980
