In [25]:
import pandas as pd
import joblib
from sklearn.metrics import accuracy_score, precision_score, recall_score
import os

In [26]:
# load data + models
#
# Reads the encoded ticket data, keeps the 2024 rows as the test set, and
# collects the file names of the saved isolation-forest models to evaluate.

test_file = "../DATA/Final/encoded_parking_tickets.csv"
df_test = pd.read_csv(test_file)

# Keep only 2024 for testing
df_test = df_test[df_test['Year'] == 2024]

if df_test.empty:
    raise ValueError("No 2024 data found for testing!")

# Model inputs: every column except 'Year', which is only used to select the test rows.
# NOTE(review): assumes these are the same columns, in the same order, that the
# saved scalers/models were fitted on -- confirm against the training notebook.
features = [col for col in df_test.columns if col != 'Year']

X_test = df_test[features]

model_dir = "../OUTPUT/Final/models"
# os.listdir() returns entries in arbitrary order; sort so the evaluation order
# (and therefore the row index of the results table) is the same on every run.
model_files = sorted(
    f for f in os.listdir(model_dir)
    if f.startswith("isolation_forest_") and f.endswith(".joblib")
)

# Fail early with a clear message instead of a confusing KeyError when the
# (empty) results table is sorted by 'Accuracy' later on.
if not model_files:
    raise FileNotFoundError(
        f"No isolation_forest_*.joblib models found in {model_dir}"
    )


In [None]:
# score models
#
# For each saved isolation forest: load it with its matching scaler, predict on
# the 2024 test rows, and record how many rows the model treats as normal.

# Every row in the test set is an issued ticket, so the true label is always 1.
# This does not depend on the model, so build it once instead of per iteration.
y_true = pd.Series([1] * len(df_test))

results = []

for model_file in model_files:
    # "isolation_forest_<label>.joblib" -> "<label>"
    label = model_file.replace("isolation_forest_", "").replace(".joblib", "")

    # Load model and corresponding scaler
    model = joblib.load(os.path.join(model_dir, model_file))
    scaler = joblib.load(os.path.join(model_dir, f"scaler_{label}.joblib"))

    # Scale features
    X_scaled = scaler.transform(X_test)

    # Predict anomalies
    preds = model.predict(X_scaled)  # 1 = normal, -1 = anomaly
    # Map to ticket prediction: -1 → 0 (no ticket), 1 → 1 (ticket)
    y_pred = (preds == 1).astype(int)

    # Accuracy is the only metric recorded. Because y_true contains only
    # positives, accuracy is identical to recall (the share of rows predicted 1),
    # and precision carries no information (there can be no false positives),
    # so neither is computed.
    acc = accuracy_score(y_true, y_pred)

    results.append({
        'Model': label,
        'Accuracy': acc,
    })

In [33]:
# summarise results: one row per model, ranked from highest to lowest accuracy

results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by='Accuracy', ascending=False)

print("\n--- Model performance on 2024 test set ---")
print(results_df)


--- Model performance on 2024 test set ---
       Model  Accuracy  Precision    Recall
1  2010-2014  0.967062        1.0  0.967062
3  2015-2019  0.963616        1.0  0.963616
0   all_time  0.961082        1.0  0.961082
2     recent  0.949225        1.0  0.949225
4  2020-2024  0.947299        1.0  0.947299


In [None]:
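# Note: the 2010-2014 model scored best, even though you'd expect the most recent model to be best.
# Caveat: every test row is labelled 1, so accuracy here is just the share of 2024 rows a model
# treats as normal; a higher score is not by itself evidence of a better model.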