In [1]:
nb_folds = 5
seed = 2024

input_dir = "input/"
labels_dir = input_dir + "station_data"
satelite_dir = input_dir + "satelite_data"

!echo $labels_dir
!ls -alh $labels_dir | head

!echo $satelite_dir
!ls -alh $satelite_dir | tail

input/station_data
total 4.0M
drwxrwxr-x 2 moto moto 4.0K Jan 26 20:41 .
drwxrwxr-x 4 moto moto 4.0K Feb 12 11:15 ..
-rw-rw-r-- 1 moto moto 349K Aug 25 01:19 Atlantic_City_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 329K Aug 25 01:19 Baltimore_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 315K Aug 25 01:19 Eastport_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 321K Aug 25 01:19 Fort_Pulaski_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 291K Aug 25 01:19 Lewes_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 335K Aug 25 01:19 New_London_1993_2013_training_data.csv
-rw-rw-r-- 1 moto moto 313K Aug 25 01:19 Newport_1993_2013_training_data.csv
input/satelite_data
-rw-rw-r-- 1 moto moto 241K Aug 25 21:41 dt_ena_20131222_vDT2021.nc
-rw-rw-r-- 1 moto moto 241K Aug 25 21:41 dt_ena_20131223_vDT2021.nc
-rw-rw-r-- 1 moto moto 241K Aug 25 21:41 dt_ena_20131224_vDT2021.nc
-rw-rw-r-- 1 moto moto 241K Aug 25 21:42 dt_ena_20131225_vDT2021.nc
-rw-rw-r-- 1 moto moto 241K Aug 25 21

In [2]:
# !pip install netCDF4
import netCDF4
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

In [3]:
import numpy as np
import pandas as pd
import pickle
import glob

from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import numpy as np
np.bool = bool

from sklearn import metrics
import matplotlib.pyplot as plt
from tqdm import tqdm


# Mine
from Isolation import IsolationModel, GcForestModel

import utils
utils.seed_basic(seed)

!python --version

Python 3.9.21


In [4]:
from sklearn.metrics import recall_score, precision_score, f1_score, roc_auc_score, accuracy_score
from sklearn.metrics import precision_recall_curve, auc

In [5]:
def get_best_threshold(y_pred, y_val, step_range=0.001, verbose=False):
    # Get the probability values for the positive class
    y_prob = y_pred[:, 1] if (len(y_pred.shape) >= 2) else y_pred
    y_test = y_val[:]
    
    # Initialize the variables to store the best threshold and F1 score
    best_threshold = 0
    best_f1 = 0
     
    # Loop through the possible threshold values from 0 to 1 with a step of 0.01
    for threshold in np.arange(0, 1, step_range):
        # Apply the threshold to the probability values to get the predicted labels
        y_pred_bin = (y_prob >= threshold).astype(int)
        if (y_pred_bin.mean() > 0) and (y_pred_bin.mean() < 1):
            # Calculate the F1 score for the current threshold
            f1 = f1_score(y_test, y_pred_bin)
            # Update the best threshold and F1 score if the current F1 score is higher
            if f1 > best_f1:
                best_threshold = threshold
                best_f1 = f1

    auc = -1
    if (y_val.mean() > 0) and (y_val.mean() < 1):
        auc = roc_auc_score(y_val, y_prob)
    
    if verbose:
        # Print the best threshold and F1 score
        print(f"The optimal threshold is {best_threshold:.2f}")
        print(f"The F1 score using the optimal threshold is {best_f1:.3f}")

    return float(best_threshold), float(round(best_f1, 3)), float(round(auc, 3))

In [6]:
filenames = glob.glob(f"{labels_dir}/*.csv")
df_full = []
for filename in filenames:
    if "merged" in filename:
        continue
    print(filename)
    df_run = pd.read_csv(filename)
    df_full.append(df_run)

print(len(df_full))
df_full = pd.concat(df_full).reset_index(drop=True)

first_date = '1993-01-01'
first_year = int(first_date[:4])

print(df_full.groupby(["location"])["anomaly"].agg(["count", "mean", "sum"]))
df_full["fold"] = df_full["t"].apply(lambda t: (int(t[:4]) - first_year) % nb_folds)
print(df_full.groupby(["fold"])["anomaly"].agg(["count", "mean"]))
df_full.tail()

input/station_data/Fort_Pulaski_1993_2013_training_data.csv
input/station_data/Eastport_1993_2013_training_data.csv
input/station_data/Sewells_Point_1993_2013_training_data.csv
input/station_data/Lewes_1993_2013_training_data.csv
input/station_data/The_Battery_1993_2013_training_data.csv
input/station_data/New_London_1993_2013_training_data.csv
input/station_data/Portland_1993_2013_training_data.csv
input/station_data/Baltimore_1993_2013_training_data.csv
input/station_data/Newport_1993_2013_training_data.csv
input/station_data/Washington_1993_2013_training_data.csv
input/station_data/Atlantic_City_1993_2013_training_data.csv
input/station_data/Sandy_Hook_1993_2013_training_data.csv
12
               count      mean  sum
location                           
Atlantic City   7429  0.043478  323
Baltimore       7649  0.023271  178
Eastport        7495  0.062041  465
Fort Pulaski    7628  0.014552  111
Lewes           7639  0.041367  316
New London      7607  0.004470   34
Newport         7

Unnamed: 0,t,anomaly,location,latitude,longitude,fold
90443,2013-12-27,0,Sandy Hook,40.466944,-74.009444,0
90444,2013-12-28,0,Sandy Hook,40.466944,-74.009444,0
90445,2013-12-29,0,Sandy Hook,40.466944,-74.009444,0
90446,2013-12-30,0,Sandy Hook,40.466944,-74.009444,0
90447,2013-12-31,0,Sandy Hook,40.466944,-74.009444,0


In [7]:
df_full.groupby("t")["location"].count().sort_values()

t
2002-02-28     9
2004-01-25     9
2004-01-24     9
2004-01-16     9
2004-02-01     9
              ..
2000-02-17    12
2000-02-16    12
2000-02-15    12
2000-02-25    12
2013-12-31    12
Name: location, Length: 7670, dtype: int64

In [8]:
df_full[df_full["t"] == "2007-04-19"].sort_values("location")

Unnamed: 0,t,anomaly,location,latitude,longitude,fold
80449,2007-04-19,1,Atlantic City,39.356667,-74.418053,4
58113,2007-04-19,1,Baltimore,39.266944,-76.579444,4
12692,2007-04-19,1,Eastport,44.904598,-66.982903,4
5179,2007-04-19,1,Fort Pulaski,32.0367,-80.9017,4
27973,2007-04-19,1,Lewes,38.78278,-75.119164,4
42843,2007-04-19,1,New London,41.361401,-72.089996,4
65738,2007-04-19,0,Newport,41.504333,-71.326139,4
50473,2007-04-19,1,Portland,43.65806,-70.24417,4
88048,2007-04-19,1,Sandy Hook,40.466944,-74.009444,4
20332,2007-04-19,0,Sewells Point,36.946701,-76.330002,4


In [9]:
df_full["year"] = df_full["t"].apply(lambda t: int(t[:4]))
df_full.groupby("year")["anomaly"].agg(["count", "mean", "sum"]).sort_values("mean")                         

Unnamed: 0_level_0,count,mean,sum
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1994,4330,0.012933,56
2002,4255,0.013161,56
2001,4205,0.014744,62
2004,3934,0.015506,61
1999,4260,0.015962,68
2000,4352,0.016544,72
1995,4367,0.019006,83
2008,4366,0.023591,103
2007,4364,0.026123,114
1997,4195,0.02646,111


In [10]:
locations = [
    "Date",
    "Average_SLA",
    "Atlantic City",
    "Baltimore",
    "Eastport",
    "Fort Pulaski",
    "Lewes",
    "New London",
    "Newport",
    "Portland",
    "Sandy Hook",
    "Sewells Point",
    "The Battery",
    "Washington",
][2:]
print(locations)

['Atlantic City', 'Baltimore', 'Eastport', 'Fort Pulaski', 'Lewes', 'New London', 'Newport', 'Portland', 'Sandy Hook', 'Sewells Point', 'The Battery', 'Washington']


In [11]:
filenames = glob.glob(f"{satelite_dir}/*.nc")
print(len(filenames))

dict_sla = {}
for filename in tqdm(filenames):
    date_str = filename.split("/")[-1].split(".")[0].split("_")[2] # dt_ena_19930101_vDT2021.nc"
    if len(date_str) != 8:
        continue
    formatted_date = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:8]}"

    nc_filename = filename
    
    dataset = netCDF4.Dataset(nc_filename, mode="r")
    sla = dataset.variables["sla"][:]
    sla = np.ma.getdata(sla[0, :, :].filled(0))
    dict_sla[formatted_date] = sla

    dataset.close()
len(dict_sla)

7302


100%|██████████████████████████████████████| 7302/7302 [00:54<00:00, 134.54it/s]


7302

In [12]:
dates = list(dict_sla.keys())
dates.sort()
print(len(dates), dates[:3], dates[-3:])

7302 ['1993-01-01', '1993-01-02', '1993-01-03'] ['2013-12-29', '2013-12-30', '2013-12-31']


In [13]:
cities_coordinates = {
    "Atlantic City": {"latitude": 39.3643, "longitude": -74.4229},
    "Baltimore": {"latitude": 39.2904, "longitude": -76.6122},
    "Eastport": {"latitude": 44.9034, "longitude": -66.9895},
    "Fort Pulaski": {"latitude": 32.0317, "longitude": -80.8970},
    "Lewes": {"latitude": 38.7746, "longitude": -75.1394},
    "New London": {"latitude": 41.3557, "longitude": -72.0995},
    "Newport": {"latitude": 41.4901, "longitude": -71.3128},
    "Portland": {"latitude": 43.6591, "longitude": -70.2568},
    "Sandy Hook": {"latitude": 40.4649, "longitude": -74.0054},
    "Sewells Point": {"latitude": 36.9463, "longitude": -76.3297},
    "The Battery": {"latitude": 40.7033, "longitude": -74.0170},
    "Washington": {"latitude": 38.9072, "longitude": -77.0369},
}

# 100 160
cities_indexes = {
    'Atlantic City': {'latitude': (55, 60), 'longitude': (100, 105)},
    'Baltimore': {'latitude': (55, 60), 'longitude': (91, 96)},
    'Eastport': {'latitude': (77, 82), 'longitude': (130, 135)},
    'Fort Pulaski': {'latitude': (26, 31), 'longitude': (74, 79)},
    'Lewes': {'latitude': (53, 58), 'longitude': (97, 102)},
    'New London': {'latitude': (63, 68), 'longitude': (109, 114)},
    'Newport': {'latitude': (63, 68), 'longitude': (112, 117)},
    'Portland': {'latitude': (72, 77), 'longitude': (116, 121)},
    'Sandy Hook': {'latitude': (59, 64), 'longitude': (101, 106)},
    'Sewells Point': {'latitude': (45, 50), 'longitude': (92, 97)},
    'The Battery': {'latitude': (60, 65), 'longitude': (101, 106)},
    'Washington': {'latitude': (53, 58), 'longitude': (89, 94)}
}

def get_features_row(row, verbose=False, margin = 3):
    sla = dict_sla[row["t"]]
    indexes = cities_indexes[row["location"]]
    latitude = indexes["latitude"]
    longitude = indexes["longitude"]

    sla_row = sla[latitude[0] - margin : latitude[1] + margin, longitude[0] - margin : longitude[1] + margin]
    if verbose:
        print(sla.shape, sla_row.shape)
        print(sla.shape)

    return sla_row

get_features_row(df_full.iloc[0], verbose=True)

(100, 160) (11, 11)
(100, 160)


array([[ 0.    ,  0.    ,  0.    , -0.0195, -0.0245, -0.0204, -0.0118,
         0.    ,  0.0127,  0.0235,  0.0292],
       [ 0.    ,  0.    , -0.0173, -0.0157, -0.0197, -0.0251, -0.0305,
        -0.025 , -0.0001,  0.029 ,  0.045 ],
       [ 0.    ,  0.    ,  0.    , -0.019 , -0.0162, -0.0268, -0.0362,
        -0.0282, -0.0114,  0.016 ,  0.038 ],
       [ 0.    ,  0.    ,  0.    , -0.0142, -0.0128, -0.0228, -0.036 ,
        -0.0271, -0.0184, -0.0039, -0.0034],
       [ 0.    ,  0.    ,  0.    ,  0.    , -0.0039, -0.0206, -0.0248,
        -0.0315, -0.0382, -0.0372, -0.033 ],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.0259, -0.0245,
        -0.0213, -0.0208, -0.0418, -0.0426],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    , -0.0031,  0.0283,
        -0.0062, -0.0147, -0.0205, -0.0213],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
        -0.0028, -0.0072, -0.0016,  0.0103],
       [ 0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,  0.    ,
 

In [14]:
def sampling(df, target_ratio=0.02, target_col="anomaly"):
    if df[target_col].mean() >= target_ratio:
        return df.copy()
    
    df_pos = df[df[target_col] == 1].copy()
    df_neg = df[df[target_col] != 1].copy().sample(frac=1, random_state=seed).reset_index(drop=True)

    nb_neg = int( (df_pos.shape[0])*(1-target_ratio) / target_ratio)
    df_com = pd.concat([df_pos, df_neg.head(nb_neg)]).sample(frac=1, random_state=seed).reset_index(drop=True)
    return df_com

df_com = sampling(df_full)
print(df_com["anomaly"].value_counts())
df_com["anomaly"].value_counts(normalize=True)

anomaly
0    87950
1     2498
Name: count, dtype: int64


anomaly
0    0.972382
1    0.027618
Name: proportion, dtype: float64

In [15]:
city = locations[3]
fold = 0
dict_models = {}
target_col = "anomaly"
ignored_cities = ["Newport"]

# if True:
run_folds = [-1] # list(range(nb_folds)) + [-1]
print("run_folds:", run_folds)
run_margins = [3, 5, 7, 10, 15, 20] # list(range(nb_folds)) + [-1]
print("run_margins:", run_margins)

run_max_samples = [100, 500, 1000, 3000] # list(range(nb_folds)) + [-1]
print("run_max_samples:", run_max_samples)

for city in locations:
    print("="*40)
    dict_models[city] = []
        
    df_folds = df_full[df_full["location"] == city]
    df_folds = df_folds[df_folds["t"].isin(dates)].sort_values("t").reset_index(drop=True)
    print(city, df_folds.shape, df_folds[target_col].mean())
    
    for fold in run_folds:
        df_fold_train = df_folds[df_folds["fold"] != fold].reset_index(drop=True)
        if fold >= 0:
            df_fold_val = df_folds[df_folds["fold"] == fold].reset_index(drop=True)
        else:
            df_fold_val = df_fold_train.copy()

        # Only sampling the train set
        # df_fold_train = sampling(df_fold_train).sort_values("t").reset_index(drop=True)

        for margin in run_margins:
            print("="*40)
            print( f"Processing margin {margin} city {city}  fold {fold} ... ")
        
            X_train = []
            for index, row in tqdm(df_fold_train.iterrows()):
                img = get_features_row(row, margin=margin)
                X_train.append(img.flatten())
            X_train = np.stack(X_train)
            
            X_val = []
            for index, row in tqdm(df_fold_val.iterrows()):
                img = get_features_row(row, margin=margin)
                X_val.append(img.flatten())
            X_val = np.stack(X_val)
            
            y_train = np.array(df_fold_train[target_col])
            y_val = np.array(df_fold_val[target_col])
            
            print(X_train.shape, X_val.shape, y_train.mean(), y_val.mean())

            for max_samples in run_max_samples:
                        
            # model = CascadeForestClassifier(random_state=1)
            # model = gcForest(shape_1X=[8,8], window=[4,6], tolerance=0.0, min_samples_mgs=10, min_samples_cascade=7)
            # model.fit(X_train, y_train) # Be careful when fold==-1, train == val
                model = IsolationModel(max_samples=max_samples, random_state=seed)
                model.fit(X_train)
                            
                y_pred = model.predict_proba(X_val)[:,1]
                best_threshold, best_f1, auc = get_best_threshold(y_pred, y_val)

                y_pred_bin = (y_pred >= best_threshold)
                acc = accuracy_score(y_val, y_pred_bin)
                
                if (y_val.mean() == 0) or (y_val.mean() == 1):
                    auc = -1
                    f1 = -1
                else:
                    auc = float(roc_auc_score(y_val, y_pred))
                    f1 = float(f1_score(y_val, y_pred_bin, pos_label=1))
        
                print(f"F1: {f1}, ACC: {acc}, AUC: {auc}")
                
                dict_models[city].append( {
                    "model": model,
                    "params": {
                        "fold": fold,
                        "margin": margin,
                        "max_samples": max_samples,
                        "best_threshold": best_threshold,

                        "acc": acc,
                        "auc": auc,
                        "best_f1": f1,
                    },
                    "y_pred": y_pred,
                    "y_val": y_val,
                    "df_val": df_fold_val
                }
                )
                print("="*40)
        
len(dict_models)

run_folds: [-1]
run_margins: [3, 5, 7, 10, 15, 20]
run_max_samples: [100, 500, 1000, 3000]
Atlantic City (7063, 7) 0.04289961772617868
Processing margin 3 city Atlantic City  fold -1 ... 


7063it [00:00, 10222.87it/s]
7063it [00:00, 10231.12it/s]


(7063, 121) (7063, 121) 0.04289961772617868 0.04289961772617868
F1: 0.1358936484490399, ACC: 0.91717400538015, AUC: 0.5514299802761342
F1: 0.11505922165820642, ACC: 0.9259521449808863, AUC: 0.5375485773429414
F1: 0.10465116279069768, ACC: 0.9345887016848364, AUC: 0.5309283886968579
F1: 0.10276679841897234, ACC: 0.935721364859125, AUC: 0.5299097291385942
Processing margin 5 city Atlantic City  fold -1 ... 


7063it [00:00, 9751.33it/s] 
7063it [00:00, 10053.24it/s]


(7063, 225) (7063, 225) 0.04289961772617868 0.04289961772617868
F1: 0.1361573373676248, ACC: 0.919156165935155, AUC: 0.5488334114476536
F1: 0.13458528951486698, ACC: 0.9217046580773043, AUC: 0.5487955748237546
F1: 0.14093959731543623, ACC: 0.927509556845533, AUC: 0.551232741617357
F1: 0.13771517996870108, ACC: 0.9219878238708764, AUC: 0.5524408284023669
Processing margin 7 city Atlantic City  fold -1 ... 


7063it [00:00, 9598.91it/s]
7063it [00:00, 9721.26it/s]


(7063, 361) (7063, 361) 0.04289961772617868 0.04289961772617868
F1: 0.14384508990318118, ACC: 0.9123601868894238, AUC: 0.5575224090456383
F1: 0.13986013986013987, ACC: 0.912926518476568, AUC: 0.5541273653992618
F1: 0.12732919254658384, ACC: 0.9204304120062297, AUC: 0.5449001113129065
F1: 0.13670886075949368, ACC: 0.9034404643919015, AUC: 0.5553713359501631
Processing margin 10 city Atlantic City  fold -1 ... 


7063it [00:00, 9362.19it/s]
7063it [00:00, 9794.46it/s]


(7063, 625) (7063, 625) 0.04289961772617868 0.04289961772617868
F1: 0.14925373134328357, ACC: 0.8950870734815234, AUC: 0.5739940340187863
F1: 0.13142174432497014, ACC: 0.8970692340365284, AUC: 0.5621785107504833
F1: 0.1425, ACC: 0.9028741328047571, AUC: 0.5652855078407248
F1: 0.13682432432432431, ACC: 0.8553022794846382, AUC: 0.5750048821450192
Processing margin 15 city Atlantic City  fold -1 ... 


7063it [00:00, 8886.72it/s]
7063it [00:00, 8839.92it/s]


(7063, 1225) (7063, 1225) 0.04289961772617868 0.04289961772617868
F1: 0.14535901926444833, ACC: 0.8618150927367974, AUC: 0.5817412658425606
F1: 0.16, ACC: 0.9197224975222993, AUC: 0.5806872107329076
F1: 0.14285714285714285, ACC: 0.9388361885884186, AUC: 0.5620383931884313
F1: 0.15294117647058825, ACC: 0.8980603143140309, AUC: 0.5911589235846662
Processing margin 20 city Atlantic City  fold -1 ... 


7063it [00:00, 8076.41it/s]
7063it [00:00, 8598.57it/s]


(7063, 2025) (7063, 2025) 0.04289961772617868 0.04289961772617868
F1: 0.13570274636510501, ACC: 0.848506300438907, AUC: 0.5757462358661901
F1: 0.1246376811594203, ACC: 0.9144839303412148, AUC: 0.5646127482570742
F1: 0.13548387096774195, ACC: 0.8861673509840011, AUC: 0.5793019020836995
F1: 0.15089514066496162, ACC: 0.9059889565340506, AUC: 0.5858283535454137
Baltimore (7286, 7) 0.023744166895415867
Processing margin 3 city Baltimore  fold -1 ... 


7286it [00:00, 9776.33it/s]
7286it [00:00, 9762.68it/s]


(7286, 121) (7286, 121) 0.023744166895415867 0.023744166895415867
F1: 0.08843537414965986, ACC: 0.9264342574800988, AUC: 0.5461513519575408
F1: 0.08609271523178808, ACC: 0.9242382651660719, AUC: 0.5423367943901461
F1: 0.045454545454545456, ACC: 0.9596486412297557, AUC: 0.5095587416673371
F1: 0.044444444444444446, ACC: 0.9645896239363162, AUC: 0.5095879969021957
Processing margin 5 city Baltimore  fold -1 ... 


7286it [00:00, 10199.43it/s]
7286it [00:00, 9825.16it/s] 


(7286, 225) (7286, 225) 0.023744166895415867 0.023744166895415867
F1: 0.11711711711711711, ACC: 0.946198188306341, AUC: 0.5666076686097019
F1: 0.1111111111111111, ACC: 0.9407082075212737, AUC: 0.5589944000604609
F1: 0.10309278350515463, ACC: 0.9522371671699149, AUC: 0.5516216745533904
F1: 0.1016949152542373, ACC: 0.9563546527587153, AUC: 0.5504478082546896
Processing margin 7 city Baltimore  fold -1 ... 


7286it [00:00, 10003.36it/s]
7286it [00:00, 10175.67it/s]


(7286, 361) (7286, 361) 0.023744166895415867 0.023744166895415867
F1: 0.12043010752688173, ACC: 0.9438649464726874, AUC: 0.5623587520691985
F1: 0.1111111111111111, ACC: 0.9363162228932199, AUC: 0.5612039829376969
F1: 0.09631728045325778, ACC: 0.9562174032390887, AUC: 0.5374999288935264
F1: 0.10818307905686546, ACC: 0.9117485588800439, AUC: 0.5818346120308902
Processing margin 10 city Baltimore  fold -1 ... 


7286it [00:00, 9967.43it/s] 
7286it [00:00, 10082.39it/s]


(7286, 625) (7286, 625) 0.023744166895415867 0.023744166895415867
F1: 0.1111111111111111, ACC: 0.9253362613230853, AUC: 0.5711150063914563
F1: 0.09375, ACC: 0.9442766950315674, AUC: 0.5420726846310061
F1: 0.09090909090909091, ACC: 0.9725500960746637, AUC: 0.5369343276862603
F1: 0.09747899159663866, ACC: 0.9262970079604721, AUC: 0.555547158219624
Processing margin 15 city Baltimore  fold -1 ... 


7286it [00:00, 9435.50it/s] 
7286it [00:00, 9226.34it/s]


(7286, 1225) (7286, 1225) 0.023744166895415867 0.023744166895415867
F1: 0.10431654676258993, ACC: 0.9316497392259127, AUC: 0.5892873831111154
F1: 0.10011641443538999, ACC: 0.8939061213285754, AUC: 0.5823010704978022
F1: 0.10189228529839883, ACC: 0.9153170463903376, AUC: 0.5735033712594947
F1: 0.09621993127147767, ACC: 0.9639033763381828, AUC: 0.5583235612722451
Processing margin 20 city Baltimore  fold -1 ... 


7286it [00:00, 8300.46it/s]
7286it [00:00, 8870.76it/s]


(7286, 2025) (7286, 2025) 0.023744166895415867 0.023744166895415867
F1: 0.11930926216640503, ACC: 0.9230030194894318, AUC: 0.607281790485385
F1: 0.10973936899862825, ACC: 0.9109250617622838, AUC: 0.5929483506955026
F1: 0.10377358490566038, ACC: 0.9478451825418611, AUC: 0.5888176740625527
F1: 0.12293144208037825, ACC: 0.9490804282185012, AUC: 0.6156715417264977
Eastport (7127, 7) 0.06159674477339694
Processing margin 3 city Eastport  fold -1 ... 


7127it [00:00, 9356.04it/s] 
7127it [00:00, 9729.66it/s]


(7127, 121) (7127, 121) 0.06159674477339694 0.06159674477339694
F1: 0.04126547455295736, ACC: 0.9022028904167251, AUC: 0.4952687845364084
F1: 0.01834862385321101, ACC: 0.9249333520415322, AUC: 0.4972725092914519
F1: 0.013245033112582781, ACC: 0.9372807632945138, AUC: 0.49873025907074586
F1: 0.013392857142857142, ACC: 0.9379823207520696, AUC: 0.49962602587437743
Processing margin 5 city Eastport  fold -1 ... 


7127it [00:00, 9953.26it/s] 
7127it [00:00, 10031.03it/s]


(7127, 225) (7127, 225) 0.06159674477339694 0.06159674477339694
F1: 0.07851239669421488, ACC: 0.87484214957205, AUC: 0.5032596715567131
F1: 0.02631578947368421, ACC: 0.9065525466535709, AUC: 0.4910975084740221
F1: 0.01609657947686117, ACC: 0.9313876806510453, AUC: 0.49571172929995316
F1: 0.015810276679841896, ACC: 0.930124877227445, AUC: 0.4980832974572484
Processing margin 7 city Eastport  fold -1 ... 


7127it [00:00, 10011.18it/s]
7127it [00:00, 9801.62it/s] 


(7127, 361) (7127, 361) 0.06159674477339694 0.06159674477339694
F1: 0.08793264733395698, ACC: 0.8631962957766242, AUC: 0.5078500506806466
F1: 0.04052443384982122, ACC: 0.8870492493335204, AUC: 0.4899047421826465
F1: 0.023529411764705882, ACC: 0.9068331696365932, AUC: 0.4889626543579906
F1: 0.019417475728155338, ACC: 0.9291426967868669, AUC: 0.4950695360268553
Processing margin 10 city Eastport  fold -1 ... 


7127it [00:00, 10185.53it/s]
7127it [00:00, 9579.94it/s] 


(7127, 625) (7127, 625) 0.06159674477339694 0.06159674477339694
F1: 0.08435072142064373, ACC: 0.8842430195032973, AUC: 0.510377611688156
F1: 0.040983606557377046, ACC: 0.9015013329591693, AUC: 0.4961492245316128
F1: 0.024096385542168676, ACC: 0.9204433843131753, AUC: 0.4938999302459919
F1: 0.016877637130801686, ACC: 0.9346148449558018, AUC: 0.49888965787838824
Processing margin 15 city Eastport  fold -1 ... 


7127it [00:00, 9369.57it/s]
7127it [00:00, 8348.08it/s]


(7127, 1225) (7127, 1225) 0.06159674477339694 0.06159674477339694
F1: 0.08627450980392157, ACC: 0.8692296899116038, AUC: 0.5086153693147758
F1: 0.031446540880503145, ACC: 0.9135681212291287, AUC: 0.4958775994267093
F1: 0.11604546656093048, ACC: 0.06159674477339694, AUC: 0.4931967703349282
F1: 0.027164685908319185, ACC: 0.9196015153641083, AUC: 0.49794007694739023
Processing margin 20 city Eastport  fold -1 ... 


7127it [00:00, 8579.03it/s]
7127it [00:00, 9275.78it/s]


(7127, 1935) (7127, 1935) 0.06159674477339694 0.06159674477339694
F1: 0.10571428571428572, ACC: 0.8243300126280342, AUC: 0.5147464332813811
F1: 0.060836501901140684, ACC: 0.8960291847902343, AUC: 0.5022085930943532
F1: 0.0847457627118644, ACC: 0.8636172302511576, AUC: 0.504926036228488
F1: 0.07646356033452807, ACC: 0.8915392170618773, AUC: 0.5064503724755044
Fort Pulaski (7262, 7) 0.014596529881575324
Processing margin 3 city Fort Pulaski  fold -1 ... 


7262it [00:00, 10469.81it/s]
7262it [00:00, 10433.54it/s]


(7262, 121) (7262, 121) 0.014596529881575324 0.014596529881575324
F1: 0.11055276381909548, ACC: 0.975626549160011, AUC: 0.5435536348966957
F1: 0.11016949152542373, ACC: 0.97108234646103, AUC: 0.5478922556081716
F1: 0.12834224598930483, ACC: 0.9775543927292757, AUC: 0.5502342670618139
F1: 0.11822660098522167, ACC: 0.9753511429358304, AUC: 0.55232975099402
Processing margin 5 city Fort Pulaski  fold -1 ... 


7262it [00:00, 10063.94it/s]
7262it [00:00, 10120.72it/s]


(7262, 225) (7262, 225) 0.014596529881575324 0.014596529881575324
F1: 0.125, ACC: 0.9845772514458827, AUC: 0.548467706213021
F1: 0.13138686131386862, ACC: 0.9836133296612504, AUC: 0.5520093970490524
F1: 0.1301775147928994, ACC: 0.979757642522721, AUC: 0.5536045751289326
F1: 0.1267605633802817, ACC: 0.9829248141007987, AUC: 0.5537284980541464
Processing margin 7 city Fort Pulaski  fold -1 ... 


7262it [00:00, 10591.26it/s]
7262it [00:00, 10312.98it/s]


(7262, 361) (7262, 361) 0.014596529881575324 0.014596529881575324
F1: 0.12162162162162163, ACC: 0.9820985954282567, AUC: 0.5438166415305272
F1: 0.12987012987012986, ACC: 0.9815477829798953, AUC: 0.545801385827436
F1: 0.13114754098360656, ACC: 0.9854034701184247, AUC: 0.5546025501756014
F1: 0.13138686131386862, ACC: 0.9836133296612504, AUC: 0.5629976691943428
Processing margin 10 city Fort Pulaski  fold -1 ... 


7262it [00:00, 10068.75it/s]
7262it [00:00, 10169.76it/s]


(7262, 625) (7262, 625) 0.014596529881575324 0.014596529881575324
F1: 0.11428571428571428, ACC: 0.9829248141007987, AUC: 0.5382124249870804
F1: 0.13432835820895522, ACC: 0.9840264389975213, AUC: 0.5515532552179462
F1: 0.13114754098360656, ACC: 0.9854034701184247, AUC: 0.5472528660472278
F1: 0.1322314049586777, ACC: 0.985541173230515, AUC: 0.5489693303943386
Processing margin 15 city Fort Pulaski  fold -1 ... 


7262it [00:00, 9712.14it/s] 
7262it [00:00, 9244.73it/s]


(7262, 1225) (7262, 1225) 0.014596529881575324 0.014596529881575324
F1: 0.07272727272727272, ACC: 0.971908565133572, AUC: 0.5202534882985119
F1: 0.06956521739130435, ACC: 0.9852657670063344, AUC: 0.52886349494289
F1: 0.07017543859649122, ACC: 0.978105205177637, AUC: 0.5157909446618222
F1: 0.0945945945945946, ACC: 0.9815477829798953, AUC: 0.5320854909984497
Processing margin 20 city Fort Pulaski  fold -1 ... 


7262it [00:00, 8556.24it/s]
7262it [00:00, 9198.88it/s]


(7262, 2025) (7262, 2025) 0.014596529881575324 0.014596529881575324
F1: 0.07142857142857142, ACC: 0.9856788763426053, AUC: 0.5205422023476803
F1: 0.07017543859649122, ACC: 0.9854034701184247, AUC: 0.5180637438434036
F1: 0.07239819004524888, ACC: 0.9717708620214817, AUC: 0.5103759083286752
F1: 0.06956521739130435, ACC: 0.9852657670063344, AUC: 0.5169840323992533
Lewes (7271, 7) 0.040984733874295146
Processing margin 3 city Lewes  fold -1 ... 


7271it [00:00, 10482.58it/s]
7271it [00:00, 10437.64it/s]


(7271, 121) (7271, 121) 0.040984733874295146 0.040984733874295146
F1: 0.17229729729729729, ACC: 0.9326089946362262, AUC: 0.5676461076616711
F1: 0.14701803051317613, ACC: 0.9154174116352634, AUC: 0.5624891118860186
F1: 0.1692047377326565, ACC: 0.9324714619722184, AUC: 0.5659321621171596
F1: 0.1488095238095238, ACC: 0.9213313161875946, AUC: 0.5622597997838258
Processing margin 5 city Lewes  fold -1 ... 


7271it [00:00, 10166.41it/s]
7271it [00:00, 10257.73it/s]


(7271, 225) (7271, 225) 0.040984733874295146 0.040984733874295146
F1: 0.13996889580093314, ACC: 0.9239444368037409, AUC: 0.557343665932932
F1: 0.15629522431259044, ACC: 0.9198184568835098, AUC: 0.5667387247263415
F1: 0.15, ACC: 0.9251822307798102, AUC: 0.559749638346181
F1: 0.15593220338983052, ACC: 0.9315087333241645, AUC: 0.5604156781141449
Processing margin 7 city Lewes  fold -1 ... 


7271it [00:00, 9933.54it/s] 
7271it [00:00, 10033.84it/s]


(7271, 361) (7271, 361) 0.040984733874295146 0.040984733874295146
F1: 0.15584415584415584, ACC: 0.9374226378764957, AUC: 0.5662591664685551
F1: 0.15135135135135136, ACC: 0.9352221152523724, AUC: 0.5604250623449797
F1: 0.15458937198067632, ACC: 0.9277953513959566, AUC: 0.5604474401262011
F1: 0.15714285714285714, ACC: 0.9350845825883647, AUC: 0.5756154852321081
Processing margin 10 city Lewes  fold -1 ... 


7271it [00:00, 10320.55it/s]
7271it [00:00, 9292.12it/s]


(7271, 625) (7271, 625) 0.040984733874295146 0.040984733874295146
F1: 0.1423570595099183, ACC: 0.8989134919543391, AUC: 0.5807467345282908
F1: 0.14065934065934066, ACC: 0.9462247283729885, AUC: 0.5582977294011321
F1: 0.13517665130568357, ACC: 0.9225691101636638, AUC: 0.5518538908946011
F1: 0.15612382234185734, ACC: 0.913767019667171, AUC: 0.5788612259944157
Processing margin 15 city Lewes  fold -1 ... 


7271it [00:00, 9495.73it/s] 
7271it [00:00, 9355.95it/s] 


(7271, 1225) (7271, 1225) 0.040984733874295146 0.040984733874295146
F1: 0.14354995150339475, ACC: 0.8785586576811993, AUC: 0.5929924339037341
F1: 0.15718157181571815, ACC: 0.9144546829872094, AUC: 0.5899589211310742
F1: 0.14825581395348839, ACC: 0.9194058588914867, AUC: 0.5757668360319814
F1: 0.15563298490127758, ACC: 0.9000137532664008, AUC: 0.5889860892012047
Processing margin 20 city Lewes  fold -1 ... 


7271it [00:00, 9019.33it/s]
7271it [00:00, 9261.53it/s] 


(7271, 2025) (7271, 2025) 0.040984733874295146 0.040984733874295146
F1: 0.13458950201884254, ACC: 0.9115664970430477, AUC: 0.5800749198490438
F1: 0.1400560224089636, ACC: 0.8733324164489066, AUC: 0.5762119854433737
F1: 0.12487205731832139, ACC: 0.882409572273415, AUC: 0.5869302207844832
F1: 0.15168539325842698, ACC: 0.9169302709393481, AUC: 0.605044433129896
New London (7240, 7) 0.004281767955801105
Processing margin 3 city New London  fold -1 ... 


7240it [00:00, 10055.53it/s]
7240it [00:00, 10394.00it/s]


(7240, 121) (7240, 121) 0.004281767955801105 0.004281767955801105
F1: 0.0975609756097561, ACC: 0.9948895027624309, AUC: 0.5248211241324687
F1: 0.058823529411764705, ACC: 0.9911602209944751, AUC: 0.5504969146989203
F1: 0.06557377049180328, ACC: 0.9921270718232044, AUC: 0.5505215255124641
F1: 0.08333333333333333, ACC: 0.9939226519337017, AUC: 0.5498547962000904
Processing margin 5 city New London  fold -1 ... 


7240it [00:00, 9695.00it/s]
7240it [00:00, 9498.95it/s]


(7240, 225) (7240, 225) 0.004281767955801105 0.004281767955801105
F1: 0.07547169811320754, ACC: 0.993232044198895, AUC: 0.5366701121805628
F1: 0.12244897959183673, ACC: 0.994060773480663, AUC: 0.5446753386224209
F1: 0.08955223880597014, ACC: 0.9915745856353592, AUC: 0.53571923983909
F1: 0.1, ACC: 0.9925414364640884, AUC: 0.549550517050819
Processing margin 7 city New London  fold -1 ... 


7240it [00:00, 9976.45it/s] 
7240it [00:00, 9996.49it/s] 


(7240, 361) (7240, 361) 0.004281767955801105 0.004281767955801105
F1: 0.10526315789473684, ACC: 0.9929558011049724, AUC: 0.5433374053043015
F1: 0.09090909090909091, ACC: 0.9917127071823204, AUC: 0.5485369990021433
F1: 0.10126582278481013, ACC: 0.9901933701657458, AUC: 0.551633486815316
F1: 0.11267605633802817, ACC: 0.9912983425414365, AUC: 0.5490829115934831
Processing margin 10 city New London  fold -1 ... 


7240it [00:00, 10420.66it/s]
7240it [00:00, 9667.47it/s] 


(7240, 625) (7240, 625) 0.004281767955801105 0.004281767955801105
F1: 0.05714285714285714, ACC: 0.9863259668508287, AUC: 0.5298708156023608
F1: 0.09836065573770492, ACC: 0.9924033149171271, AUC: 0.5291213044626116
F1: 0.09090909090909091, ACC: 0.9917127071823204, AUC: 0.544699949435965
F1: 0.08163265306122448, ACC: 0.9937845303867403, AUC: 0.5175810702571606
Processing margin 15 city New London  fold -1 ... 


7240it [00:00, 8747.47it/s]
7240it [00:00, 9075.18it/s]


(7240, 1225) (7240, 1225) 0.004281767955801105 0.004281767955801105
F1: 0.09523809523809523, ACC: 0.9947513812154696, AUC: 0.5244967088630252
F1: 0.0547945205479452, ACC: 0.9904696132596685, AUC: 0.5171268888799395
F1: 0.06557377049180328, ACC: 0.9921270718232044, AUC: 0.5284791859637819
F1: 0.12244897959183673, ACC: 0.994060773480663, AUC: 0.5429570563677124
Processing margin 20 city New London  fold -1 ... 


7240it [00:00, 8704.64it/s]
7240it [00:00, 9200.76it/s]


(7240, 2025) (7240, 2025) 0.004281767955801105 0.004281767955801105
F1: 0.0625, ACC: 0.9917127071823204, AUC: 0.5426013182446673
F1: 0.06451612903225806, ACC: 0.9879834254143647, AUC: 0.5172119080540006
F1: 0.058823529411764705, ACC: 0.9911602209944751, AUC: 0.5482797041332743
F1: 0.061855670103092786, ACC: 0.9874309392265194, AUC: 0.5540296851158274
Newport (7255, 7) 0.0005513439007580978
Processing margin 3 city Newport  fold -1 ... 


7255it [00:00, 10022.99it/s]
7255it [00:00, 10022.29it/s]


(7255, 121) (7255, 121) 0.0005513439007580978 0.0005513439007580978
F1: 0.25, ACC: 0.9991729841488629, AUC: 0.7371397048682939
F1: 0.0851063829787234, ACC: 0.9940730530668505, AUC: 0.7412081092263136
F1: 0.11764705882352941, ACC: 0.9958649207443143, AUC: 0.7436905254447662
F1: 0.15384615384615385, ACC: 0.9969676085458304, AUC: 0.7433112674113915
Processing margin 5 city Newport  fold -1 ... 


7255it [00:00, 10108.89it/s]
7255it [00:00, 10154.60it/s]


(7255, 225) (7255, 225) 0.0005513439007580978 0.0005513439007580978
F1: 0.08, ACC: 0.9936595451412819, AUC: 0.7368638808440214
F1: 0.10526315789473684, ACC: 0.9953135768435561, AUC: 0.7405530271686663
F1: 0.08163265306122448, ACC: 0.9937973811164714, AUC: 0.740932285202041
F1: 0.06779661016949153, ACC: 0.9924190213645762, AUC: 0.7413804992414839
Processing margin 7 city Newport  fold -1 ... 


7255it [00:00, 9676.26it/s] 
7255it [00:00, 9739.18it/s] 


(7255, 361) (7255, 361) 0.0005513439007580978 0.0005513439007580978
F1: 0.10256410256410256, ACC: 0.9951757408683667, AUC: 0.7301751482554131
F1: 0.11764705882352941, ACC: 0.9958649207443143, AUC: 0.7407943731899048
F1: 0.08163265306122448, ACC: 0.9937973811164714, AUC: 0.735553716728727
F1: 0.1111111111111111, ACC: 0.9977946243969676, AUC: 0.736553578816715
Processing margin 10 city Newport  fold -1 ... 


7255it [00:00, 9810.73it/s] 
7255it [00:00, 9313.24it/s] 


(7255, 625) (7255, 625) 0.0005513439007580978 0.0005513439007580978
F1: 0.2857142857142857, ACC: 0.9993108201240524, AUC: 0.7372258998758792
F1: 0.2222222222222222, ACC: 0.9980702963473467, AUC: 0.7433802234174597
F1: 0.125, ACC: 0.9961405926946933, AUC: 0.7431043993931871
F1: 0.15384615384615385, ACC: 0.9984838042729153, AUC: 0.7431388773962212
Processing margin 15 city Newport  fold -1 ... 


7255it [00:00, 9443.82it/s] 
7255it [00:00, 9417.02it/s]


(7255, 1225) (7255, 1225) 0.0005513439007580978 0.0005513439007580978
F1: 0.4, ACC: 0.9995864920744314, AUC: 0.6963867052820301
F1: 0.10810810810810811, ACC: 0.9954514128187457, AUC: 0.7227968556061233
F1: 0.4, ACC: 0.9995864920744314, AUC: 0.7196248793269894
F1: 0.2222222222222222, ACC: 0.9990351481736733, AUC: 0.7130395807474832
Processing margin 20 city Newport  fold -1 ... 


7255it [00:00, 8529.98it/s]
7255it [00:00, 8468.96it/s]


(7255, 2025) (7255, 2025) 0.0005513439007580978 0.0005513439007580978
F1: 0.17391304347826086, ACC: 0.997381116471399, AUC: 0.6877672045235139
F1: 0.10256410256410256, ACC: 0.9951757408683667, AUC: 0.6849055302716867
F1: 0.1, ACC: 0.9950379048931771, AUC: 0.6963177492759619
F1: 0.08333333333333333, ACC: 0.9939352170916609, AUC: 0.6707695490277203
Portland (7254, 7) 0.011717673007995588
Processing margin 3 city Portland  fold -1 ... 


7254it [00:00, 8462.87it/s] 
7254it [00:00, 8194.79it/s]


(7254, 121) (7254, 121) 0.011717673007995588 0.011717673007995588
F1: 0.019169329073482427, ACC: 0.9576785221946512, AUC: 0.4979199658661066
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49435067652392245
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49693123169200726
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49874459478309385
Processing margin 5 city Portland  fold -1 ... 


7254it [00:00, 9970.91it/s] 
7254it [00:00, 10067.91it/s]


(7254, 225) (7254, 225) 0.011717673007995588 0.011717673007995588
F1: 0.01904761904761905, ACC: 0.9716018748276812, AUC: 0.48977049879792905
F1: 0.010752688172043012, ACC: 0.9746346843121036, AUC: 0.49413898074224805
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49525735806946575
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.4983261263774585
Processing margin 7 city Portland  fold -1 ... 


7254it [00:00, 9952.65it/s] 
7254it [00:00, 9916.89it/s] 


(7254, 361) (7254, 361) 0.011717673007995588 0.011717673007995588
F1: 0.01834862385321101, ACC: 0.9704990350151641, AUC: 0.4947084259844264
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49323476077556144
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49727995536337005
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49874459478309385
Processing margin 10 city Portland  fold -1 ... 


7254it [00:00, 9827.31it/s]
7254it [00:00, 9563.45it/s] 


(7254, 625) (7254, 625) 0.011717673007995588 0.011717673007995588
F1: 0.02197802197802198, ACC: 0.9509236283429832, AUC: 0.5032755409319537
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49086343981029434
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49121216348165714
F1: 0.02316391879002589, ACC: 0.011717673007995588, AUC: 0.49637327381782675
Processing margin 15 city Portland  fold -1 ... 


7254it [00:00, 9196.22it/s]
7254it [00:00, 9140.96it/s]


(7254, 1225) (7254, 1225) 0.011717673007995588 0.011717673007995588
F1: 0.02158273381294964, ACC: 0.9625034463744141, AUC: 0.5007278068152913
F1: 0.021052631578947368, ACC: 0.9743589743589743, AUC: 0.4969304111657217
F1: 0.031578947368421054, ACC: 0.9746346843121036, AUC: 0.5061564087205533
F1: 0.02843601895734597, ACC: 0.9717397298042459, AUC: 0.5074019676220327
Processing margin 20 city Portland  fold -1 ... 


7254it [00:00, 8522.60it/s]
7254it [00:00, 8661.00it/s]


(7254, 2025) (7254, 2025) 0.011717673007995588 0.011717673007995588
F1: 0.027777777777777776, ACC: 0.9807003032809485, AUC: 0.4933036849835485
F1: 0.02821316614420063, ACC: 0.9145299145299145, AUC: 0.5123152790199634
F1: 0.023529411764705882, ACC: 0.919906258615936, AUC: 0.510009600157541
F1: 0.029315960912052116, ACC: 0.9178384339674662, AUC: 0.5157040525793245
Sandy Hook (7202, 7) 0.045404054429325186
Processing margin 3 city Sandy Hook  fold -1 ... 


7202it [00:00, 9604.94it/s]
7202it [00:00, 9824.48it/s]


(7202, 121) (7202, 121) 0.045404054429325186 0.045404054429325186
F1: 0.0794912559618442, ACC: 0.9196056650930298, AUC: 0.5177628023352794
F1: 0.08540925266903915, ACC: 0.9286309358511524, AUC: 0.5200126772310258
F1: 0.07243460764587525, ACC: 0.9359900027770064, AUC: 0.5151670836808452
F1: 0.053268765133171914, ACC: 0.9457095251319078, AUC: 0.510686460939672
Processing margin 5 city Sandy Hook  fold -1 ... 


7202it [00:00, 9950.08it/s] 
7202it [00:00, 10128.24it/s]


(7202, 225) (7202, 225) 0.045404054429325186 0.045404054429325186
F1: 0.09975062344139651, ACC: 0.8997500694251597, AUC: 0.5279724214623298
F1: 0.08710217755443886, ACC: 0.9243265759511247, AUC: 0.5229448985265499
F1: 0.08403361344537816, ACC: 0.9243265759511247, AUC: 0.5189110925771475
F1: 0.08940397350993377, ACC: 0.9236323243543461, AUC: 0.5225100917431192
Processing margin 7 city Sandy Hook  fold -1 ... 


7202it [00:00, 9983.21it/s] 
7202it [00:00, 9959.83it/s] 


(7202, 361) (7202, 361) 0.045404054429325186 0.045404054429325186
F1: 0.10427807486631016, ACC: 0.9069702860316579, AUC: 0.5321252154573255
F1: 0.08214849921011058, ACC: 0.9193279644543182, AUC: 0.5178706700027801
F1: 0.09375, ACC: 0.9114134962510414, AUC: 0.5258090631081457
F1: 0.1018957345971564, ACC: 0.8947514579283532, AUC: 0.5314244092299139
Processing margin 10 city Sandy Hook  fold -1 ... 


7202it [00:00, 9832.03it/s]
7202it [00:00, 9230.53it/s]


(7202, 625) (7202, 625) 0.045404054429325186 0.045404054429325186
F1: 0.11742424242424243, ACC: 0.8705915023604555, AUC: 0.5479370586599943
F1: 0.115, ACC: 0.9016939738961399, AUC: 0.5355623019182653
F1: 0.10775862068965517, ACC: 0.8850319355734518, AUC: 0.5377641367806505
F1: 0.11123470522803114, ACC: 0.8890585948347681, AUC: 0.5353105365582429
Processing margin 15 city Sandy Hook  fold -1 ... 


7202it [00:00, 7660.37it/s]
7202it [00:00, 7593.51it/s]


(7202, 1225) (7202, 1225) 0.045404054429325186 0.045404054429325186
F1: 0.13756613756613756, ACC: 0.8642043876700917, AUC: 0.564612955240478
F1: 0.12097407698350353, ACC: 0.844626492640933, AUC: 0.5550707812065611
F1: 0.11901983663943991, ACC: 0.7903360177728409, AUC: 0.5637916041145399
F1: 0.11623246492985972, ACC: 0.8775340183282422, AUC: 0.5448295802057271
Processing margin 20 city Sandy Hook  fold -1 ... 


7202it [00:00, 8643.12it/s]
7202it [00:00, 9019.41it/s]


(7202, 2025) (7202, 2025) 0.045404054429325186 0.045404054429325186
F1: 0.12210526315789473, ACC: 0.8262982504859762, AUC: 0.5593461217681401
F1: 0.12466487935656836, ACC: 0.8186614829214107, AUC: 0.5673710314150681
F1: 0.12627291242362526, ACC: 0.8808664259927798, AUC: 0.5649612454823464
F1: 0.1273792093704246, ACC: 0.8344904193279644, AUC: 0.5764190158465388
Sewells Point (7290, 7) 0.015363511659807956
Processing margin 3 city Sewells Point  fold -1 ... 


7290it [00:00, 7966.75it/s]
7290it [00:00, 8048.56it/s]


(7290, 121) (7290, 121) 0.015363511659807956 0.015363511659807956
F1: 0.17813765182186234, ACC: 0.972153635116598, AUC: 0.6155788520479242
F1: 0.19626168224299065, ACC: 0.9764060356652949, AUC: 0.6158947975958285
F1: 0.22026431718061673, ACC: 0.9757201646090535, AUC: 0.6117738227918641
F1: 0.2048780487804878, ACC: 0.9776406035665295, AUC: 0.6130301417028221
Processing margin 5 city Sewells Point  fold -1 ... 


7290it [00:00, 7988.14it/s]
7290it [00:00, 7925.14it/s]


(7290, 225) (7290, 225) 0.015363511659807956 0.015363511659807956
F1: 0.1827956989247312, ACC: 0.9791495198902607, AUC: 0.6236435487401981
F1: 0.23300970873786409, ACC: 0.9783264746227709, AUC: 0.6077038470724038
F1: 0.20600858369098712, ACC: 0.9746227709190672, AUC: 0.6026549377064843
F1: 0.20080321285140562, ACC: 0.9727023319615912, AUC: 0.6023663575209968
Processing margin 7 city Sewells Point  fold -1 ... 


7290it [00:00, 7932.71it/s]
7290it [00:00, 8003.10it/s]


(7290, 361) (7290, 361) 0.015363511659807956 0.015363511659807956
F1: 0.16981132075471697, ACC: 0.9758573388203018, AUC: 0.6098308820602635
F1: 0.2127659574468085, ACC: 0.9746227709190672, AUC: 0.6043565656967719
F1: 0.22033898305084745, ACC: 0.9747599451303155, AUC: 0.6067149623850655
F1: 0.21686746987951808, ACC: 0.9732510288065843, AUC: 0.630072543088007
Processing margin 10 city Sewells Point  fold -1 ... 


7290it [00:00, 7667.97it/s]
7290it [00:00, 7789.20it/s]


(7290, 625) (7290, 625) 0.015363511659807956 0.015363511659807956
F1: 0.1890909090909091, ACC: 0.9694101508916324, AUC: 0.6575349281534849
F1: 0.19771863117870722, ACC: 0.9710562414266118, AUC: 0.6273559089280739
F1: 0.20430107526881722, ACC: 0.9796982167352538, AUC: 0.6460588902599212
F1: 0.19377162629757785, ACC: 0.9680384087791495, AUC: 0.644103510727222
Processing margin 15 city Sewells Point  fold -1 ... 


7290it [00:00, 8741.80it/s]
7290it [00:00, 8874.90it/s]


(7290, 1225) (7290, 1225) 0.015363511659807956 0.015363511659807956
F1: 0.21929824561403508, ACC: 0.9755829903978052, AUC: 0.6938437885602835
F1: 0.19469026548672566, ACC: 0.9750342935528121, AUC: 0.6404440154440154
F1: 0.20717131474103587, ACC: 0.9727023319615912, AUC: 0.6901880249174063
F1: 0.2116788321167883, ACC: 0.9703703703703703, AUC: 0.6920445159813716
Processing margin 20 city Sewells Point  fold -1 ... 


7290it [00:01, 6767.07it/s]
7290it [00:00, 7360.34it/s]


(7290, 2025) (7290, 2025) 0.015363511659807956 0.015363511659807956
F1: 0.1, ACC: 0.9629629629629629, AUC: 0.671184024598973
F1: 0.12195121951219512, ACC: 0.980246913580247, AUC: 0.7118041734665446
F1: 0.15384615384615385, ACC: 0.9637860082304527, AUC: 0.7160147076384189
F1: 0.10282776349614396, ACC: 0.9521262002743485, AUC: 0.730488496596744
The Battery (6892, 7) 0.007254788160185722
Processing margin 3 city The Battery  fold -1 ... 


6892it [00:00, 8079.11it/s]
6892it [00:00, 8061.49it/s]


(6892, 121) (6892, 121) 0.007254788160185722 0.007254788160185722
F1: 0.05228758169934641, ACC: 0.9789611143354614, AUC: 0.5256913183279743
F1: 0.056338028169014086, ACC: 0.9902785838653512, AUC: 0.5348099970768782
F1: 0.06666666666666667, ACC: 0.991874637260592, AUC: 0.5389754457760889
F1: 0.06779661016949153, ACC: 0.9920197330237958, AUC: 0.5404180064308681
Processing margin 5 city The Battery  fold -1 ... 


6892it [00:00, 7911.97it/s]
6892it [00:00, 7937.59it/s]


(6892, 225) (6892, 225) 0.007254788160185722 0.007254788160185722
F1: 0.03636363636363636, ACC: 0.9692396982008126, AUC: 0.5333820520315697
F1: 0.05333333333333334, ACC: 0.9793964016250726, AUC: 0.529137679041216
F1: 0.05517241379310345, ACC: 0.9801218804410912, AUC: 0.5262700964630226
F1: 0.05714285714285714, ACC: 0.9904236796285548, AUC: 0.5307994738380591
Processing margin 7 city The Battery  fold -1 ... 


6892it [00:00, 10073.86it/s]
6892it [00:00, 9891.85it/s]


(6892, 361) (6892, 361) 0.007254788160185722 0.007254788160185722
F1: 0.05263157894736842, ACC: 0.9843296575739988, AUC: 0.5738263665594855
F1: 0.047619047619047616, ACC: 0.9825885084155542, AUC: 0.5595776088862905
F1: 0.06382978723404255, ACC: 0.9872315728380732, AUC: 0.5354983922829583
F1: 0.049586776859504134, ACC: 0.9833139872315728, AUC: 0.5786378251973108
Processing margin 10 city The Battery  fold -1 ... 


6892it [00:00, 9339.49it/s]
6892it [00:00, 9281.75it/s]


(6892, 625) (6892, 625) 0.007254788160185722 0.007254788160185722
F1: 0.07246376811594203, ACC: 0.9814277423099246, AUC: 0.5797369190295234
F1: 0.05309734513274336, ACC: 0.9844747533372026, AUC: 0.5787591347559193
F1: 0.058823529411764705, ACC: 0.9767846778874056, AUC: 0.573386436714411
F1: 0.0547945205479452, ACC: 0.9899883923389438, AUC: 0.5593525285004385
Processing margin 15 city The Battery  fold -1 ... 


6892it [00:00, 9693.45it/s]
6892it [00:00, 9884.44it/s]


(6892, 1225) (6892, 1225) 0.007254788160185722 0.007254788160185722
F1: 0.07407407407407407, ACC: 0.9818630295995356, AUC: 0.5991961414790996
F1: 0.08187134502923976, ACC: 0.9772199651770168, AUC: 0.6243510669394913
F1: 0.09345794392523364, ACC: 0.9859257109692396, AUC: 0.5901403098509207
F1: 0.0847457627118644, ACC: 0.9843296575739988, AUC: 0.6226337328266589
Processing margin 20 city The Battery  fold -1 ... 


6892it [00:00, 8950.43it/s]
6892it [00:00, 8883.44it/s]


(6892, 2025) (6892, 2025) 0.007254788160185722 0.007254788160185722
F1: 0.06289308176100629, ACC: 0.9783807312826466, AUC: 0.6610961707103187
F1: 0.08849557522123894, ACC: 0.9850551363900174, AUC: 0.6331043554516224
F1: 0.06711409395973154, ACC: 0.9798316889146836, AUC: 0.6487167494884537
F1: 0.08264462809917356, ACC: 0.9838943702843876, AUC: 0.6865229465068693
Washington (6912, 7) 0.06336805555555555
Processing margin 3 city Washington  fold -1 ... 


6912it [00:00, 9947.61it/s] 
6912it [00:00, 10163.25it/s]


(6912, 121) (6912, 121) 0.06336805555555555 0.06336805555555555
F1: 0.13641364136413642, ACC: 0.8864293981481481, AUC: 0.5380457552020518
F1: 0.08605341246290801, ACC: 0.9108796296296297, AUC: 0.5164548252722868
F1: 0.10903873744619799, ACC: 0.91015625, AUC: 0.5259838087862515
F1: 0.09049773755656108, ACC: 0.9127604166666666, AUC: 0.5190495032465654
Processing margin 5 city Washington  fold -1 ... 


6912it [00:00, 10338.36it/s]
6912it [00:00, 10340.96it/s]


(6912, 225) (6912, 225) 0.06336805555555555 0.06336805555555555
F1: 0.15316315205327413, ACC: 0.8896122685185185, AUC: 0.5496638820825981
F1: 0.13942857142857143, ACC: 0.8910590277777778, AUC: 0.5401110941835483
F1: 0.12789115646258503, ACC: 0.9072627314814815, AUC: 0.5326481549661942
F1: 0.14512471655328799, ACC: 0.8909143518518519, AUC: 0.5437297133740441
Processing margin 7 city Washington  fold -1 ... 


6912it [00:00, 9836.94it/s]
6912it [00:00, 9995.00it/s] 


(6912, 361) (6912, 361) 0.06336805555555555 0.06336805555555555
F1: 0.15570469798657718, ACC: 0.9089988425925926, AUC: 0.5455883950272463
F1: 0.13836477987421383, ACC: 0.9207175925925926, AUC: 0.5356469785005846
F1: 0.13220338983050847, ACC: 0.9259259259259259, AUC: 0.5334462190172703
F1: 0.14, ACC: 0.9253472222222222, AUC: 0.5392070565366488
Processing margin 10 city Washington  fold -1 ... 


6912it [00:00, 9657.20it/s]
6912it [00:00, 9429.00it/s]


(6912, 625) (6912, 625) 0.06336805555555555 0.06336805555555555
F1: 0.14917127071823205, ACC: 0.9108796296296297, AUC: 0.5416428622815815
F1: 0.1288244766505636, ACC: 0.9217303240740741, AUC: 0.5335225693783211
F1: 0.1347305389221557, ACC: 0.9163773148148148, AUC: 0.5366746226211485
F1: 0.13714285714285715, ACC: 0.9126157407407407, AUC: 0.5400520240427816
Processing margin 15 city Washington  fold -1 ... 


6912it [00:00, 7699.22it/s]
6912it [00:00, 8003.20it/s]


(6912, 1225) (6912, 1225) 0.06336805555555555 0.06336805555555555
F1: 0.14191852825229961, ACC: 0.9055266203703703, AUC: 0.5389323362998888
F1: 0.12933753943217666, ACC: 0.9201388888888888, AUC: 0.534349904006613
F1: 0.13333333333333333, ACC: 0.9228877314814815, AUC: 0.5380365861055744
F1: 0.13515825491873396, ACC: 0.8537326388888888, AUC: 0.5429265357883941
Processing margin 20 city Washington  fold -1 ... 


6912it [00:00, 7262.69it/s]
6912it [00:00, 7798.85it/s]


(6912, 2025) (6912, 2025) 0.06336805555555555 0.06336805555555555
F1: 0.15484569572279372, ACC: 0.7741608796296297, AUC: 0.5719086391227008
F1: 0.15067519545131486, ACC: 0.8271122685185185, AUC: 0.560853001045277
F1: 0.1468298109010011, ACC: 0.8890335648148148, AUC: 0.5576469911962567
F1: 0.15226824457593688, ACC: 0.6890914351851852, AUC: 0.5845341323142942


12

In [16]:
dict_meta = {}

f1_full = []
for city in locations:
    for meta_run in dict_models[city]:
        best_f1 = meta_run["params"]["best_f1"]
        
        # print( city, margin, fold, "=>", best_threshold, best_f1)

        if (city not in dict_meta) or (best_f1 > dict_meta[city]["params"]["best_f1"]):
            dict_meta[city] = {
                "model": meta_run["model"],
                "params": meta_run["params"]
            }
                
for city in locations:
    print(dict_meta[city]["params"])

{'fold': -1, 'margin': 15, 'max_samples': 500, 'best_threshold': 0.277, 'acc': 0.9197224975222993, 'auc': 0.5806872107329076, 'best_f1': 0.16}
{'fold': -1, 'margin': 20, 'max_samples': 3000, 'best_threshold': 0.373, 'acc': 0.9490804282185012, 'auc': 0.6156715417264977, 'best_f1': 0.12293144208037825}
{'fold': -1, 'margin': 15, 'max_samples': 1000, 'best_threshold': 0.0, 'acc': 0.06159674477339694, 'auc': 0.4931967703349282, 'best_f1': 0.11604546656093048}
{'fold': -1, 'margin': 10, 'max_samples': 500, 'best_threshold': 0.624, 'acc': 0.9840264389975213, 'auc': 0.5515532552179462, 'best_f1': 0.13432835820895522}
{'fold': -1, 'margin': 3, 'max_samples': 100, 'best_threshold': 0.298, 'acc': 0.9326089946362262, 'auc': 0.5676461076616711, 'best_f1': 0.17229729729729729}
{'fold': -1, 'margin': 5, 'max_samples': 500, 'best_threshold': 0.8270000000000001, 'acc': 0.994060773480663, 'auc': 0.5446753386224209, 'best_f1': 0.12244897959183673}
{'fold': -1, 'margin': 15, 'max_samples': 100, 'best_thr

In [17]:
dict_meta

{'Atlantic City': {'model': <Isolation.IsolationModel at 0x77db4182f310>,
  'params': {'fold': -1,
   'margin': 15,
   'max_samples': 500,
   'best_threshold': 0.277,
   'acc': 0.9197224975222993,
   'auc': 0.5806872107329076,
   'best_f1': 0.16}},
 'Baltimore': {'model': <Isolation.IsolationModel at 0x77db3f19ed30>,
  'params': {'fold': -1,
   'margin': 20,
   'max_samples': 3000,
   'best_threshold': 0.373,
   'acc': 0.9490804282185012,
   'auc': 0.6156715417264977,
   'best_f1': 0.12293144208037825}},
 'Eastport': {'model': <Isolation.IsolationModel at 0x77db3ee85490>,
  'params': {'fold': -1,
   'margin': 15,
   'max_samples': 1000,
   'best_threshold': 0.0,
   'acc': 0.06159674477339694,
   'auc': 0.4931967703349282,
   'best_f1': 0.11604546656093048}},
 'Fort Pulaski': {'model': <Isolation.IsolationModel at 0x77db3cbb3310>,
  'params': {'fold': -1,
   'margin': 10,
   'max_samples': 500,
   'best_threshold': 0.624,
   'acc': 0.9840264389975213,
   'auc': 0.5515532552179462,
   'b

In [18]:
sub_dir = "subs/sub_isolation"
!mkdir -p $sub_dir
with open(f'{sub_dir}/dict_meta.pickle', 'wb') as handle:
    pickle.dump(dict_meta, handle)

In [19]:
model.predict_proba([img.flatten()])

array([[1., 0.]])

In [20]:
prob = model.predict_proba([img.flatten()])
prob

array([[1., 0.]])

In [21]:
1-prob
# array([[0.99483734, 0.00516266]])

array([[0., 1.]])

In [22]:
df_full[df_full["anomaly"] == 1].sort_values("fold")

Unnamed: 0,t,anomaly,location,latitude,longitude,fold,year
10,1993-01-11,1,Fort Pulaski,32.036700,-80.901700,0,1993
47155,1998-02-27,1,Portland,43.658060,-70.244170,0,1998
48921,2003-01-04,1,Portland,43.658060,-70.244170,0,2003
49261,2003-12-24,1,Portland,43.658060,-70.244170,0,2003
49262,2003-12-25,1,Portland,43.658060,-70.244170,0,2003
...,...,...,...,...,...,...,...
50470,2007-04-16,1,Portland,43.658060,-70.244170,4,2007
48862,2002-11-06,1,Portland,43.658060,-70.244170,4,2002
48671,2002-04-29,1,Portland,43.658060,-70.244170,4,2002
46747,1997-01-11,1,Portland,43.658060,-70.244170,4,1997
