In [1]:
# Run this on GOOGLE COLAB

## Import and install dependencies

In [1]:
!pip install ipython-autotime numpy pandas numba scipy scikit-learn tqdm
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.1-py2.py3-none-any.whl (6.8 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Downloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.1 jedi-0.19.0
time: 534 µs (started: 2023-08-11 18:03:57 +00:00)


In [None]:
import os.path
import time
import pickle
import joblib
import math

import numpy as np
import pandas as pd
from numba import jit
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from tqdm import notebook

time: 3.31 s (started: 2023-07-05 10:53:49 +00:00)


## Basic functions

In [None]:
## load data into np array
def convert_data_format_calms21(data):
    # This is altered to fit with the calms21 data conversion script calms21_convert_to_npy.py
    ## Going through all sequences and creating giant dataframe
    keypoint_names = ['nose', 'ear_left', 'ear_right', 'neck', 'hip_left', 'hip_right', 'tail_base']
    keypoints_idx = pd.MultiIndex.from_product([['resident', 'intruder'], keypoint_names, list('xy')],
                                               names=['animal', 'keypoints', 'coords'])
    #task 1 has only 1 annotator (main key)
    data = data[list(data.keys())[0]]
    #sequence names are the keys of the dictionary
    sequence_names = list(data.keys())
    #sequence_names = list(data["sequences"].keys()) # old code
    collection = []
    targets = []
    for sequence_idx in range(len(sequence_names)):
        sequence_key = sequence_names[sequence_idx]
        single_sequence = data[sequence_key]
        #single_sequence = data["sequences"][sequence_key] # old code
        single_keypoints = single_sequence['keypoints']
        single_keypoints_2d = single_keypoints.reshape(single_keypoints.shape[0],
                                                       single_keypoints.shape[1] *
                                                       single_keypoints.shape[2] *
                                                       single_keypoints.shape[3],
                                                       order='F')
        # convert to dataframe
        df_single_keypoints = pd.DataFrame(single_keypoints_2d, columns=keypoints_idx)
        pose = [ 0,  2,  12, 14, 16, 18, 20, 22, 24, 26,  1,  3,  13, 15, 17, 19, 21, 23, 25, 27]

        collection.append(np.array(df_single_keypoints.iloc[:, pose]))
        # training we have annotations, but testing we do not.
        try:
            single_annotations = single_sequence['annotations']
            targets.append(np.array(single_annotations))
        except:
            pass
    if targets:
        return collection, targets
    else:
        return collection

def unison_shuffled_copies(a, b, s):
  assert len(a) == len(b)
  np.random.seed(s)
  p = np.random.permutation(len(a))
  return a[p], b[p]

time: 1.23 ms (started: 2023-07-05 10:53:53 +00:00)


In [None]:
## FAST NUMBA PROCESSING FUNCTIONS

@jit(nopython=True)
def fast_standardize(data):
    a_ = (data - np.mean(data)) / np.std(data)
    return a_


def fast_nchoose2(n, k):
    a = np.ones((k, n - k + 1), dtype=int)
    a[0] = np.arange(n - k + 1)
    for j in range(1, k):
        reps = (n - k + j) - a[j - 1]
        a = np.repeat(a, reps, axis=1)
        ind = np.add.accumulate(reps)
        a[j, ind[:-1]] = 1 - reps[1:]
        a[j, 0] = j
        a[j] = np.add.accumulate(a[j])
        return a


@jit(nopython=True, parallel=True)
def fast_running_mean(x, N):
    out = np.zeros_like(x, dtype=np.float64)
    dim_len = x.shape[0]
    for i in range(dim_len):
        if N % 2 == 0:
            a, b = i - (N - 1) // 2, i + (N - 1) // 2 + 2
        else:
            a, b = i - (N - 1) // 2, i + (N - 1) // 2 + 1
        a = max(0, a)
        b = min(dim_len, b)
        out[i] = np.mean(x[a:b])
    return out


@jit(nopython=True)
def np_apply_along_axis(func1d, axis, arr):
    assert arr.ndim == 2
    assert axis in [0, 1]
    if axis == 0:
        result = np.empty(arr.shape[1])
        for i in range(len(result)):
            result[i] = func1d(arr[:, i])
    else:
        result = np.empty(arr.shape[0])
        for i in range(len(result)):
            result[i] = func1d(arr[i, :])
    return result


@jit(nopython=True)
def np_mean(array, axis):
    return np_apply_along_axis(np.mean, axis, array)


@jit(nopython=True)
def np_std(array, axis):
    return np_apply_along_axis(np.std, axis, array)


@jit(nopython=True)
def angle_between(vector1, vector2):
    """ Returns the angle in radians between given vectors"""
    v1_u = unit_vector(vector1)
    v2_u = unit_vector(vector2)
    minor = np.linalg.det(
        np.stack((v1_u[-2:], v2_u[-2:]))
    )
    if minor == 0:
        sign = 1
    else:
        sign = -np.sign(minor)
    dot_p = np.dot(v1_u, v2_u)
    dot_p = min(max(dot_p, -1.0), 1.0)
    return sign * np.arccos(dot_p)


@jit(nopython=True)
def unit_vector(vector):
    """ Returns the unit vector of the vector.  """
    return vector / np.linalg.norm(vector)


@jit(nopython=True)
def fast_displacment(data, reduce=False):
    data_length = data.shape[0]
    if reduce:
        displacement_array = np.zeros((data_length, int(data.shape[1] / 10)), dtype=np.float64)
    else:
        displacement_array = np.zeros((data_length, int(data.shape[1] / 2)), dtype=np.float64)
    for r in range(data_length):
        if r < data_length - 1:
            if reduce:
                count = 0
                for c in range(int(data.shape[1]/2 - 2), data.shape[1], int(data.shape[1]/2)):
                    displacement_array[r, count] = np.linalg.norm(data[r + 1, c:c + 2] - data[r, c:c + 2])
                    count += 1
            else:
                for c in range(0, data.shape[1], 2):
                    displacement_array[r, int(c / 2)] = np.linalg.norm(data[r + 1, c:c + 2] - data[r, c:c + 2])
    return displacement_array


@jit(nopython=True)
def fast_length_angle(data, index):
    data_length = data.shape[0]
    length_2d_array = np.zeros((data_length, index.shape[1], 2), dtype=np.float64)
    for r in range(data_length):
        for i in range(index.shape[1]):
            ref = index[0, i]
            target = index[1, i]
            length_2d_array[r, i, :] = data[r, ref:ref + 2] - data[r, target:target + 2]
    length_array = np.zeros((data_length, length_2d_array.shape[1]), dtype=np.float64)
    angle_array = np.zeros((data_length, length_2d_array.shape[1]), dtype=np.float64)
    for k in range(length_2d_array.shape[1]):
        for kk in range(data_length):
            length_array[kk, k] = np.linalg.norm(length_2d_array[kk, k, :])
            if kk < data_length - 1:
                try:
                    angle_array[kk, k] = np.rad2deg(
                        angle_between(length_2d_array[kk, k, :], length_2d_array[kk + 1, k, :]))
                except:
                    pass
    return length_array, angle_array


@jit(nopython=True)
def fast_smooth(data, n):
    data_boxcar_avg = np.zeros((data.shape[0], data.shape[1]))
    for body_part in range(data.shape[1]):
        data_boxcar_avg[:, body_part] = fast_running_mean(data[:, body_part], n)
    return data_boxcar_avg


@jit(nopython=True)
def fast_feature_extraction(data, framerate, index, smooth):
    window = np.int(np.round(0.05 / (1 / framerate)) * 2 - 1)
    features = []
    for n in range(len(data)):
        displacement_raw = fast_displacment(data[n])
        length_raw, angle_raw = fast_length_angle(data[n], index)
        if smooth:
            displacement_run_mean = fast_smooth(displacement_raw, window)
            length_run_mean = fast_smooth(length_raw, window)
            angle_run_mean = fast_smooth(angle_raw, window)
            features.append(np.hstack((length_run_mean[1:, :], angle_run_mean[:-1, :], displacement_run_mean[:-1, :])))
        else:
            features.append(np.hstack((length_raw[1:, :], angle_raw[:-1, :], displacement_raw[:-1, :])))
    return features


@jit(nopython=True)
def fast_feature_binning(features, framerate, index):
    binned_features_list = []
    for n in range(len(features)):
        bin_width = int(framerate / 10)
        for s in range(bin_width):
            binned_features = np.zeros((int(features[n].shape[0] / bin_width), features[n].shape[1]), dtype=np.float64)
            for b in range(bin_width + s, features[n].shape[0], bin_width):
                binned_features[int(b / bin_width - 1), 0:index.shape[1]] = np_mean(features[n][(b - bin_width):b,
                                                                                    0:index.shape[1]], 0)
                binned_features[int(b / bin_width - 1), index.shape[1]:] = np.sum(features[n][(b - bin_width):b,
                                                                                  index.shape[1]:], axis=0)
            binned_features_list.append(binned_features)
    return binned_features_list


def bsoid_extract_numba(data, fps):
    smooth = False
    index = fast_nchoose2(int(data[0].shape[1] / 2), 2)
    # print(len(data)) # 1
    features = fast_feature_extraction(data, fps, index * 2, smooth)
    # print(len(features)) # 1
    binned_features = fast_feature_binning(features, fps, index * 2)
    # print(len(binned_features)) # 1
    return binned_features


def feature_extraction(train_datalist, num_train, framerate):
    f_integrated = []
    for i in notebook.tqdm(range(num_train)):
        binned_features = bsoid_extract_numba([train_datalist[i]], framerate)
        f_integrated.append(binned_features[0])  # getting only the non-shifted
    features = np.vstack([f_integrated[m] for m in range(len(f_integrated))])
    scaler = StandardScaler()
    scaler.fit(features)
    scaled_features = scaler.transform(features)
    return features, scaled_features


def bsoid_predict_numba(feats, scaler, clf):
    """
    :param feats: list, multiple feats (original feature space)
    :param clf: Obj, MLP classifier
    :return nonfs_labels: list, label/100ms
    """
    labels_fslow = []
    for i in range(0, len(feats)):
        scaled_feats = scaler.transform(feats[i])
        labels = clf.predict(np.nan_to_num(scaled_feats))
        labels_fslow.append(labels)
    return labels_fslow


def frameshift_predict(data_test, num_test, scaler, rf_model, framerate):
    labels_fs = []
    new_predictions = []
    for i in range(num_test):
        feats_new = bsoid_extract_numba([data_test[i]], framerate)
        labels = bsoid_predict_numba(feats_new, scaler, rf_model)
        for m in range(0, len(labels)):
            labels[m] = labels[m][::-1]
        labels_pad = -1 * np.ones([len(labels), len(max(labels, key=lambda x: len(x)))])
        for n, l in enumerate(labels):
            labels_pad[n][0:len(l)] = l
            labels_pad[n] = labels_pad[n][::-1]
            if n > 0:
                labels_pad[n][0:n] = labels_pad[n - 1][0:n]
        labels_fs.append(labels_pad.astype(int))
    for k in range(0, len(labels_fs)):
        labels_fs2 = []
        for l in range(math.floor(framerate / 10)):
            labels_fs2.append(labels_fs[k][l])
        new_predictions.append(np.array(labels_fs2).flatten('F'))
    new_predictions_pad = []
    for i in range(0, len(new_predictions)):
        new_predictions_pad.append(np.pad(new_predictions[i], (len(data_test[i]) -
                                                               len(new_predictions[i]), 0), 'edge'))
    return np.hstack(new_predictions_pad)

time: 180 ms (started: 2023-07-05 10:53:53 +00:00)


In [None]:
train = np.load('/content/drive/MyDrive/asoid/calms21_task1_train.npy', allow_pickle=True).item()

with open('/content/drive/MyDrive/asoid/ALL_test_data.sav', 'rb') as fr:
    [data_test, targets_test] = joblib.load(fr)

time: 3.11 s (started: 2023-07-05 10:53:53 +00:00)


In [None]:
train_datalist, targets_raw = convert_data_format_calms21(train)
len(train_datalist), train_datalist[0].shape, len(targets_raw)

(70, (21364, 20), 70)

time: 200 ms (started: 2023-07-05 10:53:56 +00:00)


## Runtime parameters

In [None]:
#Run time parameters

k_runs = 3
full_data_ratio = 0.05

# sample rate for feature extraction
min_duration_sec = 0.4 # from preprocessing (10%tile)
framerate = 30
frames_per_bin = min_duration_sec / (1/framerate)
#frames_per_bin = 2.0
print("Selected min_duration: ", min_duration_sec, "s")
num2skip = int(frames_per_bin)
print("Selected num2skip (frames_per_bin): {} ({})".format(num2skip, frames_per_bin))
# num2skip*10 is the framerate of the features, a workaround for this function
framerate_for_feature_extraction = num2skip*10

## iteration 0 start with 1% of all data
ratio = 0.01 ## 1%

#active learning parameters
max_iter = 20
max_samples_per = 200

Selected min_duration:  0.4 s
Selected num2skip (frames_per_bin): 12 (12.0)
time: 4.48 ms (started: 2023-07-05 13:51:27 +00:00)


In [None]:
#subselect train set
if full_data_ratio == 1:
  print("Full data")
  sel_train_datalist = train_datalist
  sel_targets_raw = targets_raw
else:
  train_data_pick_size = int(len(train_datalist)*full_data_ratio)
  print(f"Selected {train_data_pick_size} of {len(train_datalist)}")

  sel_train_datalist = train_datalist[:train_data_pick_size]
  sel_targets_raw = targets_raw[:train_data_pick_size]

print(len(sel_train_datalist), sel_train_datalist[0].shape, len(sel_targets_raw))

Selected 3 of 70
3 (21364, 20) 3
time: 3.98 ms (started: 2023-07-05 13:51:28 +00:00)


## Feauture Extraction

In [None]:

#time it by running it multiple times

feat_extr_run_time = []

for i in range(k_runs):
  start_time = time.time()

  features, scaled_features = feature_extraction(sel_train_datalist, len(sel_train_datalist), framerate_for_feature_extraction)

  targets_ls = []
  for i in range(len(sel_train_datalist)):
      targets_not_matching = np.hstack(
          [stats.mode(sel_targets_raw[i][(num2skip - 1) + num2skip * n:(num2skip - 1) + num2skip * n + num2skip])[0]
          for n in range(len(sel_targets_raw[i]))])
      # features are skipped so if it's not multiple of 12, we discard the final few targets
      targets_matching_features = sel_targets_raw[i][(num2skip - 1):-1:num2skip]
      targets_ls.append(targets_not_matching[:targets_matching_features.shape[0]])
  targets = np.hstack(targets_ls)

  feat_extr_run_time.append(time.time()-start_time)


print(100 * '*')
print(f'Behaviors downsampled into {targets.shape[0]} representatives by mode')
print(f'each with {scaled_features.shape[1]} features')
print(100 * '*')

print(feat_extr_run_time)

print(f"For a {k_runs}-fold eval, it took {np.mean(feat_extr_run_time)} sec (+/- {np.std(feat_extr_run_time)}) per iteration.")

  0%|          | 0/3 [00:00<?, ?it/s]

  [stats.mode(sel_targets_raw[i][(num2skip - 1) + num2skip * n:(num2skip - 1) + num2skip * n + num2skip])[0]


  0%|          | 0/3 [00:00<?, ?it/s]

  [stats.mode(sel_targets_raw[i][(num2skip - 1) + num2skip * n:(num2skip - 1) + num2skip * n + num2skip])[0]


  0%|          | 0/3 [00:00<?, ?it/s]

  [stats.mode(sel_targets_raw[i][(num2skip - 1) + num2skip * n:(num2skip - 1) + num2skip * n + num2skip])[0]


****************************************************************************************************
Behaviors downsampled into 4492 representatives by mode
each with 100 features
****************************************************************************************************
[7.581881284713745, 7.474703788757324, 7.405364751815796]
For a 3-fold eval, it took 7.487316608428955 sec (+/- 0.07261236814588364) per iteration.
time: 22.5 s (started: 2023-07-05 13:51:28 +00:00)


In [None]:
# test set
test_features = []
for i in range(len(data_test)):
  test_features.append(bsoid_extract_numba([data_test[i]], framerate_for_feature_extraction))



time: 31.5 s (started: 2023-07-05 13:51:51 +00:00)


## Shuffle data for cross-validation (runtime purposes)

In [None]:
def unison_shuffled_copies(a, b, s):
    assert len(a) == len(b)
    np.random.seed(s)
    p = np.random.permutation(len(a))
    return a[p], b[p]

time: 784 µs (started: 2023-07-05 13:52:22 +00:00)


In [None]:
#shuffle data for run time eval

seeds = np.arange(k_runs)
features_runlist = []
targets_runlist = []
for seed in notebook.tqdm(seeds):
    scaled_features_shuf, targets_shuf = unison_shuffled_copies(scaled_features, targets, seed)
    features_runlist.append(scaled_features_shuf)
    targets_runlist.append(targets_shuf)

scaler = StandardScaler()
scaler.fit(features)

  0%|          | 0/3 [00:00<?, ?it/s]

time: 48.4 ms (started: 2023-07-05 13:52:22 +00:00)


## Iter 0

In [None]:
# iter 0

iter0_predict_prob = []
iter0_macro_scores = []
iter0_f1_scores = []

iter0_learning_runtime = []

for i in notebook.tqdm(range(len(features_runlist))):
    iter0_start_time = time.time()

    X = []
    Y = []
    samples2train = [int(len(np.where(targets_runlist[i] == b)[0]) * ratio)
                                  for b in np.unique(targets_runlist[i])]

    for b in np.unique(targets_runlist[i]):
        idx_b = np.where(targets_runlist[i] == b)[0]
        X.append(features_runlist[i][idx_b[:samples2train[int(b)]]])
        Y.append(targets_runlist[i][idx_b[:samples2train[int(b)]]])

    X_train = np.vstack(X)
    Y_train = np.hstack(Y)
    model_iter0 = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1,
                                         criterion='gini',
                                         class_weight='balanced_subsample'
                                        )
    model_iter0.fit(X_train, Y_train)

    ## test on remaining held out data
    #predict = frameshift_predict(data_test, len(data_test), scaler, model_iter0, framerate= framerate_for_feature_extraction)
    predict_list = []
    for t_idx, test_feat in enumerate(test_features):
      labels = bsoid_predict_numba(test_feat , scaler, model_iter0)
      predict_list.append(labels)

    predict = np.hstack(predict_list)
    ## check f1 scores per class
    #iter0_f1_scores.append(f1_score(targets_test[targets_test < 3],
                                    #predict[targets_test < 3], average=None))
    ## check f1 scores overall
    #iter0_macro_scores.append(f1_score(targets_test[targets_test < 3],
                                       #predict[targets_test < 3], average='macro'))

    iter0_predict_prob.append(model_iter0.predict_proba(features_runlist[i][targets_runlist[i] < 3]
                                                        ))

    #save model
    os.makedirs(f'/content/Calms21 data/iter0', exist_ok = True)
    with open(f'/content/Calms21 data/iter0/model_seed{i}.pkl', 'wb') as f:
         pickle.dump([model_iter0, X_train, Y_train, predict], f)

    iter0_learning_runtime.append(time.time()-iter0_start_time)

print(iter0_learning_runtime)
print(f"For a {k_runs}-fold eval, it took {np.mean(iter0_learning_runtime)} sec (+/- {np.std(iter0_learning_runtime)}) for iteration 0.")

  0%|          | 0/3 [00:00<?, ?it/s]

[15.646374464035034, 15.329357147216797, 15.876659393310547]
For a 3-fold eval, it took 15.61746366818746 sec (+/- 0.22436846589515952) for iteration 0.
time: 46.9 s (started: 2023-07-05 13:52:22 +00:00)


## Iter X

In [None]:
X_train = dict()
X_train_list = []
Y_train = dict()
Y_train_list = []

iterX_predict_prob = dict()
iterX_macro_scores = dict()
iterX_f1_scores = dict()
iterX_predict_prob_list = []
iterX_macro_scores_list = []
iterX_f1_scores_list = []
sampled_idx_list = []

total_learning_runtime = []
try:
  for it in range(max_iter):
      Y_train_it = []
      X_train_it = []
      iterX_predict_prob_it = []
      iterX_macro_scores_it = []
      iterX_f1_scores_it = []
      sampled_idx = []

      print(50*'=')
      print(f'Active Learning iteration {it+1}...')
      print(50*'=')
      iterX_runtime = []
      for i in notebook.tqdm(range(len(targets_runlist))):
          seed_runtime_start = time.time()
          if it == 0:
              ## start with 1% data (same as iteration 0 above)
              X = []
              Y = []
              samples2train = [int(len(np.where(targets_runlist[i] == b)[0]) * ratio)
                              for b in np.unique(targets_runlist[i])]

              for b in np.unique(targets_runlist[i]):
                  idx_b = np.where(targets_runlist[i] == b)[0]
                  X.append(features_runlist[i][idx_b[:samples2train[int(b)]]])
                  Y.append(targets_runlist[i][idx_b[:samples2train[int(b)]]])

              ## store in dict iteration 0
              X_train[it] = np.vstack(X)
              Y_train[it] = np.hstack(Y)
              ## retrieve iteration 0 predict probability
              idx_lowconf = np.where(iter0_predict_prob[i].max(1) < 0.5)[0]
              ## identify all features/targets that were low predict prob
              new_X_human = features_runlist[i][targets_runlist[i] < 3][idx_lowconf, :]
              new_Y_human = targets_runlist[i][targets_runlist[i] < 3][idx_lowconf]

          else:
              ## if after iteration 0, we will use predict prob from previous iteration
              idx_lowconf = np.where(iterX_predict_prob_list[it-1][i].max(1) < 0.5)[0]
              new_X_human = features_runlist[i][targets_runlist[i] < 3][idx_lowconf, :]
              new_Y_human = targets_runlist[i][targets_runlist[i] < 3][idx_lowconf]


          ## setting a random seed for sampling, seed is identical to train/test split
          np.random.seed(seeds[i])
          ## KEEP IN MIND THIS HAS NO PREFERENCE FOR BEHAVIOR A VS B, JUST GRABBING THE WORST AND SUBSAMPLE
          try:
              ## attempt sampling up to max_samples_per iteration
              idx_sampled = np.random.choice(np.arange(np.hstack(idx_lowconf).shape[0]),
                                            max_samples_per, replace=False)
          except:
              ## otherwise just grab all
              idx_sampled = np.random.choice(np.arange(np.hstack(idx_lowconf).shape[0]),
                                            np.hstack(idx_lowconf).shape[0], replace=False)
          ## subsample
          new_X_sampled = new_X_human[idx_sampled, :]
          new_Y_sampled = new_Y_human[idx_sampled]

          sampled_idx.append(idx_lowconf[idx_sampled])

          if it == 0:
              ## if iteration 1, we use iteration 0 as base, and append new samples
              X_train[it] = np.vstack((X_train[it],
                                      new_X_sampled))
              Y_train[it] = np.hstack((Y_train[it],
                                      new_Y_sampled))
          else:
              ## if iteration >1, we use previous iteration as base, and append new samples
              X_train[it] = np.vstack((X_train_list[it-1][i],
                                      new_X_sampled))
              Y_train[it] = np.hstack((Y_train_list[it-1][i],
                                      new_Y_sampled))

          ## model initialization
          model_iterX = RandomForestClassifier(n_estimators=200, random_state=42, n_jobs=-1,
                                        criterion='gini',
                                        class_weight='balanced_subsample'
                                        )
          X_train_it.append(X_train[it])
          Y_train_it.append(Y_train[it])
          model_iterX.fit(X_train[it], Y_train[it])


          ## test on remaining held out data
          predict_list = []
          for t_idx, test_feat in enumerate(test_features):
            labels = bsoid_predict_numba(test_feat , scaler, model_iter0)
            predict_list.append(labels)

          predict = np.hstack(predict_list)

          os.makedirs(f'/content/Calms21 data//iter{it+1}', exist_ok = True)
          with open(f'/content/Calms21 data//iter{it+1}/model_seed{i}.pkl', 'wb') as f:
              pickle.dump([model_iterX, X_train[it], Y_train[it], predict], f)

          iterX_runtime.append(time.time()-seed_runtime_start)

          ## save predict probability, as a visual guide for this active learning process
          iterX_predict_prob[it] = model_iterX.predict_proba(features_runlist[i][targets_runlist[i] < 3]
                                                            )
          iterX_predict_prob_it.append(iterX_predict_prob[it])
      print("Run time", "---"*10)
      print(iterX_runtime)
      print(f"For a {k_runs}-fold eval, it took {np.mean(iterX_runtime)} sec (+/- {np.std(iterX_runtime)}) for iteration {it}.")


      X_train_list.append(X_train_it)
      Y_train_list.append(Y_train_it)

      iterX_predict_prob_list.append(iterX_predict_prob_it)
      sampled_idx_list.append(sampled_idx)

      #nested list of runtime to collect each iteration
      total_learning_runtime.append(iterX_runtime)
except ValueError:
  print("No more samples found.")

finally:
  print(total_learning_runtime)


Active Learning iteration 1...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.370765686035156, 15.435418605804443, 14.976552486419678]
For a 3-fold eval, it took 15.26091225941976 sec (+/- 0.20279769950070411) for iteration 0.
Active Learning iteration 2...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.012662172317505, 15.560948610305786, 15.4623441696167]
For a 3-fold eval, it took 15.34531831741333 sec (+/- 0.23864309764920633) for iteration 1.
Active Learning iteration 3...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[17.222721338272095, 16.120535850524902, 16.314236640930176]
For a 3-fold eval, it took 16.55249794324239 sec (+/- 0.4804716769208938) for iteration 2.
Active Learning iteration 4...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.616735935211182, 15.50075101852417, 15.675925731658936]
For a 3-fold eval, it took 15.597804228464762 sec (+/- 0.07275691110683194) for iteration 3.
Active Learning iteration 5...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.498757600784302, 16.16002130508423, 15.676974773406982]
For a 3-fold eval, it took 15.778584559758505 sec (+/- 0.27935739733409204) for iteration 4.
Active Learning iteration 6...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.821732521057129, 16.0562641620636, 15.72863245010376]
For a 3-fold eval, it took 15.868876377741495 sec (+/- 0.1378466347980992) for iteration 5.
Active Learning iteration 7...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.461837768554688, 15.23190712928772, 14.75701093673706]
For a 3-fold eval, it took 15.150251944859823 sec (+/- 0.2934801442124989) for iteration 6.
Active Learning iteration 8...


  0%|          | 0/3 [00:00<?, ?it/s]

Run time ------------------------------
[15.10561752319336, 15.054556369781494, 14.872844457626343]
For a 3-fold eval, it took 15.011006116867065 sec (+/- 0.0998942554572907) for iteration 7.
Active Learning iteration 9...


  0%|          | 0/3 [00:00<?, ?it/s]

No more samples found.
[[15.370765686035156, 15.435418605804443, 14.976552486419678], [15.012662172317505, 15.560948610305786, 15.4623441696167], [17.222721338272095, 16.120535850524902, 16.314236640930176], [15.616735935211182, 15.50075101852417, 15.675925731658936], [15.498757600784302, 16.16002130508423, 15.676974773406982], [15.821732521057129, 16.0562641620636, 15.72863245010376], [15.461837768554688, 15.23190712928772, 14.75701093673706], [15.10561752319336, 15.054556369781494, 14.872844457626343]]
time: 6min 16s (started: 2023-07-05 13:53:09 +00:00)


In [None]:
iter0_learning_runtime, total_learning_runtime

([15.646374464035034, 15.329357147216797, 15.876659393310547],
 [[15.370765686035156, 15.435418605804443, 14.976552486419678],
  [15.012662172317505, 15.560948610305786, 15.4623441696167],
  [17.222721338272095, 16.120535850524902, 16.314236640930176],
  [15.616735935211182, 15.50075101852417, 15.675925731658936],
  [15.498757600784302, 16.16002130508423, 15.676974773406982],
  [15.821732521057129, 16.0562641620636, 15.72863245010376],
  [15.461837768554688, 15.23190712928772, 14.75701093673706],
  [15.10561752319336, 15.054556369781494, 14.872844457626343]])

time: 4.29 ms (started: 2023-07-05 13:59:25 +00:00)
