In [2]:
import os
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression

In [13]:
rng = np.random.default_rng(2022)

In [14]:
os.system('ls /dccstor/hoo-misha-1/wilds/wilds/features/iwildcam/')

AFN
DANN
deepCORAL
ERM
FixMatch
PseudoLabel


0

In [15]:
path_base = '/dccstor/hoo-misha-1/wilds/wilds/features/iwildcam/PseudoLabel'
os.system('ls /dccstor/hoo-misha-1/wilds/wilds/features/iwildcam/PseudoLabel')

resnet50_id_test_features.npy
resnet50_id_test_labels.npy
resnet50_id_test_metadata.npy
resnet50_id_val_features.npy
resnet50_id_val_labels.npy
resnet50_id_val_metadata.npy
resnet50_test_features.npy
resnet50_test_labels.npy
resnet50_test_metadata.npy
resnet50_train_features.npy
resnet50_train_labels.npy
resnet50_train_metadata.npy
resnet50_val_features.npy
resnet50_val_labels.npy
resnet50_val_metadata.npy


0

In [4]:
def load_flm():
    test_features = np.load(f'{path_base}/resnet50_test_features.npy')
    test_labels = np.load(f'{path_base}/resnet50_test_labels.npy')
    test_metadata = np.load(f'{path_base}/resnet50_test_metadata.npy')
    return test_features, test_labels, test_metadata

In [5]:
def prune_cam_id(cutoff=50):
    metadata = np.load(f'{path_base}/resnet50_test_metadata.npy')
    unique_counts = np.unique(metadata[:,0],return_counts=True)
    return unique_counts[0][unique_counts[1] > cutoff]

In [6]:
def get_cam_ind(metadata, num_cams=1, cam_id = None):
    unique_counts = np.unique(metadata[:,0],return_counts=True)
    if cam_id is None:
        top_id = unique_counts[0][np.argpartition(unique_counts[1], -num_cams)[-num_cams:]]
    else:
        top_id = cam_id
    print(f'Selecting cameras with ids {top_id}')
    ind = np.zeros(metadata.shape[0]) == 1
    for c_id in top_id:
        ind = np.logical_or(ind,metadata[:,0] == c_id)
    return ind

In [7]:
def cam_flm(num_cams=1, cam_id = None):
    features, labels, metadata = load_flm()
    cam_ind = get_cam_ind(metadata, num_cams, cam_id)
    return features[cam_ind], labels[cam_ind], metadata[cam_ind]

In [8]:
def prune_flm(features, labels, metadata, cutoff=25):
    unique_counts = np.unique(labels,return_counts=True)
    print(f'|   | Total number of classes {len(unique_counts[0])}')
    prune_classes = unique_counts[0][unique_counts[1] < cutoff]
    prune_ind = []
    for clss in prune_classes:
        prune_ind.append((labels == clss).nonzero()[0])
    print(f'|   |   | Pruning {len(prune_classes)} classes with {len(np.concatenate(prune_ind))} data points')
    if len(prune_ind) == 0:
        return features, labels, metadata
    prune_ind = np.concatenate(prune_ind)
    pruned_ind = np.ones(labels.shape[0]) == 1
    pruned_ind[prune_ind] = False
    return features[pruned_ind], labels[pruned_ind], metadata[pruned_ind]

In [9]:
def balanced_sample_ind(labels, batch = 5):
    unique_classes = np.unique(labels)
    #print(unique_classes)
    ret_ind = None
    for clss in unique_classes:
        class_ind = np.where(labels == clss)[0]
        #print(clss, class_ind)
        rand_ind = rng.choice(class_ind,batch)
        if ret_ind is None:
            ret_ind = rand_ind
        else:
            ret_ind = np.concatenate((ret_ind, rand_ind))
    return ret_ind

In [10]:
def get_prediction_accuracy(num_cams=1, largest=True, cam_id = None, cutoff = 25, batch = 5):
    f,l,m = cam_flm(num_cams, cam_id)
    f,l,m = prune_flm(f,l,m, cutoff)
    sampled_ind = balanced_sample_ind(l,batch)
    nonsampled_ind = np.ones(l.shape[0]) == 1
    nonsampled_ind[sampled_ind] = False
    try:
        clf = LogisticRegression(random_state=0,max_iter=2000).fit(f[sampled_ind], l[sampled_ind])
        predictions = clf.predict(f[nonsampled_ind])
    except:
        return -1
    
    return np.sum(predictions == l[nonsampled_ind])/len(predictions)

In [11]:
def get_original_accuracy(num_cams=1, largest=True, cam_id = None, cutoff = 25):
    f,l,m = cam_flm(num_cams, cam_id)
    f,l,m = prune_flm(f,l,m, cutoff)
    weight = np.load('pseudo_classifier_weight.npy')
    bias = np.load('pseudo_classifier_bias.npy')
    pred_logits = f @ weight.T + bias
    pred = np.argmax(pred_logits,axis=1)
    return np.sum(pred == l)/len(pred)

In [13]:
cam_ids = prune_cam_id()
print(f'Total {len(cam_ids)} to check')
cam_dict = {}
orig_dict = {}
cutoff = 25
for cam_id in cam_ids:
    print(f'| Cam ID {cam_id}')
    cam_dict[cam_id] = []
    orig_dict[cam_id] = get_original_accuracy(cam_id=[cam_id], cutoff=cutoff)
    print(f'|   | {orig_dict[cam_id]}')
    for batch in range(1,cutoff):
        print(f'|   | {batch}')
        prediction_acc = 0
        for i in range(3):
            prediction_acc += get_prediction_accuracy(cam_id = [cam_id], cutoff=cutoff, batch=batch)
        prediction_acc /= 3
        print(f'|   | {prediction_acc}')
        cam_dict[cam_id].append(prediction_acc)

Total 35 to check
| Cam ID 24
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
|   | 0.012121212121212121
|   | 1
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
|   | 0.4222903885480573
|   | 2
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 classes with 30 data points
|   | 0.47308488612836436
|   | 3
Selecting cameras with ids [24]
|   | Total number of classes 9
|   |   | Pruning 5 

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
|   | 0.5506722272157851
|   | 22
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
|   | 0.5583979210322806
|   | 23
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
Selecting cameras with ids [163]
|   | Total number of classes 14
|   |   | Pruning 7 classes with 54 data points
|   | 0.548682047335

In [25]:
import pickle

def get_dict(model):
    root_path = '/dccstor/hoo-misha-1/wilds/wilds/results/iwildcam'
    base_path = f'{root_path}/{model}'
    
    with open(f'{base_path}_cam_dict.pkl','rb') as file:
        cam_dict = pickle.load(file)

    with open(f'{base_path}_orig_dict.pkl','rb') as file:
        orig_dict = pickle.load(file)
    
    return cam_dict, orig_dict

In [26]:
cam_dict, orig_dict = get_dict('')

In [27]:
cam_ids = prune_cam_id()

In [28]:
from ipywidgets import interact, interactive, fixed, interact_manual
import matplotlib.pyplot as plt

def plot(cam_ind):
    predictions = cam_dict[cam_ids[cam_ind]]
    print(f'Original {orig_dict[cam_ids[cam_ind]]}')
    print(f'Max {max(predictions)}')
    metadata = np.load(f'{path_base}/resnet50_test_metadata.npy')
    unique_counts = np.unique(metadata[:,0],return_counts=True)
    ind = np.where(unique_counts[0] == cam_ids[cam_ind])
    print(f'With {unique_counts[1][ind]} data points pre-pruning')
    predictions = np.hstack((orig_dict[cam_ids[cam_ind]] , predictions))
    plt.plot(range(0,len(predictions)), predictions)
    
interact(plot, cam_ind=(0,len(cam_ids)));

interactive(children=(IntSlider(value=17, description='cam_ind', max=35), Output()), _dom_classes=('widget-int…

In [30]:
from ipywidgets import interact, interactive, fixed, interact_manual
import matplotlib.pyplot as plt

good_inds = []
for i in range(len(cam_ids)):
    predictions = cam_dict[cam_ids[i]]
    if predictions[-1] > 0:
        good_inds.append(i)
        
def plot_2(cam_ind):
    cam_ind = good_inds[cam_ind]
    print(f'Camera id {cam_ids[cam_ind]}')
    predictions = cam_dict[cam_ids[cam_ind]]
    print(f'Original {orig_dict[cam_ids[cam_ind]]}')
    print(f'Max {max(predictions)}')
    metadata = np.load(f'{path_base}/resnet50_test_metadata.npy')
    unique_counts = np.unique(metadata[:,0],return_counts=True)
    ind = np.where(unique_counts[0] == cam_ids[cam_ind])
    print(f'With {unique_counts[1][ind]} data points pre-pruning')
    predictions = np.hstack((orig_dict[cam_ids[cam_ind]] , predictions))
    plt.plot(range(0,len(predictions)), predictions)


interact(plot_2, cam_ind=(0,len(good_inds)));

interactive(children=(IntSlider(value=11, description='cam_ind', max=22), Output()), _dom_classes=('widget-int…

In [31]:
def print_green(text, green=True, end='\n'):
    print(f'\x1b[{32 if green else 31}m{text}\x1b[0m', end=end)
def show_dist(cam_ind, cutoff=25):
    f,l,m = cam_flm(cam_id=[cam_ids[cam_ind]])
    unique_counts = np.unique(l, return_counts=True)
    print(f'Total of {sum(unique_counts[1] > cutoff)} classes over cutoff')
    print('[',end='')
    for y,c in zip(unique_counts[0], unique_counts[1]):
        print_green(f'{y}:{c}:{c/sum(unique_counts[1]):.2f}, ', c > cutoff, end='')
    print(']')

interact(show_dist, cam_ind=(0,len(cam_ids)-1), cutoff=(10,500));

interactive(children=(IntSlider(value=17, description='cam_ind', max=34), IntSlider(value=25, description='cut…

In [38]:
root_path = '/dccstor/hoo-misha-1/wilds/wilds/features/iwildcam'
models = list(os.listdir(root_path))

def plot_3(model_ind, cam_ind):
    global cam_dict, orig_dict
    model = models[model_ind]
    print(f'Using {model}')
    cam_dict, orig_dict = get_dict(model)
    plot_2(cam_ind)

interact(plot_3, model_ind=(0,len(models)-1), cam_ind=(0,len(good_inds)-1));



interactive(children=(IntSlider(value=2, description='model_ind', max=5), IntSlider(value=10, description='cam…

In [17]:
# import pickle

# with open('cam_dict.pkl','wb') as file:
#     pickle.dump(cam_dict, file)
    
# with open('orig_dict.pkl','wb') as file:
#     pickle.dump(orig_dict, file)