In [1]:
import math
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.functional as F
import sklearn
import torch
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.preprocessing import StandardScaler
from scipy.special import softmax
from tqdm import tqdm 
from collections import Counter
import warnings
from sklearn.exceptions import ConvergenceWarning
import matplotlib.pyplot as plt
warnings.simplefilter("always", ConvergenceWarning)
import glob
import pickle
import pprint

In [2]:
from maml.datasets.miniimagenet import MiniimagenetMetaDataset, Task
from maml.models.gated_conv_net_original import ImpRegConvModel
from maml.models.conv_embedding_model import RegConvEmbeddingModel
from maml.logistic_regression_utils import logistic_regression_grad_with_respect_to_w, logistic_regression_hessian_pieces_with_respect_to_w, logistic_regression_hessian_with_respect_to_w, logistic_regression_mixed_derivatives_with_respect_to_w_then_to_X
from maml.logistic_regression_utils import logistic_regression_mixed_derivatives_with_respect_to_w_then_to_X_left_multiply
from maml.algorithm import MetaOptnet, ProtoNet, ImpRMAML_inner_algorithm

In [21]:
all_features_files = glob.glob('inner_solvers_features/**')

In [22]:
all_features_files

['inner_solvers_features/minim_5w5s_protonet_features_dict.pkl',
 'inner_solvers_features/minim_10w1s_SVM_features_dict.pkl',
 'inner_solvers_features/minim_5w1s_protonet_features_dict.pkl',
 'inner_solvers_features/minim_10w1s_protonet_features_dict.pkl',
 'inner_solvers_features/minim_5w15s_protonet_features_dict.pkl',
 'inner_solvers_features/minim_5w1s_SVM_features_dict.pkl',
 'inner_solvers_features/minim_5w1s_LR_features_dict.pkl',
 'inner_solvers_features/minim_5w1s_protosvm_features_dict.pkl',
 'inner_solvers_features/minim_20w5s_protonet_features_dict.pkl',
 'inner_solvers_features/minim_5w15s_LR_features_dict.pkl',
 'inner_solvers_features/minim_5w5s_protosvm_features_dict.pkl',
 'inner_solvers_features/minim_5w5s_LR_features_dict.pkl',
 'inner_solvers_features/minim_5w5s_SVM_features_dict.pkl',
 'inner_solvers_features/minim_20w1s_SVM_features_dict.pkl',
 'inner_solvers_features/minim_10w1s_LR_features_dict.pkl',
 'inner_solvers_features/minim_10w5s_protonet_features_dict.pk

In [23]:
all_features = []
for ff in all_features_files:
    with open(ff, 'rb') as f:
        all_features.append(pickle.load(f))

## modular re-usable methods

In [6]:
def compute_mean_variance(X):
    """
        X: (N x d)
        returns scalar
        sum (||X - mean_X||_2^2) / N
    """
#     print(f"recvd X of shape {X.shape}")
    N = X.shape[0]
    mean = X.mean(0)
    return mean, (np.sum((X - X.mean(0))**2))/N

In [7]:
def get_X_y_from_features_dict(features_dict):
    X = []
    y = []
    for label in features_dict.keys():
        X.append(features_dict[label])
        y += [label] * X[-1].shape[0]
    X = np.concatenate(X, axis=0)
    y = np.array(y)
#     print(f"Finally returning X, y of shapes : {X.shape} and {y.shape}")
    return X, y

In [8]:
def get_PCA(X):
    pca = PCA()
    pca.fit(X)
    return pca

In [97]:
def compute_variance(estimators):
    estimators = np.concatenate(estimators, axis=0)
    assert len(estimators.shape) == 2
    assert estimators.shape[1] == 1600
    _explained_variance_ratio = []
#     print("final esitimators", estimators.shape)
#     estimators = estimators - estimators.mean(0)
    pca = PCA(n_components=5)
    pca.fit(estimators)
#         print(f"class: {i+1} exp. variance: ", S.explained_variance_ratio_)
#         print(f"class: {i+1} svd values : ", S.singular_values_)
#     print(np.sum(pca.explained_variance_ratio_))
    return pca.explained_variance_ratio_

## metrics to evaluate 

In [9]:
# interclass_vs_intraclass variance
def feature_clustering(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    means = []
    numerator = 0.
    for label in all_labels:
        mean, numerator_var = compute_mean_variance(X[y==label, :])
        means.append(mean)
        numerator += numerator_var
    _, denominator = compute_mean_variance(np.stack(means, axis=0))
    print("num", numerator)
    print("denom", denominator)
    return (numerator / (denominator * len(all_labels)))

In [10]:
# variance explained by top k components
def variance_explained(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    var_explained = []
    for label in all_labels:
        pca = get_PCA(X[y==label, :])
        var_explained.append(pca.explained_variance_ratio_)
    var_explained = (np.stack(var_explained, axis=0)).mean(0)
    return var_explained

In [11]:
# variance in disc direction for classes 65 and 70 (can be averaged over multiple ones)
def variance_discr_direction(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    n_runs = 20
    avg_var = 0.
    for _ in range(n_runs):
        binary_problem_labels = np.random.choice(
            list(all_labels), 2, replace=False)
        var_explained = []
        X_1 = X[y==binary_problem_labels[0], :]
        X_2 = X[y==binary_problem_labels[1], :]
        y_bin = np.array([0] * len(X_1) + [1] * len(X_2))
        with warnings.catch_warnings(record=True) as wn:
            lr_classifier = LogisticRegression()
            lr_classifier.fit(np.concatenate([X_1, X_2], axis=0), y_bin)
        normalized_lr_classifier = lr_classifier.coef_.T / np.linalg.norm(lr_classifier.coef_.T)
#         print(np.eye(normalized_lr_classifier.shape[0]))
#         _, var_1 = compute_mean_variance(X_1 @ (np.eye(normalized_lr_classifier.shape[0]) - normalized_lr_classifier @ normalized_lr_classifier.T))
#         _, var_2 = compute_mean_variance(X_2 @ (np.eye(normalized_lr_classifier.shape[0]) - normalized_lr_classifier @ normalized_lr_classifier.T))
        _, var_1 = compute_mean_variance(X_1 @ normalized_lr_classifier)
        _, var_2 = compute_mean_variance(X_2 @ normalized_lr_classifier)
        _, tvar_1 = compute_mean_variance(X_1)
        _, tvar_2 = compute_mean_variance(X_2)
        avg_var += (var_1 / tvar_1 + var_2 / tvar_2) / 2.
    print("correct", avg_var/n_runs)
    return avg_var / n_runs

In [12]:
def evaluate_ineq(a, b):
    diff = a - b
#     num = np.linalg.norm(diff[0, :] - diff[1, :])
    diff = diff / np.linalg.norm(diff, axis=1)[:, None]
    return diff[0, :].T @ diff[1, :] 
#     deno = np.sum(np.linalg.norm(diff, axis=1))
#     return num / deno

In [86]:
def hyperplane_variance(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    n_runs = 50
    print("n_runs", n_runs)
    r_hv = []
    for _ in range(n_runs):
        binary_problem_labels = np.random.choice(
            list(all_labels), 2, replace=False)
        X_1 = X[y==binary_problem_labels[0], :]
        X_2 = X[y==binary_problem_labels[1], :]
        
        rhv_pair_classes = 0.
        n_inner_runs = 20
        for _ in range(n_inner_runs):
            random_indices_1 = np.random.choice(len(X_1), 2, replace=False)
            random_indices_2 = np.random.choice(len(X_2), 2, replace=False)
            rhv_pair_classes += evaluate_ineq(X_1[random_indices_1, :], X_2[random_indices_2, :])
        r_hv.append(rhv_pair_classes / n_inner_runs)
        
    return np.mean(r_hv) 

In [87]:
# variaance for each indi. task
def task_variance(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    n_runs = 20
    avg_var = 0.
    for _ in range(n_runs):
        binary_problem_labels = np.random.choice(
            list(all_labels), 2, replace=False)
        var_explained = []
        X_1 = X[y==binary_problem_labels[0], :]
        X_2 = X[y==binary_problem_labels[1], :]
        y_bin = np.array([0] * len(X_1) + [1] * len(X_2))
        with warnings.catch_warnings(record=True) as wn:
            lr_classifier = LogisticRegression()
            lr_classifier.fit(np.concatenate([X_1, X_2], axis=0), y_bin)
        _, var_1 = compute_mean_variance(X_1 @ lr_classifier.coef_.T)
        _, var_2 = compute_mean_variance(X_2 @ lr_classifier.coef_.T)
        _, tvar_1 = compute_mean_variance(X_1)
        _, tvar_2 = compute_mean_variance(X_2)
        avg_var += (var_1 / tvar_1 + var_2 / tvar_2) / 2.
    return avg_var / n_runs

In [98]:
def hyperplane_variance_plus(features_dict, split):
    X, y = get_X_y_from_features_dict(features_dict[split])
    all_labels = set(y)
    outer_n_runs = 50
    inner_n_runs = 25
    print("outer_n_runs", outer_n_runs)
    print("inner_n_runs", inner_n_runs)
    explained_variance_ratio = []
    for _1 in range(outer_n_runs):
        
        binary_problem_labels = np.random.choice(
                list(all_labels), 2, replace=False)
        X_1 = X[y==binary_problem_labels[0], :]
        X_2 = X[y==binary_problem_labels[1], :]
        estimators = []

        for _2 in range(inner_n_runs):

            random_train_indices_1 = np.random.choice(len(X_1), 5, replace=False)
            random_train_indices_2 = np.random.choice(len(X_2), 5, replace=False)
            with warnings.catch_warnings(record=True) as wn:
                lr_classifier = LogisticRegression(penalty='none', max_iter=1000, tol=1e-6)
                y_bin = np.array([0] * len(random_train_indices_1) + [1] * len(random_train_indices_2))
                lr_classifier.fit(np.concatenate([X_1[random_train_indices_1, :],
                                                  X_2[random_train_indices_2, :]], axis=0), y_bin)
            normalized_lr_classifier = lr_classifier.coef_.T / np.linalg.norm(lr_classifier.coef_.T)
            estimators.append(normalized_lr_classifier.T)
        explained_variance_ratio.append(compute_variance(estimators))
    return np.array(explained_variance_ratio)[:, :10].sum(1).mean()

## main analysis engine


In [99]:
engine = {
#     'fc': feature_clustering,
#     'var_exp': variance_explained,
#     'var_disc': variance_discr_direction,
#     'per_task_variance': task_variance,
#       'hyperplane_variance' : hyperplane_variance,
    'hyperplane_variance_plus' : hyperplane_variance_plus
}


In [100]:
for analysis_name, analysis_func in engine.items():
    metrics = {}
    print(f"Running analysis: {analysis_name}")
    for i, (feature_name, features) in enumerate(zip(all_features_files, all_features)):  
        name = " ".join(feature_name.split('/')[-1].split('.')[0].split('_')[1:3][::-1])
        print(name)
        metrics[name] = engine[analysis_name](all_features[i], 'test')
        print(metrics[name])
    pprint.pprint(f"Results:") 
    pprint.pprint(metrics)

Running analysis: hyperplane_variance_plus
protonet 5w5s
outer_n_runs 50
inner_n_runs 25
0.38291633527946084
SVM 10w1s
outer_n_runs 50
inner_n_runs 25
0.41569058564764966
protonet 5w1s
outer_n_runs 50
inner_n_runs 25
0.38108445831856924
protonet 10w1s
outer_n_runs 50
inner_n_runs 25
0.360159922816722
protonet 5w15s
outer_n_runs 50
inner_n_runs 25
0.3475063557229677
SVM 5w1s
outer_n_runs 50
inner_n_runs 25
0.4227811974310783
LR 5w1s
outer_n_runs 50
inner_n_runs 25
0.3644290450016169
protosvm 5w1s
outer_n_runs 50
inner_n_runs 25
0.37816641559344805
protonet 20w5s
outer_n_runs 50
inner_n_runs 25
0.38644055721630294
LR 5w15s
outer_n_runs 50
inner_n_runs 25
0.3500232227385873
protosvm 5w5s
outer_n_runs 50
inner_n_runs 25
0.35320573667161304
LR 5w5s
outer_n_runs 50
inner_n_runs 25
0.348460461812467
SVM 5w5s
outer_n_runs 50
inner_n_runs 25
0.39515958014920394
SVM 20w1s
outer_n_runs 50
inner_n_runs 25
0.4304870034413657
LR 10w1s
outer_n_runs 50
inner_n_runs 25
0.36888045539035647
protonet 10w5

In [13]:
for name, value in metrics.items():
    if '5w1s' in name:
        plt.plot(value[:10], label=name, marker='o')
    print(name, sum(value[:300]))
plt.xticks(np.arange(0, 10), size=8)
plt.yticks(np.arange(0, 0.2, 0.02), size=8)
plt.legend()
plt.show()

IndexError: invalid index to scalar variable.