In [1]:
import numpy as np

In [2]:
X_train = np.load('features/resnet18_train_feat.npy')
y_train = np.load('features/resnet18_train_label.npy')
X_valid = np.load('features/resnet18_valid_feat.npy')
y_valid = np.load('features/resnet18_valid_label.npy')
X_test = np.load('features/resnet18_test_feat.npy')
y_test = np.load('features/resnet18_test_label.npy')

In [3]:
# global mean
def global_mean(features):
    return np.mean(features, axis=0)

In [4]:
train_global_mean = global_mean(X_train)
valid_global_mean = global_mean(X_valid)
test_global_mean = global_mean(X_test)

In [5]:
# class mean
def class_mean(features, labels):
    means, nums = np.zeros((10, 512)), np.zeros((10, 512))
    for feat, lab in zip(features, labels):
        means[lab] += feat
        nums[lab] += 1
    means /= nums
    return means

In [6]:
train_class_mean = class_mean(X_train, y_train)
valid_class_mean = class_mean(X_valid, y_valid)
test_class_mean = class_mean(X_test, y_test)

In [13]:
train_total_cov = np.cov(X_train.T)
valid_total_cov = np.cov(X_valid.T)
test_total_cov = np.cov(X_test.T)

In [14]:
valid_total_cov.shape

(512, 512)

In [25]:
np.dot(np.array([train_class_mean[0] - train_global_mean]).T,
           np.array([train_class_mean[0] - train_global_mean])).shape

(512, 512)

In [26]:
train_bet_cov = np.zeros((512, 512))
for i in range(10):
    train_bet_cov += np.dot(np.array([train_class_mean[i] - train_global_mean]).T,
                       np.array([train_class_mean[i] - train_global_mean]))
train_bet_cov /= 10

In [28]:
valid_bet_cov = np.zeros((512, 512))
for i in range(10):
    valid_bet_cov += np.dot(np.array([valid_class_mean[i] - valid_global_mean]).T,
                       np.array([valid_class_mean[i] - valid_global_mean]))
valid_bet_cov /= 10

In [29]:
test_bet_cov = np.zeros((512, 512))
for i in range(10):
    test_bet_cov += np.dot(np.array([test_class_mean[i] - test_global_mean]).T,
                       np.array([test_class_mean[i] - test_global_mean]))
test_bet_cov /= 10

In [30]:
train_within_cov = np.zeros((512, 512))
for feature, label in zip(X_train, y_train):
    train_within_cov += np.dot(np.array([feature - train_class_mean[label]]).T,
                       np.array([feature - train_class_mean[label]]))
train_within_cov /= X_train.shape[0]

In [36]:
valid_within_cov = np.zeros((512, 512))
for feature, label in zip(X_valid, y_valid):
    valid_within_cov += np.dot(np.array([feature - valid_class_mean[label]]).T,
                       np.array([feature - valid_class_mean[label]]))
valid_within_cov /= X_valid.shape[0]

In [35]:
test_within_cov = np.zeros((512, 512))
for feature, label in zip(X_test, y_test):
    test_within_cov += np.dot(np.array([feature - test_class_mean[label]]).T,
                       np.array([feature - test_class_mean[label]]))
test_within_cov /= X_test.shape[0]

In [34]:
train_total_cov - (train_within_cov + train_bet_cov)

array([[-2.79146460e-04, -7.26494666e-05, -6.29824868e-05, ...,
        -4.46482946e-05,  4.94811845e-06,  2.53203998e-04],
       [-7.26494666e-05,  9.31875207e-06, -4.35379703e-05, ...,
        -4.17113775e-05,  1.52121334e-05,  9.01497705e-05],
       [-6.29824868e-05, -4.35379703e-05, -6.37658081e-05, ...,
        -6.68881259e-05, -2.39881258e-05,  1.57712763e-04],
       ...,
       [-4.46482946e-05, -4.17113775e-05, -6.68881259e-05, ...,
         5.35703131e-06, -1.12609344e-05,  8.69779550e-05],
       [ 4.94811845e-06,  1.52121334e-05, -2.39881258e-05, ...,
        -1.12609344e-05,  7.28806368e-06, -6.56526292e-06],
       [ 2.53203998e-04,  9.01497705e-05,  1.57712763e-04, ...,
         8.69779550e-05, -6.56526292e-06, -4.26410841e-04]])

In [37]:
valid_total_cov - (valid_within_cov + valid_bet_cov)

array([[ 3.04988760e-03,  6.97322346e-04,  7.03463467e-04, ...,
         6.48642311e-04, -9.11328427e-05, -2.52531641e-03],
       [ 6.97322346e-04, -6.40197377e-05,  3.87979388e-04, ...,
         3.53745805e-04, -1.26373330e-04, -8.12151992e-04],
       [ 7.03463467e-04,  3.87979388e-04,  6.58969987e-04, ...,
         7.28301095e-04,  2.29466522e-04, -1.51247960e-03],
       ...,
       [ 6.48642311e-04,  3.53745805e-04,  7.28301095e-04, ...,
         1.48047053e-04,  4.91956921e-05, -1.01277400e-03],
       [-9.11328427e-05, -1.26373330e-04,  2.29466522e-04, ...,
         4.91956921e-05, -3.24449842e-05,  5.83467353e-05],
       [-2.52531641e-03, -8.12151992e-04, -1.51247960e-03, ...,
        -1.01277400e-03,  5.83467353e-05,  4.26356106e-03]])

In [38]:
test_total_cov - (test_within_cov + test_bet_cov)

array([[ 8.81610756e-05,  3.57438009e-06,  1.43554727e-05, ...,
         2.14931075e-05, -5.61848015e-06, -3.40843324e-05],
       [ 3.57438009e-06,  7.94810063e-06,  1.09573006e-06, ...,
         2.98960302e-07,  4.56516769e-07, -4.55849313e-06],
       [ 1.43554727e-05,  1.09573006e-06,  5.21779031e-05, ...,
         1.61592562e-06, -4.05187792e-06, -2.36133076e-05],
       ...,
       [ 2.14931075e-05,  2.98960302e-07,  1.61592562e-06, ...,
         2.83926892e-05, -5.51446742e-07, -4.63593902e-06],
       [-5.61848015e-06,  4.56516769e-07, -4.05187792e-06, ...,
        -5.51446742e-07,  6.96969288e-06,  7.61501460e-06],
       [-3.40843324e-05, -4.55849313e-06, -2.36133076e-05, ...,
        -4.63593902e-06,  7.61501460e-06,  6.72999330e-05]])

In [39]:
contraction_train = np.trace(np.dot(train_within_cov, train_bet_cov)) / 10
contraction_valid = np.trace(np.dot(valid_within_cov, valid_bet_cov)) / 10
contraction_test = np.trace(np.dot(test_within_cov, test_bet_cov)) / 10

In [41]:
contraction_train, contraction_valid, contraction_test

(21.90280820873837, 21.604902924682, 22.508009202731806)

In [43]:
train_dist_array = np.zeros(10)
for i in range(10):
    train_dist_array[i] = np.linalg.norm(train_class_mean[i] - train_global_mean)
closeness_train = np.std(train_dist_array) / np.mean(train_dist_array)

In [45]:
valid_dist_array = np.zeros(10)
for i in range(10):
    valid_dist_array[i] = np.linalg.norm(valid_class_mean[i] - valid_global_mean)
closeness_valid = np.std(valid_dist_array) / np.mean(valid_dist_array)

In [46]:
test_dist_array = np.zeros(10)
for i in range(10):
    test_dist_array[i] = np.linalg.norm(test_class_mean[i] - test_global_mean)
closeness_test = np.std(test_dist_array) / np.mean(test_dist_array)

In [47]:
closeness_train, closeness_valid, closeness_test

(0.14924154479150809, 0.15332743294974382, 0.15745918588761434)

In [48]:
import itertools

In [54]:
def cos_sim(vA, vB):
    return np.dot(vA, vB) / (np.sqrt(np.dot(vA,vA)) * np.sqrt(np.dot(vB,vB)))

In [67]:
cos_sim_list = []
for (c1, c2) in list(itertools.combinations(range(10), 2)):
    cos_sim_list.append(cos_sim(train_class_mean[c1]-train_global_mean, train_class_mean[c2]-train_global_mean))
equal_angularity_train = np.std(cos_sim_list)
closeness_maximal_angle_train = np.mean(cos_sim_list + [1]*len(cos_sim_list)) /9

In [68]:
cos_sim_list = []
for (c1, c2) in list(itertools.combinations(range(10), 2)):
    cos_sim_list.append(cos_sim(valid_class_mean[c1]-valid_global_mean, valid_class_mean[c2]-valid_global_mean))
equal_angularity_valid = np.std(cos_sim_list)
closeness_maximal_angle_valid = np.mean(cos_sim_list + [1]*len(cos_sim_list)) /9

In [69]:
cos_sim_list = []
for (c1, c2) in list(itertools.combinations(range(10), 2)):
    cos_sim_list.append(cos_sim(test_class_mean[c1]-test_global_mean, test_class_mean[c2]-test_global_mean))
equal_angularity_test = np.std(cos_sim_list)
closeness_maximal_angle_test = np.mean(cos_sim_list + [1]*len(cos_sim_list)) /9

In [65]:
equal_angularity_train, equal_angularity_valid, equal_angularity_test

(0.416359624450228, 0.41449276832629806, 0.4226815211078981)

In [70]:
closeness_maximal_angle_train, closeness_maximal_angle_valid, closeness_maximal_angle_test

(0.04973388706384904, 0.04971928522050863, 0.04978007179432273)