In [1]:
import numpy as np
import os
import scipy.io
from sklearn.metrics import classification_report,confusion_matrix

# From the .mat files extract all the features from resnet and the attribute splits.

The res101 contains features and the corresponding labels.

att_splits contains the different splits for trainval, train, val and test set.

In [2]:
# Name of the dataset folder to evaluate; change it to run on a different dataset.
dataset = 'SUN'
path = 'E:/Sushree/Dataset/data/xlsa17/data/'

# res101.mat supplies the features and labels; att_splits.mat supplies the
# split indices and the class attribute matrix used in later cells.
res101, att_splits = (
    scipy.io.loadmat(path + dataset + mat_name)
    for mat_name in ('/res101.mat', '/att_splits.mat')
)

# We need the corresponding ground-truth labels/classes for each training example for all our trainval, test seen set and test unseen set according to the split locations provided.

In [3]:
# Instance indices of each split. One is subtracted so the stored indices
# (presumably 1-based, since they come from MATLAB .mat files) can be used
# directly as NumPy indices.
trainval_loc = (att_splits['trainval_loc'] - 1).squeeze()
test_seen_loc = (att_splits['test_seen_loc'] - 1).squeeze()
test_unseen_loc = (att_splits['test_unseen_loc'] - 1).squeeze()


# Class label of every instance in the dataset.
labels = res101['labels']
print(labels, labels.shape)

# Labels of the instances that belong to each split.
labels_trainval = labels[trainval_loc]
print(labels_trainval, labels_trainval.shape)

labels_test_seen = labels[test_seen_loc]
print(labels_test_seen, labels_test_seen.shape)

labels_test_unseen = labels[test_unseen_loc]
print(labels_test_unseen, labels_test_unseen.shape)

# Sanity check: the three splits together should account for every instance.
if labels.shape[0] == sum(len(part) for part in (labels_trainval, labels_test_seen, labels_test_unseen)):
    print('correct number of instances for training, test seen and test unseen categories')

[[  1]
 [  1]
 [  1]
 ...
 [717]
 [717]
 [717]] (14340, 1)
[[50]
 [50]
 [50]
 ...
 [70]
 [70]
 [70]] (10320, 1)
[[50]
 [50]
 [50]
 ...
 [70]
 [70]
 [70]] (2580, 1)
[[  4]
 [  4]
 [  4]
 ...
 [716]
 [716]
 [716]] (1440, 1)
correct number of instances for training, test seen and test unseen categories


# In a typical ZSL/GZSL scenario, there are no overlapping classes between the training and testing phases, i.e., the train classes are completely different from the test unseen classes.

During training phase and testing phase (seen) we have z classes

During the testing phase (unseen) we have z' classes

In [4]:
# Distinct class ids present in each split (np.unique returns them sorted).
trainval_labels_seen = np.unique(labels_trainval)
print(trainval_labels_seen, trainval_labels_seen.shape)

test_labels_seen = np.unique(labels_test_seen)
print(test_labels_seen, test_labels_seen.shape)

test_labels_unseen = np.unique(labels_test_unseen)
print(test_labels_unseen, test_labels_unseen.shape)

# Count the class ids shared between trainval and each test split.
print("Number of overlapping classes between trainval and test seen:",
      len(set(trainval_labels_seen) & set(test_labels_seen)))

print("Number of overlapping classes between trainval and test unseen:",
      len(set(trainval_labels_seen) & set(test_labels_unseen)))


[  1   2   3   5   6   7   8   9  10  12  13  14  15  16  17  18  19  20
  21  22  23  26  27  28  29  30  31  32  34  35  36  37  38  40  41  42
  43  44  45  46  47  48  49  50  51  52  53  55  56  57  59  60  61  62
  63  64  65  66  67  68  69  70  71  72  74  77  78  79  80  81  82  83
  84  85  87  88  89  90  91  92  93  94  95  97  98  99 101 102 103 105
 106 107 108 109 110 111 112 114 115 116 117 118 119 120 121 122 123 124
 126 127 128 129 130 132 133 134 135 136 137 138 140 141 142 143 144 145
 147 148 149 150 151 152 154 155 156 157 158 160 161 162 163 164 165 166
 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184
 186 187 188 189 190 191 192 193 194 195 196 198 199 200 201 202 203 204
 205 206 207 208 209 210 211 212 213 214 215 216 218 219 220 221 223 224
 225 226 227 228 229 230 231 232 233 234 235 236 237 239 240 241 242 243
 244 245 248 249 250 251 252 253 254 256 257 258 259 261 262 264 265 266
 267 268 269 270 271 272 273 274 275 276 277 278 27

In [5]:

# Re-index the class ids of every split to a contiguous 0..(n_classes - 1)
# range, where the new id of a class is its position in the sorted array of
# that split's unique class ids.
#
# The mapping is recomputed from the raw labels in res101 on every run, so the
# cell can be re-executed safely. The previous in-place loop re-labelled the
# already re-labelled arrays on a second run, and it rebound the global
# `labels` array to a scalar loop variable.
labels_trainval = np.searchsorted(trainval_labels_seen, res101['labels'][trainval_loc])
print(labels_trainval, labels_trainval.shape)

labels_test_seen = np.searchsorted(test_labels_seen, res101['labels'][test_seen_loc])
print(labels_test_seen, labels_test_seen.shape)

labels_test_unseen = np.searchsorted(test_labels_unseen, res101['labels'][test_unseen_loc])
print(labels_test_unseen, labels_test_unseen.shape)

[[43]
 [43]
 [43]
 ...
 [61]
 [61]
 [61]] (10320, 1)
[[43]
 [43]
 [43]
 ...
 [61]
 [61]
 [61]] (2580, 1)
[[ 0]
 [ 0]
 [ 0]
 ...
 [71]
 [71]
 [71]] (1440, 1)


In [6]:

# Show the distinct re-indexed class ids of each split and how many there are.
for split_labels in (labels_trainval, labels_test_seen, labels_test_unseen):
    classes = np.unique(split_labels)
    print(classes, classes.shape)

[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 237 238 239 240 241 242 243 244 245 24

# Let us denote by $X \in \mathbb{R}^{d \times m}$ the features available at the training stage, where d is the dimensionality of the data and m is the number of instances. We are using ResNet features extracted from the dataset.

In [7]:
# Feature matrix from res101; instances are selected along the second axis.
X_features = res101['features']

# Columns (instances) that belong to each split.
trainval_vec = X_features[:, trainval_loc]
test_seen_vec = X_features[:, test_seen_loc]
test_unseen_vec = X_features[:, test_unseen_loc]

# Report the (feature dimension, instance count) of each split.
print("Features for trainval:", np.shape(trainval_vec))
print("Features for test seen:", np.shape(test_seen_vec))
print("Features for test unseen:", np.shape(test_unseen_vec))

Features for trainval: (2048, 10320)
Features for test seen: (2048, 2580)
Features for test unseen: (2048, 1440)


# Normalize the vectors

In [8]:
def normalization(vec,mean,std):
    """Standardize ``vec`` as ``(vec - mean) / std``.

    Parameters
    ----------
    vec : array_like
        Values to standardize.
    mean : array_like
        Offset subtracted from ``vec``; must broadcast against it.
    std : array_like
        Scale that the centred values are divided by; must broadcast
        against ``vec``.

    Returns
    -------
    numpy.ndarray
        The standardized values. Entries whose ``std`` is exactly zero are
        only centred (divided by 1), so the result contains no NaN/inf
        caused by a division by zero.
    """
    std = np.asarray(std)
    # Constant features have std == 0; dividing by 1 there avoids 0/0 and x/0.
    safe_std = np.where(std == 0, 1.0, std)
    return (vec - mean) / safe_std

In [9]:

# Per-feature statistics are computed on the trainval split only and then
# reused for both test splits.
trainval_mean = np.mean(trainval_vec, axis=1, keepdims=True)
trainval_std = trainval_vec.std(axis=1, keepdims=True)

trainval_vec, test_seen_vec, test_unseen_vec = (
    normalization(split_vec, trainval_mean, trainval_std)
    for split_vec in (trainval_vec, test_seen_vec, test_unseen_vec)
)

# Each class in the dataset has an attribute description of length a. Stacking these vectors column-wise gives the signature matrix $S \in [0, 1]^{a \times z}$. The training stage and the test seen set have z classes; the test unseen set has z' classes, so there $S \in [0, 1]^{a \times z'}$.

In [8]:
# Signature matrix: attribute vectors of all classes, one column per class.
signature = att_splits['att']

# The original class ids are shifted down by one to obtain column indices.
trainval_sig = signature[:, trainval_labels_seen - 1]
test_seen_sig = signature[:, test_labels_seen - 1]
test_unseen_sig = signature[:, test_labels_unseen - 1]

# Report the (attribute count, class count) of each split's signature.
print("Signature for trainval:", np.shape(trainval_sig))
print("Signature for test seen:", np.shape(test_seen_sig))
print("Signature for test unseen:", np.shape(test_unseen_sig))

Signature for trainval: (102, 645)
Signature for test seen: (102, 645)
Signature for test unseen: (102, 72)


In [9]:

# Instance and class counts of each split (the values depend on `dataset`).
m_trainval = len(labels_trainval)          # instances in trainval
z_trainval = trainval_labels_seen.size     # classes in trainval

n_test_seen = len(labels_test_seen)        # instances in test seen
z1_test_seen = test_labels_seen.size       # classes in test seen

n_test_unseen = len(labels_test_unseen)    # instances in test unseen
z1_test_unseen = test_labels_unseen.size   # classes in test unseen

# One value per line, in the order defined above.
print(m_trainval, z_trainval,
      n_test_seen, z1_test_seen,
      n_test_unseen, z1_test_unseen,
      sep='\n')

10320
645
2580
645
1440
72


# The ground truth is a one-hot encoded vector

In [10]:

# One-hot ground truth for trainval, shape (m_trainval, z_trainval):
# row i holds a single 1 in the column of instance i's re-indexed class.
gt_trainval = np.zeros((m_trainval, z_trainval))
gt_trainval[np.arange(m_trainval), labels_trainval.ravel()] = 1

print(gt_trainval, gt_trainval.shape)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] (10320, 645)


In [11]:
# Dimensions of the trainval problem: feature dimension and attribute count.
d_trainval, a_trainval = trainval_vec.shape[0], trainval_sig.shape[0]
# Placeholder weight matrix of shape (features, attributes); it is replaced
# by the closed-form solution computed in a later cell.
W = np.zeros((d_trainval, a_trainval))

# Note: These hyper-parameters were found using the code snippet available below.
# They are used as exponents of 10 in the regularisation terms.
gamm1, alph1 = 3, 0

In [12]:
# Shapes of the three matrices entering the closed-form solution:
# features (d, m), one-hot ground truth (m, z) and signatures (a, z).
print(trainval_vec.shape, gt_trainval.shape, trainval_sig.shape, sep='\n')

(2048, 10320)
(10320, 645)
(102, 645)


# The one-line code solution proposed. 
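$$V = (XX^\top + \gamma I)^{-1}\, X Y S^\top\, (SS^\top + \lambda I)^{-1}$$

In the code below, $\gamma = 10^{\texttt{alph1}}$ and $\lambda = 10^{\texttt{gamm1}}$, and $V$ is stored in the variable `W`.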

In [13]:
# Left factor: pseudo-inverse of the regularised feature Gram matrix.
part_1_test = np.linalg.pinv(trainval_vec @ trainval_vec.T + (10**alph1)*np.eye(d_trainval))

# Middle factor: features x one-hot ground truth x transposed signatures.
part_0_test = (trainval_vec @ gt_trainval) @ trainval_sig.T

# Right factor: pseudo-inverse of the regularised signature Gram matrix.
part_2_test = np.linalg.pinv(trainval_sig @ trainval_sig.T + (10**gamm1)*np.eye(a_trainval))

# Weight matrix of shape (features, attributes).
W = (part_1_test @ part_0_test) @ part_2_test

# For inference stage,

$$\hat{y} = \operatorname{argmax}\left(x^\top V S\right)$$

Where S is the signature matrix of the test_set

# For test seen classes

In [14]:
# Score every test seen instance against every test seen class signature,
# then take the best-scoring class index for each instance.
outputs_1 = (test_seen_vec.T @ W) @ test_seen_sig
preds_1 = np.argmax(outputs_1, axis=1)

In [15]:
# Row-normalise the confusion matrix so that its diagonal holds the per-class
# accuracy, then average over the test seen classes.
cm = confusion_matrix(labels_test_seen, preds_1)
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)
avg = sum(np.diagonal(cm))/len(test_labels_seen)
acc_seen = 100*avg
print("The top 1% accuracy is:", acc_seen)

The top 1% accuracy is: 28.217054263565895


# For test unseen classes

In [16]:
# Score every test unseen instance against every test unseen class signature,
# then take the best-scoring class index for each instance.
outputs_2 = (test_unseen_vec.T @ W) @ test_unseen_sig
preds_2 = np.argmax(outputs_2, axis=1)

In [17]:
# Row-normalise the confusion matrix so that its diagonal holds the per-class
# accuracy, then average over the test unseen classes.
cm = confusion_matrix(labels_test_unseen, preds_2)
cm = cm.astype('float') / cm.sum(axis=1, keepdims=True)
avg = sum(np.diagonal(cm))/len(test_labels_unseen)
acc_unseen = 100*avg
print("The top 1% accuracy is:", acc_unseen)

The top 1% accuracy is: 47.98611111111111


# Harmonic mean of seen and unseen accuracies

In [18]:
# Harmonic mean of the seen and unseen accuracies.
acc_h = 2 * acc_seen * acc_unseen / (acc_seen + acc_unseen)
print(acc_h)

35.537282328423586


# to find the best hyper-parameter

In [None]:
# Instance indices of the train and val splits, shifted down by one so they
# can be used as NumPy indices.
train_loc = np.squeeze(att_splits['train_loc']-1)
val_loc = np.squeeze(att_splits['val_loc']-1)

labels = res101['labels']

# Distinct (sorted) original class ids of each split.
train_labels_seen = np.unique(labels[train_loc])
val_labels_unseen = np.unique(labels[val_loc])

print("Number of overlapping classes between train and val:",len(set(train_labels_seen).intersection(set(val_labels_unseen))))

# Re-index the class ids of each split to 0..(n_classes - 1): the new id is
# the position of the original id in that split's sorted unique ids.
# Computing this from the raw labels keeps the cell re-runnable and leaves
# `labels` bound to the full label array (the earlier loop overwrote it).
labels_train = np.searchsorted(train_labels_seen, labels[train_loc])
labels_val = np.searchsorted(val_labels_unseen, labels[val_loc])

train_vec = X_features[:, train_loc]
val_vec = X_features[:, val_loc]

print("Features for train:", train_vec.shape)
print("Features for val:", val_vec.shape)

train_mean = train_vec.mean(axis=1, keepdims=True)
train_std = np.std(train_vec, axis=1, keepdims = True)

# NOTE(review): normalization is switched off here, so train_mean/train_std
# are currently unused. Uncomment the two lines below to run the search on
# normalized features.
#train_vec = normalization(train_vec, train_mean, train_std)
#val_vec = normalization(val_vec, train_mean, train_std)

# Attribute signatures of the train and val classes; the original class ids
# are shifted down by one to obtain column indices.
train_sig = signature[:,(train_labels_seen)-1]
val_sig = signature[:,(val_labels_unseen)-1]

print("Signature for train:", train_sig.shape)
print("Signature for val:", val_sig.shape)

#params for train and val set
m_train = labels_train.shape[0]
n_val = labels_val.shape[0]
z_train = len(train_labels_seen)
z1_val = len(val_labels_unseen)

#ground truth for train and val set: one-hot matrix of shape (m_train, z_train)
gt_train = np.zeros((m_train, z_train))
gt_train[np.arange(m_train), np.squeeze(labels_train)] = 1

#train set: feature dimension and number of attributes
d_train = train_vec.shape[0]
a_train = train_sig.shape[0]

#Weights (placeholder; the search cell assigns V for every configuration)
V = np.zeros((d_train,a_train))

# The below code snippet can be used to find the best hyper-parameter using the train and val set.

In [None]:
# Grid search over the two regularisation exponents using the train/val split.
# A configuration replaces the current best only if its mean per-class
# validation accuracy exceeds `accu`, which starts at 0.10. If none does, the
# initial alph1 = 4 / gamm1 = 1 are reported unchanged.
# Note that this cell overwrites the global alph1 / gamm1 used by the
# trainval cells above.
accu = 0.10
alph1 = 4
gamm1 = 1

exponents = range(-3, 4)

# Quantities that do not depend on the hyper-parameters are computed once
# instead of once per grid point.
feat_gram = np.matmul(train_vec, train_vec.transpose())
sig_gram = np.matmul(train_sig, train_sig.transpose())
part_0 = np.matmul(np.matmul(train_vec,gt_train),train_sig.transpose())

# The right-hand pseudo-inverse depends on gamma only, so build it once per gamma.
part_2_by_gamma = {
    gamma: np.linalg.pinv(sig_gram + (10**gamma)*np.eye(a_train))
    for gamma in exponents
}

for alpha in exponents:
    # The left-hand pseudo-inverse depends on alpha only.
    part_1 = np.linalg.pinv(feat_gram + (10**alpha)*np.eye(d_train))
    left = np.matmul(part_1,part_0)

    for gamma in exponents:
        #One line solution
        V = np.matmul(left,part_2_by_gamma[gamma])

        #predictions: best-scoring val class for every val instance
        outputs = np.matmul(np.matmul(val_vec.transpose(),V),val_sig)
        preds = np.argmax(outputs, axis=1)

        # Mean per-class accuracy from the row-normalised confusion matrix.
        cm = confusion_matrix(labels_val, preds)
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        avg = sum(cm.diagonal())/len(val_labels_unseen)

        if avg > accu:
            accu = avg
            alph1 = alpha
            gamm1 = gamma
print(alph1, gamm1)


# Implementation Results

AWA2 dataset

1. Hyperparameters: gamm1 = 3                alph1 = 0
without normalization of feature vectors

acc_seen = 88.59876114732992           acc_unseen = 45.73846664538193               harmonic mean = 60.331325100921305

2. Hyperparameters: gamm1 = 3                alph1 = 0
with normalization of feature vectors

acc_seen = 79.32389991512959           acc_unseen = 45.85667448938017               harmonic mean = 58.11652926087265


3. Hyperparameters: gamm1 = 3                alph1 = 0 (Value got after validation)
without normalization of feature vectors

acc_seen = 88.59876114732992           acc_unseen = 45.73846664538193               harmonic mean = 60.331325100921305

4. Hyperparameters: gamm1 = 3                alph1 = 0 (Value got after validation)
with normalization of feature vectors

acc_seen = 79.32389991512959           acc_unseen = 45.85667448938017               harmonic mean = 58.11652926087265


CUB dataset

1. Hyperparameters: gamm1 = 3                alph1 = 0
without normalization of feature vectors

acc_seen = 57.50188482541427           acc_unseen = 40.1153158858462                harmonic mean = 47.260242190840735

2. Hyperparameters: gamm1 = 3                alph1 = 0
with normalization of feature vectors

acc_seen = 45.08627320392026           acc_unseen = 37.07889831296149               harmonic mean = 40.69240795280468

3. Hyperparameters: gamm1 = 3                alph1 = 0 (Value got after validation)
without normalization of feature vectors

acc_seen = 57.50188482541427           acc_unseen = 40.1153158858462                harmonic mean = 47.260242190840735



SUN dataset

1. Hyperparameters: gamm1 = 3                alph1 = 0
without normalization of feature vectors

acc_seen = 28.217054263565895           acc_unseen = 47.98611111111111              harmonic mean = 35.537282328423586


