In [1]:
import numpy as np
import os
import scipy.io
from sklearn.metrics import classification_report,confusion_matrix

In [2]:
#Please add the folder name of the dataset to run it on different dataset.
dataset = 'CUB'
path = 'E:/Sushree/Dataset/data/xlsa17/data/'

res101 = scipy.io.loadmat(path + dataset + '/res101.mat')
att_splits = scipy.io.loadmat(path + dataset + '/att_splits.mat')

In [3]:
# total number of instances or images = 11788: ranges from 0 to 11787

trainval_loc = np.squeeze(att_splits['trainval_loc']-1) # -1: to consider the overflow problem
print(np.unique(trainval_loc), np.max(np.unique(trainval_loc))) # smallest location: 1, largest location 11726

test_seen_loc = np.squeeze(att_splits['test_seen_loc']-1)
print(np.unique(test_seen_loc), np.max(np.unique(test_seen_loc))) # smallest location: 0, largest location 11727

test_unseen_loc = np.squeeze(att_splits['test_unseen_loc']-1)
print(np.unique(test_unseen_loc), np.max(np.unique(test_unseen_loc))) # smallest location: 178, largest location 11727



[    1     2     4 ... 11724 11725 11726] 11726
[    0     3     6 ... 11716 11717 11727] 11727
[  178   179   180 ... 11785 11786 11787] 11787


In [4]:

labels = res101['labels']# direct class labels
print('labels', labels, labels.shape)# 11788 x 1

print('unique_labels', np.unique(labels), np.unique(labels).shape)# class labels range from 1 to 200, 200 classes

# get the labels for trainval, test seen and test unseen sets

labels_trainval = labels[trainval_loc]
print('labels_trainval', labels_trainval, labels_trainval.shape)

unique_labels_trainval = np.unique(labels_trainval) # labels min:1 max:200
print('unique_labels_trainval', unique_labels_trainval, unique_labels_trainval.shape)# 150 classes


labels_test_seen = labels[test_seen_loc]
print('labels_test_seen', labels_test_seen, labels_test_seen.shape)

unique_labels_test_seen = np.unique(labels_test_seen) # labels min:1 max:200
print('unique_labels_test_seen', unique_labels_test_seen, unique_labels_test_seen.shape)# 150 classes


labels_test_unseen = labels[test_unseen_loc]
print('labels_test_unseen', labels_test_unseen, labels_test_unseen.shape)

unique_labels_test_unseen = np.unique(labels_test_unseen) # labels min:7 max:195
print('unique_labels_test_unseen', unique_labels_test_unseen, unique_labels_test_unseen.shape)# 50 classes


if len(labels) == len(labels_trainval) + len(labels_test_seen) + len(labels_test_unseen):
    print('correct number of instances for training, test seen and test unseen categories')
    
print("Number of overlapping classes between trainval and test seen:",len(set(unique_labels_trainval).intersection(set(unique_labels_test_seen))))

print("Number of overlapping classes between trainval and test unseen:",len(set(unique_labels_trainval).intersection(set(unique_labels_test_unseen))))


labels [[151]
 [151]
 [151]
 ...
 [150]
 [150]
 [150]] (11788, 1)
unique_labels [  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
 199 200] (200,)
labels_trainval [[197]
 [198]
 [ 31]
 ...
 [ 65]
 [147]
 [ 22]] (7057, 1)
unique_labels_trainval [  

In [5]:
X_features = res101['features']

# locations are already subtracted by 1, so they range from 0 to 11787
trainval_vec = X_features[:, trainval_loc].transpose()
test_seen_vec = X_features[:, test_seen_loc].transpose()
test_unseen_vec = X_features[:, test_unseen_loc].transpose()

print("Features for trainval:", trainval_vec.shape) #(7057, 2048)
print("Features for test seen:", test_seen_vec.shape)# (7057, 2048)
print("Features for test unseen:", test_unseen_vec.shape) #(7057, 2048)

Features for trainval: (7057, 2048)
Features for test seen: (1764, 2048)
Features for test unseen: (2967, 2048)


In [6]:
signature = att_splits['att']
print(signature.shape) #(312, 200)

attribute = signature.transpose()
print(attribute, attribute.shape)#(200, 312)

# attribute is defined for all 200 classes, so we cant use locations directly, instead we have to use labels 
# that range from 1 to 200, so we have to subtract 1

train_attributes = np.zeros((len(trainval_loc), 312))
for i in range(len(trainval_loc)):
    train_attributes[i] = attribute[int(labels_trainval[i])-1]

print(train_attributes, train_attributes.shape)# (7057, 312)

test_seen_attributes = np.zeros((len(test_seen_loc), 312))
for i in range(len(test_seen_loc)):
    test_seen_attributes[i] = attribute[int(labels_test_seen[i])-1]

print(test_seen_attributes, test_seen_attributes.shape)# (7057, 312)

test_unseen_attributes = np.zeros((len(test_unseen_loc), 312))
for i in range(len(test_unseen_loc)):
    test_unseen_attributes[i] = attribute[int(labels_test_unseen[i])-1]

print(test_unseen_attributes, test_unseen_attributes.shape)# (7057, 312)


(312, 200)
[[0.0106384  0.0106384  0.00709227 ... 0.00918617 0.02526198 0.02066889]
 [0.         0.01133243 0.00944369 ... 0.00266542 0.02132333 0.05863916]
 [0.         0.         0.00742474 ... 0.         0.00885258 0.01770516]
 ...
 [0.         0.00334966 0.         ... 0.00556558 0.         0.15027069]
 [0.         0.11184146 0.         ... 0.08207164 0.05836206 0.01823814]
 [0.04378019 0.02814441 0.         ... 0.06022509 0.07695428 0.06189801]] (200, 312)
[[0.00338059 0.         0.00338059 ... 0.00343068 0.03945277 0.15266508]
 [0.         0.00334966 0.         ... 0.00556558 0.         0.15027069]
 [0.         0.         0.00424052 ... 0.00463149 0.0663847  0.12350642]
 ...
 [0.01055845 0.08446762 0.         ... 0.03555842 0.04504067 0.14934538]
 [0.00637912 0.03030081 0.         ... 0.05399318 0.13266897 0.01542662]
 [0.0046952  0.08607864 0.         ... 0.         0.         0.01391271]] (7057, 312)
[[0.         0.00191483 0.         ... 0.         0.05482619 0.08991495]
 [0.0

In [7]:
import gensim.downloader as api
import pandas as pd
print('Load pretrain w2v model')

model_name = 'word2vec-google-news-300'#best model
#model_name = 'fasttext-wiki-news-subwords-300'
#model_name = 'glove-wiki-gigaword-300'
#model_name = 'glove-wiki-gigaword-200'
#model_name = 'glove-twitter-100'
#model_name = 'glove-twitter-200'
model = api.load(model_name)

dim_w2v = 300


#For CUB
replace_word = [('spatulate','broad'),('upperparts','upper parts'),('grey','gray'), ('eyering', 'eye ring')] # for CUB


path = 'E:/Sushree/Dataset/CUB_200_2011/CUB_200_2011/attributes/attributes.txt'
df=pd.read_csv(path,sep=' ',header = None, names = ['idx','des'])
des = df['des'].values

#%% replace out of dictionary (OOD) words
for pair in replace_word:
    for idx,s in enumerate(des):
        des[idx] = s.replace(pair[0],pair[1])
print('Done replacing OOD words')

#%% filter
new_des = [' '.join(i.split('_')) for i in des]
new_des = [' '.join(i.split('-')) for i in new_des]
new_des = [' '.join(i.split('::')) for i in new_des]
new_des = [i.split('(')[0] for i in new_des]
new_des = [i[4:] for i in new_des]



df['new_des'] = des
df.to_csv('E:/Sushree/Dataset/CUB_200_2011/CUB_200_2011/attributes/new_des.csv')
print('Done preprocessing attribute des')



Load pretrain w2v model
Done replacing OOD words
Done preprocessing attribute des


In [8]:
import pickle

counter_err = 0

all_w2v = []
for s in new_des:
    print(s)
    words = s.split(' ')
    if words[-1] == '':     #remove empty element
        words = words[:-1]
    w2v = np.zeros(dim_w2v)
    for w in words:
        try:
            w2v += model[w]
        except Exception as e:
            print(e)
            counter_err += 1
    w2v = w2v / len(words)  
    all_w2v.append(w2v[np.newaxis,:])
    
print('counter_err ',counter_err)

#%%
w2v_att = np.concatenate(all_w2v,axis=0)
#pdb.set_trace()
#%%
print(w2v_att, w2v_att.shape)

bill shape curved 
bill shape dagger
bill shape hooked
bill shape needle
bill shape hooked seabird
bill shape broad
bill shape all purpose
bill shape cone
bill shape specialized
wing color blue
wing color brown
wing color iridescent
wing color purple
wing color rufous
wing color gray
wing color yellow
wing color olive
wing color green
wing color pink
wing color orange
wing color black
wing color white
wing color red
wing color buff
upper parts color blue
upper parts color brown
upper parts color iridescent
upper parts color purple
upper parts color rufous
upper parts color gray
upper parts color yellow
upper parts color olive
upper parts color green
upper parts color pink
upper parts color orange
upper parts color black
upper parts color white
upper parts color red
upper parts color buff
underparts color blue
underparts color brown
underparts color iridescent
underparts color purple
underparts color rufous
underparts color gray
underparts color yellow
underparts color olive
underparts 

In [9]:
train_attributes_2 = np.matmul(train_attributes, w2v_att)
train_attributes_2[train_attributes_2<0]=0
print(train_attributes_2, train_attributes_2.shape)

test_seen_attributes_2 = np.matmul(test_seen_attributes, w2v_att)
test_seen_attributes_2[test_seen_attributes_2<0]=0
print(test_seen_attributes_2, test_seen_attributes_2.shape)

test_unseen_attributes_2 = np.matmul(test_unseen_attributes, w2v_att)
test_unseen_attributes_2[test_unseen_attributes_2<0]=0
print(test_unseen_attributes_2, test_unseen_attributes_2.shape)

[[0.10009568 0.9377297  0.         ... 0.02577444 0.30546715 0.        ]
 [0.1383094  0.94562687 0.         ... 0.01784731 0.28583191 0.        ]
 [0.         0.72600752 0.         ... 0.00275187 0.22658321 0.        ]
 ...
 [0.05288081 1.0878565  0.         ... 0.         0.4056107  0.        ]
 [0.20065223 1.01485636 0.         ... 0.         0.25923759 0.04305081]
 [0.08196223 0.66665812 0.         ... 0.01921613 0.13555862 0.        ]] (7057, 300)
[[0.02500706 1.01960297 0.         ... 0.         0.30171104 0.02582171]
 [0.00635005 0.7743511  0.         ... 0.         0.21568862 0.        ]
 [0.08385111 0.75811102 0.         ... 0.02312575 0.18178269 0.        ]
 ...
 [0.08886514 0.98288574 0.         ... 0.         0.25516327 0.09668336]
 [0.18223473 1.0922386  0.         ... 0.02081316 0.31095575 0.        ]
 [0.         0.73958663 0.         ... 0.         0.18485154 0.        ]] (1764, 300)
[[0.06404399 0.71426969 0.         ... 0.02208984 0.15291472 0.        ]
 [0.06404399 0.

In [10]:
# as labels range from 1 to 200, we have subtract 1
attribute_2 = np.matmul(attribute, w2v_att)
attribute_2[attribute_2<0]=0
print(attribute_2, attribute_2.shape)

signature_2 = attribute_2.transpose()
print(signature_2, signature_2.shape)#(200, 300)

trainval_sig = signature_2[:, (unique_labels_trainval)-1]
test_seen_sig = signature_2[:, (unique_labels_test_seen)-1]
test_unseen_sig = signature_2[:, (unique_labels_test_unseen)-1]

print("Signature for trainval:", trainval_sig.shape)
print("Signature for test seen:", test_seen_sig.shape)
print("Signature for test unseen:", test_unseen_sig.shape)

[[0.         0.83126803 0.         ... 0.         0.21368247 0.        ]
 [0.07500675 1.10233067 0.         ... 0.02798326 0.32832836 0.        ]
 [0.03885375 0.84916021 0.         ... 0.         0.22503638 0.        ]
 ...
 [0.1383094  0.94562687 0.         ... 0.01784731 0.28583191 0.        ]
 [0.05964153 0.81873677 0.         ... 0.         0.25646681 0.        ]
 [0.15135236 1.02497011 0.         ... 0.         0.27544189 0.        ]] (200, 300)
[[0.         0.07500675 0.03885375 ... 0.1383094  0.05964153 0.15135236]
 [0.83126803 1.10233067 0.84916021 ... 0.94562687 0.81873677 1.02497011]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.02798326 0.         ... 0.01784731 0.         0.        ]
 [0.21368247 0.32832836 0.22503638 ... 0.28583191 0.25646681 0.27544189]
 [0.         0.         0.         ... 0.         0.         0.        ]] (300, 200)
Signature for trainval: (300, 150)
Signature for test seen: (300, 150)
Signature for test 

In [11]:
# by doing this modification, we are changing the range of trainval and test seen labels from 0 to 149 
# and test unseen labels from 0 to 49

k = 0
new_labels_trainval = np.zeros((len(labels_trainval), 1), dtype = 'int')
for labels in unique_labels_trainval:
    new_labels_trainval[labels_trainval == labels] = k
    k = k+1
    
print(new_labels_trainval, new_labels_trainval.shape)#(23527, 1)

l = 0
new_labels_test_seen = np.zeros((len(labels_test_seen), 1), dtype = 'int')
for labels in unique_labels_test_seen:
    new_labels_test_seen[labels_test_seen == labels] = l
    l = l+1
    
print(new_labels_test_seen, new_labels_test_seen.shape)# (5882, 1)

m = 0
new_labels_test_unseen = np.zeros((len(labels_test_unseen), 1), dtype = 'int')
for labels in unique_labels_test_unseen:
    new_labels_test_unseen[labels_test_unseen == labels] = m
    m = m+1  

print(new_labels_test_unseen, new_labels_test_unseen.shape) #  (7913, 1)  


print(np.unique(new_labels_trainval), np.unique(new_labels_trainval).shape)

print(np.unique(new_labels_test_seen), np.unique(new_labels_test_seen).shape)

print(np.unique(new_labels_test_unseen), np.unique(new_labels_test_unseen).shape)

[[146]
 [147]
 [ 26]
 ...
 [ 55]
 [115]
 [ 18]] (7057, 1)
[[ 90]
 [ 85]
 [ 17]
 ...
 [107]
 [ 76]
 [137]] (1764, 1)
[[32]
 [32]
 [32]
 ...
 [31]
 [31]
 [31]] (2967, 1)
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149] (150,)
[  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52 

In [12]:
#params for trainval and test set
m_trainval = new_labels_trainval.shape[0]# number of instances in training set: 23527
print(m_trainval)

z_trainval = len(unique_labels_trainval)# number of classes in training set: 40
print(z_trainval)


n_test_seen = new_labels_test_seen.shape[0]# 5882
print(n_test_seen)

z1_test_seen = len(unique_labels_test_seen)# 40
print(z1_test_seen)


n_test_unseen = new_labels_test_unseen.shape[0]# 7913
print(n_test_unseen)

z1_test_unseen = len(unique_labels_test_unseen)# 10
print(z1_test_unseen)

7057
150
1764
150
2967
50


In [13]:
from tensorflow.keras.utils import to_categorical
gt_trainval = to_categorical(new_labels_trainval, z_trainval)

print(gt_trainval, gt_trainval.shape)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] (7057, 150)


In [14]:
input1_shape = trainval_vec.shape[1]
print(input1_shape)

attribute_shape = trainval_sig.shape[0]
print(attribute_shape)

output_shape = z_trainval
print(output_shape)


2048
300
150


In [15]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import *
from keras.optimizers import SGD, Adam, Adagrad

# define model2 for attribute to class label mapping

input2 = Input(shape = attribute_shape)
output = Dense(output_shape, name="output", activation='softmax')(input2)

model2 = Model(inputs = input2, outputs = output)

#opt = SGD(learning_rate = 1e-2, decay = 1e-6, momentum = 0.9, nesterov = True)
opt = Adam(learning_rate = 1e-2, beta_1=0.9, beta_2=0.999, epsilon=0.01, decay=0.0001)

model2.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])

model2.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 300)]             0         
                                                                 
 output (Dense)              (None, 150)               45150     
                                                                 
Total params: 45,150
Trainable params: 45,150
Non-trainable params: 0
_________________________________________________________________


In [16]:
# define model1 for resnet feature to class label mapping

input1 = Input(shape = input1_shape)
#inter_pre = Dense(512, name="intermediate_previous", activation='relu')(input1)
inter = Dense(attribute_shape, name = "intermediate", activation = 'linear')(input1)
output = Dense(output_shape, name="output", activation='softmax')(inter)

model1 = Model(inputs = input1, outputs = output)

opt = Adam(learning_rate = 1e-2, beta_1=0.9, beta_2=0.999, epsilon=0.01, decay=0.0001)

model1.compile(optimizer = opt, loss = 'categorical_crossentropy', metrics = ['accuracy'])


model1.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 2048)]            0         
                                                                 
 intermediate (Dense)        (None, 300)               614700    
                                                                 
 output (Dense)              (None, 150)               45150     
                                                                 
Total params: 659,850
Trainable params: 659,850
Non-trainable params: 0
_________________________________________________________________


In [17]:
trainval_input1 = trainval_vec
print(trainval_input1.shape)

trainval_input2 = train_attributes_2
print(trainval_input2.shape)

trainval_output = gt_trainval
print(trainval_output.shape)

(7057, 2048)
(7057, 300)
(7057, 150)


In [18]:
from tensorflow.keras.utils import Sequence
class DataGenerator(Sequence):
    def __init__(self, x_set, y_set, batch_size):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        return batch_x, batch_y
    
batch_size = 16
from sklearn.model_selection import train_test_split    

In [19]:

X_train1, X_val1, y_train1, y_val1 = train_test_split(trainval_input1, trainval_output, test_size = 0.2, random_state = 42)

#train_gen1 = DataGenerator(X_train1, y_train1, batch_size)   
#val_gen1 = DataGenerator(X_val1, y_val1, batch_size)


X_train2, X_val2, y_train2, y_val2 = train_test_split(trainval_input2, trainval_output, test_size = 0.2, random_state = 42)

#train_gen2 = DataGenerator(X_train2, y_train2, batch_size)   
#val_gen2 = DataGenerator(X_val2, y_val2, batch_size)

In [20]:
iteration = 5
epochs1 = 200
epochs2 = 200

best_performance_micro = [0, 0, 0]
best_performance_macro = [0, 0, 0]

save_path = 'C:/Users/Admin/Sushree_Codes/Sush_3/Results/'
name = 'model1_CUB_Tatt300_it5_200eph_adam_cce_16bch_1e-2lr_model2_adam_lr-2_200'


for i in range(iteration):
    X_train2_it = X_train2[(len(X_train2)//iteration)*i:(len(X_train2)//iteration)*(i+1)]
    X_val2_it = X_val2[(len(X_val2)//iteration)*i:(len(X_val2)//iteration)*(i+1)]
    y_train2_it = y_train2[(len(y_train2)//iteration)*i:(len(y_train2)//iteration)*(i+1)]
    y_val2_it = y_val2[(len(y_val2)//iteration)*i:(len(y_val2)//iteration)*(i+1)]
    
    train_gen2 = DataGenerator(X_train2_it, y_train2_it, batch_size)   
    val_gen2 = DataGenerator(X_val2_it, y_val2_it, batch_size)

    train_summary2 = model2.fit(train_gen2, epochs = epochs2, verbose = 0, callbacks = None, validation_data = val_gen2, 
                              shuffle = True, steps_per_epoch = len(train_gen2)//batch_size, 
                              validation_steps = len(val_gen2)//batch_size)

    print("iteration:", i)
    print('model 2 is trained:', 'training acc:', train_summary2.history['accuracy'][-1], ',',  
          'training loss:', train_summary2.history['loss'][-1], ',', 
          'validation acc:', train_summary2.history['val_accuracy'][-1], ',',
         'validation_loss:', train_summary2.history['val_loss'][-1])

    weights_list2 = model2.get_weights()
    #print(weights_list2)

    model1.layers[-1].set_weights(weights_list2)

    X_train1_it = X_train1[(len(X_train1)//iteration)*i:(len(X_train1)//iteration)*(i+1)]
    X_val1_it = X_val1[(len(X_val1)//iteration)*i:(len(X_val1)//iteration)*(i+1)]
    y_train1_it = y_train1[(len(y_train1)//iteration)*i:(len(y_train1)//iteration)*(i+1)]
    y_val1_it = y_val1[(len(y_val1)//iteration)*i:(len(y_val1)//iteration)*(i+1)]
    
    train_gen1 = DataGenerator(X_train1_it, y_train1_it, batch_size)   
    val_gen1 = DataGenerator(X_val1_it, y_val1_it, batch_size)
    
    for layer in model1.layers[2:]:
        layer.trainable = True

    train_summary1 = model1.fit(train_gen1, epochs = epochs1, verbose = 0, callbacks = None, validation_data = val_gen1, 
                              shuffle = True, steps_per_epoch = len(train_gen1)//batch_size, 
                              validation_steps = len(val_gen1)//batch_size)
    
    #print("iteration:", i)
    print('model 1 is trained:', 'training acc:', train_summary1.history['accuracy'][-1], ',',  
          'training loss:', train_summary1.history['loss'][-1], ',', 
          'validation acc:', train_summary1.history['val_accuracy'][-1], ',',
         'validation_loss:', train_summary1.history['val_loss'][-1])
    
    weights_list1 = Model(inputs = model1.input, outputs = model1.layers[-1].output).get_weights()
    
    #predictions
    #outputs_seen = np.matmul(np.matmul(test_seen_vec, np.matmul(weights_list1[0], weights_list1[2])), test_seen_sig)
    outputs_seen = np.matmul(np.matmul(test_seen_vec, weights_list1[0]), test_seen_sig)
    
    preds_seen = np.array([np.argmax(output) for output in outputs_seen])
    
    cm_seen = confusion_matrix(new_labels_test_seen, preds_seen)
    #print(cm)
    # Compute macro average (averaging performance metrics by first calculating the metric separately for each class and 
    # then averaging these class-specific metrics)
    cm_seen_micro = cm_seen.astype('float') / cm_seen.sum(axis=1)[:, np.newaxis]
    #print(cm)
    avg_seen_micro = (sum(cm_seen_micro.diagonal())/len(unique_labels_test_seen))*100

    avg_seen_macro = (sum(cm_seen.diagonal())/len(new_labels_test_seen))*100
    
    #predictions
    outputs_unseen = np.matmul(np.matmul(test_unseen_vec, weights_list1[0]), test_unseen_sig)
    
    preds_unseen = np.array([np.argmax(output) for output in outputs_unseen])
    
    cm_unseen = confusion_matrix(new_labels_test_unseen, preds_unseen)
    # Compute macro average (averaging performance metrics by first calculating the metric separately for each class and 
    # then averaging these class-specific metrics)
    cm_unseen_micro = cm_unseen.astype('float') / cm_unseen.sum(axis=1)[:, np.newaxis]
    avg_unseen_micro = (sum(cm_unseen_micro.diagonal())/len(unique_labels_test_unseen))*100

    avg_unseen_macro = (sum(cm_unseen.diagonal())/len(new_labels_test_unseen))*100
    
    harmonic_micro = (2*avg_seen_micro*avg_unseen_micro) / (avg_seen_micro + avg_unseen_micro)
    harmonic_macro = (2*avg_seen_macro*avg_unseen_macro) / (avg_seen_macro + avg_unseen_macro)
    
    print('micro average')
    print('seen accuracy:', avg_seen_micro, 'unseen accuracy:', avg_unseen_micro, 'harmonic mean:', harmonic_micro)
    
    print('macro average')
    print('seen accuracy:', avg_seen_macro, 'unseen accuracy:', avg_unseen_macro, 'harmonic mean:', harmonic_macro)
    
    if harmonic_micro > best_performance_micro[2]:
        best_performance_micro = [avg_seen_micro, avg_unseen_micro, harmonic_micro]
        model1.save_weights(save_path + 'bw_micro_' + name + '.h5', overwrite=True)
        
    if harmonic_macro > best_performance_macro[2]:
        best_performance_macro = [avg_seen_macro, avg_unseen_macro, harmonic_macro]
        model1.save_weights(save_path + 'bw_macro_' + name + '.h5', overwrite=True)
        
    print('best accuracy micro','seen accuracy:', best_performance_micro[0], 'unseen accuracy:', best_performance_micro[1], 'harmonic mean:', best_performance_micro[2])
    print('best accuracy macro', 'seen accuracy:', best_performance_macro[0], 'unseen accuracy:', best_performance_macro[1], 'harmonic mean:', best_performance_macro[2])
    
    print('-----------------------------------------------------------------------------------------------------------')
    weights_list3 = model1.get_weights()
    model2.set_weights(weights_list3[2:])

iteration: 0
model 2 is trained: training acc: 0.109375 , training loss: 4.361388683319092 , validation acc: 0.0 , validation_loss: 4.640748977661133
model 1 is trained: training acc: 0.8947368264198303 , training loss: 5.182311058044434 , validation acc: 0.3125 , validation_loss: 57.87617492675781
micro average
seen accuracy: 7.423998223998223 unseen accuracy: 12.068671820150046 harmonic mean: 9.192973354177177
macro average
seen accuracy: 7.199546485260772 unseen accuracy: 12.133468149646106 harmonic mean: 9.03692151694571
best accuracy micro seen accuracy: 7.423998223998223 unseen accuracy: 12.068671820150046 harmonic mean: 9.192973354177177
best accuracy macro seen accuracy: 7.199546485260772 unseen accuracy: 12.133468149646106 harmonic mean: 9.03692151694571
-----------------------------------------------------------------------------------------------------------
iteration: 1
model 2 is trained: training acc: 0.15625 , training loss: 3.6705005168914795 , validation acc: 0.0 , val

In [21]:
gt_test_seen = to_categorical(new_labels_test_seen, z1_test_seen)

print(gt_test_seen, gt_test_seen.shape)

#test_seen_vec = np.reshape(test_seen_vec, [test_seen_vec.shape[0], 1, 1, test_seen_vec.shape[1]])
res1 = model1.evaluate(test_seen_vec, gt_test_seen)

p1 = model1.predict(test_seen_vec, verbose = 0)

import tensorflow
cce = tensorflow.keras.losses.CategoricalCrossentropy()
print('cce = ', cce(gt_test_seen, p1).numpy())

#test_seen_attributes = np.reshape(train_attributes, [test_seen_attributes.shape[0], 1, train_attributes.shape[1]])
#test_seen_attributes = np.reshape(test_seen_attributes, [test_seen_attributes.shape[0], 1, 1, test_seen_attributes.shape[1]])

res2 = model2.evaluate(test_seen_attributes_2, gt_test_seen)

p2 = model2.predict(test_seen_attributes_2, verbose = 0)

import tensorflow
cce = tensorflow.keras.losses.CategoricalCrossentropy()
print('cce = ', cce(gt_test_seen, p2).numpy())


[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]] (1764, 150)
cce =  6.8933573
cce =  3.2025647


In [24]:

accuracy_seen_updated = res1[1]*100
unseen_accuracy = 26.59
h = (2*accuracy_seen_updated*unseen_accuracy) / (accuracy_seen_updated + unseen_accuracy)
print(h)


accuracy_seen_updated2 = ((res1[1]*100)+(res2[1]*100))/2
print(accuracy_seen_updated2)
h = (2*accuracy_seen_updated2*unseen_accuracy) / (accuracy_seen_updated2 + unseen_accuracy)
print(h)

36.072224751804015
40.70294946432114
32.1665623180964


In [23]:
from sklearn.metrics import precision_recall_fscore_support

pp1 = np.array([np.argmax(output) for output in p1])
pp2 = np.array([np.argmax(output) for output in p2])

seen_macro1 = precision_recall_fscore_support(new_labels_test_seen, pp1, average = 'macro')
seen_macro2 = precision_recall_fscore_support(new_labels_test_seen, pp2, average = 'macro')
print('precision_seen_macro', (seen_macro1[0] + seen_macro2[0])/2, 'recall_seen_macro', (seen_macro1[1] + seen_macro2[1])/2, 'f1_seen_macro', (seen_macro1[2] + seen_macro2[2])/2)


seen_micro1 = precision_recall_fscore_support(new_labels_test_seen, pp1, average = 'micro')
seen_micro2 = precision_recall_fscore_support(new_labels_test_seen, pp2, average = 'micro')
print('precision_seen_micro', (seen_micro1[0] + seen_micro2[0])/2, 'recall_seen_micro', (seen_micro1[1] + seen_micro2[1])/2, 'f1_seen_micro', (seen_micro1[2] + seen_micro2[2])/2)

unseen_macro = precision_recall_fscore_support(new_labels_test_unseen, preds_unseen, average = 'macro')
unseen_micro = precision_recall_fscore_support(new_labels_test_unseen, preds_unseen, average = 'micro')

print('precision_unseen_macro', unseen_macro[0], 'recall_unseen_macro', unseen_macro[1], 'f1_unseen_macro', unseen_macro[2])
print('precision_unseen_micro', unseen_micro[0], 'recall_unseen_micro', unseen_micro[1], 'f1_unseen_micro', unseen_micro[2])


precision_seen_macro 0.38411831269157104 recall_seen_macro 0.414125782714018 f1_seen_macro 0.3679770483718723
precision_seen_micro 0.4070294784580499 recall_seen_micro 0.4070294784580499 f1_seen_micro 0.4070294784580499
precision_unseen_macro 0.3259113762801681 recall_unseen_macro 0.2659352621875366 f1_unseen_macro 0.22192708418859708
precision_unseen_micro 0.2659251769464105 recall_unseen_micro 0.2659251769464105 f1_unseen_micro 0.2659251769464105


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
