**Run DeViSE on ImNet-A with Basic KG**
---
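You can run other settings by changing the `--dataset` and `--semantic_type` parameters in Section 3. Note that the default `--semantic_type` in that cell is `hie`; set it to `kge` to use the Basic KG embeddings.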

**1. Bind your Google Drive**

In [16]:
from google.colab import drive
# Mount Google Drive at /content/drive/; the default `--data_dir` defined in the
# parameter cell points inside this mount.
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


**2. Import Package**

In [17]:
import numpy as np
import random
import argparse
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import heapq
import os
import scipy.io as scio
from collections import defaultdict

**3. Parameters Setting**

In [18]:
# Command-line style configuration for the whole notebook. Every later cell
# reads its settings from the module-level `args` namespace built here.
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', default='/content/drive/MyDrive/ISWC_demo/ZS_IMGC/data', help='root directory')
parser.add_argument('--dataset', default='ImNet_A', help='target datasets, options: {AwA2, ImNet_A, ImNet_O}')
parser.add_argument('--semantic_type', default='hie', type=str, help='the type of class embedding to input, options: {att, w2v, w2v-glove, hie, kge (Basic KG), kge_text (Basic KG+literal), kge_facts (Basic KG+CN), kge_logics (Basic KG+logics)}')
parser.add_argument("--gpu", type=int, default=0, help="Which GPU to use?")

# Training parameters
parser.add_argument('--loss_fn', default='mse', help='loss function, options: {mse, margin}')
parser.add_argument('--p', default=0.5, help='dropout', type=float)
parser.add_argument('--batch_size', default=64, help='training batch size', type=int)
parser.add_argument('--lr', default=1e-3, help='learning rate', type=float)
parser.add_argument('--wds', default=1e-5, help='weight decay passed to the optimizer', type=float)
parser.add_argument('--epoch_num', default=60, help='training epoch', type=int)
parser.add_argument('--manual_seed', default=12345, help='random seed', type=int)

# parse_known_args() returns (namespace, leftover_args); unrecognized entries in
# sys.argv are tolerated instead of aborting the cell.
args = parser.parse_known_args()[0]

# Fall back to a random seed only if the seed has been explicitly cleared.
if args.manual_seed is None:
  args.manual_seed = random.randint(1, 10000)
print("Random Seed: ", args.manual_seed)

# Seed every random number generator used by the notebook (NumPy, Python, PyTorch).
np.random.seed(args.manual_seed)
random.seed(args.manual_seed)
torch.manual_seed(args.manual_seed)
if torch.cuda.is_available():
    torch.cuda.set_device(args.gpu)
    print('using gpu {}'.format(args.gpu))
    torch.cuda.manual_seed_all(args.manual_seed)
    # Request deterministic cuDNN kernels and keep the autotuner off so that
    # algorithm selection does not vary between runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    print("GPU is not available!")

Random Seed:  12345
using gpu 0


**4.  Loading Data**

In [19]:
def load_semantic_embed(data_path, dataset, type):
    """
    Load the class-level semantic embeddings of a dataset from a .mat file.

    Parameters
    ----------
    data_path : str
        Directory that contains the embedding files. For 'AwA2' the files are
        looked up under ``data_path/semantic_embeddings`` (attributes directly
        under ``data_path``); for any other dataset under
        ``data_path/<dataset>/semantic_embeddings`` (word2vec directly under
        ``data_path``).
    dataset : str
        Dataset name. 'AwA2' selects the AwA2 file layout; every other value
        uses the ImageNet layout, and the KG-based types are only available
        for 'ImNet_A' and 'ImNet_O'.
    type : str
        Type of semantic embeddings: one of att, w2v, w2v-glove, hie, kge,
        kge_text, kge_facts, kge_logics (kge_logics only for 'AwA2').

    Returns
    -------
    embeddings : NumPy array
        The embedding matrix stored in the file, expected to be of size
        [class nums, semantic embedding dimension].

    Raises
    ------
    ValueError
        If `type` is not available for `dataset`. Previously this case only
        printed a warning and then failed on an unbound local variable.
    """
    if dataset == 'AwA2':
        file_path = os.path.join(data_path, 'semantic_embeddings')
        candidates = {
            'att': os.path.join(data_path, 'binaryAtt_splits.mat'),
            'w2v': os.path.join(file_path, 'awa_w2v.mat'),
            'w2v-glove': os.path.join(file_path, 'awa_w2v_glove.mat'),
            'hie': os.path.join(file_path, 'awa_hierarchy_gae.mat'),
            'kge': os.path.join(file_path, 'kge_CH_AH_CA_60000.mat'),
            'kge_text': os.path.join(file_path, 'kge_CH_AH_CA_60000_text_140.mat'),
            'kge_facts': os.path.join(file_path, 'kge_CH_AH_CA_Facts_60000_80000.mat'),
            'kge_logics': os.path.join(file_path, 'kge_CH_AH_CA_Logics_70000.mat'),
        }
    else:
        file_path = os.path.join(data_path, dataset, 'semantic_embeddings')
        candidates = {
            'hie': os.path.join(file_path, 'hierarchy_gae.mat'),
            # the word2vec file lives directly under data_path, not per dataset
            'w2v': os.path.join(data_path, 'w2v.mat'),
            'w2v-glove': os.path.join(file_path, 'w2v_glove.mat'),
            'att': os.path.join(file_path, 'atts_binary.mat'),
        }
        if dataset in ('ImNet_A', 'ImNet_O'):
            candidates.update({
                'kge': os.path.join(file_path, 'kge_CH_AH_CA_60000.mat'),
                'kge_text': os.path.join(file_path, 'kge_CH_AH_CA_60000_text_nei_140.mat'),
                'kge_facts': os.path.join(file_path, 'kge_CH_AH_CA_Facts_60000_70000.mat'),
            })

    if type not in candidates:
        raise ValueError(
            "invalid semantic embeddings type {!r} for dataset {!r}; expected one of {}".format(
                type, dataset, sorted(candidates)))

    matcontent = scio.loadmat(candidates[type])

    if dataset == 'AwA2' and type == 'att':
        # the attribute matrix is stored transposed relative to the other files
        return matcontent['att'].T
    if dataset != 'AwA2' and type == 'w2v':
        # only the first 2549 rows of the shared word2vec matrix are used
        return matcontent['w2v'][:2549]
    return matcontent['embeddings']


class DATAReader(Dataset):
    """
    Load ZSL Data.

    Wraps one split ('train_seen', 'test_seen' or 'test_unseen') of either the
    AwA2 dataset or an ImageNet subset as a PyTorch ``Dataset``. After
    construction the instance exposes:

    - ``x``: float32 feature matrix, one row per sample
    - ``y_tag``: class id of every sample
    - ``y_vec`` / ``y_vec_neg``: float32 semantic embedding of the sample's own
      class and of one randomly drawn different class of the same split
    - ``ids`` / ``all_sem_vec``: the class ids of the split and, row-aligned,
      their semantic embeddings
    """

    def __init__(self, args, type):
        """
        Parameters
        ----------
        args : namespace
            Must provide ``data_dir``, ``dataset`` and ``semantic_type``.
        type : str
            Split to load: 'train_seen', 'test_seen' or 'test_unseen'.
        """
        if args.dataset == 'AwA2':
            data_path = os.path.join(args.data_dir, args.dataset)
            self.semantic_embed = load_semantic_embed(data_path, args.dataset, type=args.semantic_type)
            self.read_dataset(args, type)
        else:
            data_path = os.path.join(args.data_dir, 'ImageNet')
            self.semantic_embed = load_semantic_embed(data_path, args.dataset, type=args.semantic_type)
            self.read_imagenet(args, type)

    @staticmethod
    def _sample_negative(candidates, positive):
        """Draw random elements of `candidates` until one differs from `positive`.

        Callers must make sure `candidates` holds at least two distinct values,
        otherwise this would never terminate.
        """
        while True:
            negative = random.choice(candidates)
            if negative != positive:
                return negative

    def read_imagenet(self, args, type):
        """
        Load one split of an ImageNet subset (per-class feature .mat files).

        Raises
        ------
        ValueError
            If `type` is not a known split, if the split has fewer than two
            distinct classes (no negative class could be sampled), or if a
            class is missing from the 'allwnids' list of split.mat.
        """
        data_path = os.path.join(args.data_dir, 'ImageNet')

        def load_classes(file_name):
            # One class name per line. Plain 'r' is used because the 'U' flag is
            # rejected by recent Python versions; whitespace/newlines are
            # stripped and blank lines skipped, so a final line without a
            # trailing newline is no longer truncated.
            with open(file_name, 'r') as wnid_file:
                return [line.strip() for line in wnid_file if line.strip()]

        seen_classes = load_classes(os.path.join(data_path, args.dataset, 'seen.txt'))
        unseen_classes = load_classes(os.path.join(data_path, args.dataset, 'unseen.txt'))

        matcontent = scio.loadmat(os.path.join(data_path, 'split.mat'))
        wnids = matcontent['allwnids'].squeeze().tolist()

        # split name -> (feature sub-directory, classes of that split)
        splits = {
            'train_seen': ('ILSVRC2012_train', seen_classes),
            'test_seen': ('ILSVRC2012_val', seen_classes),
            'test_unseen': ('ILSVRC2011', unseen_classes),
        }
        if type not in splits:
            raise ValueError("unknown split {!r}; expected one of {}".format(type, sorted(splits)))
        feat_dir, classes = splits[type]
        feat_path = os.path.join(data_path, 'Res101_Features', feat_dir)

        # Position of every class in `wnids`, looked up once instead of once
        # per sample. This position is the row of the class in
        # `self.semantic_embed`; position + 1 is the class id and the name of
        # the feature file.
        class_row = {cls: wnids.index(cls) for cls in classes}
        if len(class_row) < 2:
            raise ValueError("split {!r} needs at least two classes to sample negatives".format(type))

        # load data
        self.y_tag = []  # tag
        self.y_vec = []  # vec
        self.y_vec_neg = []  # vec

        self.all_sem_vec = []
        self.ids = []
        feature_chunks = []
        for cls in classes:
            row = class_row[cls]
            idx = row + 1

            feat_file = os.path.join(feat_path, str(idx) + '.mat')
            features = np.array(scio.loadmat(feat_file)['features'])
            feature_chunks.append(features)

            self.ids.append(idx)
            self.all_sem_vec.append(self.semantic_embed[row])

            sample_num = features.shape[0]
            self.y_tag.extend([idx] * sample_num)

            for _ in range(sample_num):
                self.y_vec.append(self.semantic_embed[row])
                neg_cls = self._sample_negative(classes, cls)
                self.y_vec_neg.append(self.semantic_embed[class_row[neg_cls]])

        # concatenate once rather than growing the matrix class by class
        self.x = np.concatenate(feature_chunks, axis=0).astype(np.float32)
        self.y_vec = np.array(self.y_vec).astype(np.float32)
        self.y_vec_neg = np.array(self.y_vec_neg).astype(np.float32)
        print("features data size: ", self.x.shape)  # (24700, 2048)  2450
        print("tag data len: ", len(self.y_tag))  # (24700)  2450
        print("vec data size: ", self.y_vec.shape)  # (24700,500)  2450

        self.all_sem_vec = np.array(self.all_sem_vec)

    def read_dataset(self, args, type):
        """
        Load one split of AwA2 from res101.mat and binaryAtt_splits.mat.

        Raises
        ------
        ValueError
            If `type` is not a known split or the split has fewer than two
            distinct classes (no negative class could be sampled).
        """
        data_path = os.path.join(args.data_dir, args.dataset)
        # load cnn features
        matcontent = scio.loadmat(os.path.join(data_path, 'res101.mat'))
        features = matcontent['features'].T
        # stored labels and sample locations are shifted by one to become 0-based
        labels = matcontent['labels'].astype(int).squeeze() - 1

        split_matcontent = scio.loadmat(os.path.join(data_path, 'binaryAtt_splits.mat'))

        # split name -> key of the sample locations in binaryAtt_splits.mat
        split_keys = {
            'train_seen': 'trainval_loc',
            'test_seen': 'test_seen_loc',
            'test_unseen': 'test_unseen_loc',
        }
        if type not in split_keys:
            raise ValueError("unknown split {!r}; expected one of {}".format(type, sorted(split_keys)))
        loc = split_matcontent[split_keys[type]].squeeze() - 1

        self.x = features[loc]
        self.y_tag = labels[loc]
        all_tags = np.unique(self.y_tag)
        if all_tags.shape[0] < 2:
            raise ValueError("split {!r} needs at least two classes to sample negatives".format(type))

        self.y_vec = []
        self.y_vec_neg = []
        for i in range(self.y_tag.shape[0]):
            self.y_vec.append(self.semantic_embed[self.y_tag[i]])
            neg_tag = self._sample_negative(all_tags, self.y_tag[i])
            self.y_vec_neg.append(self.semantic_embed[neg_tag])

        self.x = self.x.astype(np.float32)
        self.y_vec = np.array(self.y_vec).astype(np.float32)
        self.y_vec_neg = np.array(self.y_vec_neg).astype(np.float32)

        print("features data size: ", self.x.shape)  # (24700, 2048)  2450
        print("semantic data size: ", self.y_vec.shape)  # (24700,500)  2450

        self.ids = all_tags.tolist()

        self.all_sem_vec = np.array([self.semantic_embed[tag] for tag in all_tags])

    def __len__(self):
        """Number of samples in the split."""
        return (self.x.shape[0])

    def __getitem__(self, idx):
        """Return ``(feature, (class embedding, negative class embedding, class id))``."""
        tmp_x = self.x[idx]
        tmp_y_tag = self.y_tag[idx]
        tmp_y_vec = self.y_vec[idx]
        tmp_y_vec_neg = self.y_vec_neg[idx]

        return (tmp_x, (tmp_y_vec, tmp_y_vec_neg, tmp_y_tag)) #vec  tag

**5. DeViSE model**

In [20]:
class devise(nn.Module):
    """
    Feed-forward network that maps an input vector of size `input_dims` to an
    output vector of size `output_dims` through one 2048-unit hidden layer.

    Both linear layers are preceded by batch normalization and dropout with
    probability `p`; a ReLU follows the first linear layer.
    """

    def __init__(self, input_dims, output_dims, p):
        super().__init__()
        hidden_dims = 2048
        # The layer order fixes both the computation and the parameter names
        # (model.0, model.2, ...) of the module.
        layers = [
            nn.BatchNorm1d(input_dims),
            nn.Dropout(p),
            nn.Linear(input_dims, hidden_dims, bias=True),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dims),
            nn.Dropout(p),
            nn.Linear(hidden_dims, output_dims, bias=True),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to a batch `x` and return the result."""
        return self.model(x)

**6. Evaluation Functions**

In [21]:
def macro_acc(true_label, pre_label):
    """
    Evaluation Metrics: Macro Accuracy.

    The accuracy is computed separately for every class that occurs in
    `true_label`, and the per-class accuracies are then averaged with equal
    weight, regardless of how many samples each class has.

    `pre_label[i]` is the prediction for `true_label[i]`. An empty
    `true_label` raises ZeroDivisionError.
    """
    samples_per_class = defaultdict(int)
    hits_per_class = defaultdict(int)

    for position, gold in enumerate(true_label):
        samples_per_class[gold] += 1
        if pre_label[position] == gold:
            hits_per_class[gold] += 1

    class_accuracies = [
        float(hits_per_class[label] / count)
        for label, count in samples_per_class.items()
    ]

    # accumulate left to right, starting from a float zero
    accuracy_total = 0.
    for class_accuracy in class_accuracies:
        accuracy_total += class_accuracy

    return accuracy_total / len(class_accuracies)


def dtest(te, model, id_space, sem_space, loss_fn):

    sem_space = sem_space.transpose()

    with torch.no_grad():
        model.eval()
        real_label_test = []
        pre_label_test_1 = []
        pre_label_test_2 = []  # hit 2
        pre_label_test_5 = []  # hit 5
        loss_total_test = 0
        for (vx, vy) in te:
            val_vec_y, val_vec_y_neg, val_tag_y = vy
            val_vec_y = val_vec_y.cuda()
            val_vec_y_neg = val_vec_y_neg.cuda()
            vx = vx.cuda()

            vy_pred = model(vx)


            if loss_fn == 'mse':
                loss_fn = nn.MSELoss()
                vloss = loss_fn(vy_pred, val_vec_y)

            if loss_fn == 'margin':
                pos_score = F.cosine_similarity(vy_pred, val_vec_y)
                neg_score = F.cosine_similarity(vy_pred, val_vec_y_neg)
                vloss = torch.mean(torch.max(1.0 - pos_score + neg_score,
                                            torch.zeros_like(pos_score).cuda()))


            loss_total_test += vloss.item()
            val_tag_y = [t.item() for t in val_tag_y]
            real_label_test.extend(val_tag_y)
            vy_pred_cpu = vy_pred.cpu().detach().numpy()
            vsz = len(val_tag_y)
            vtt = np.dot(vy_pred_cpu, sem_space)  # judge by dot Multiplication
            for n in range(vsz):
                e = heapq.nlargest(5, range(len(vtt[n])), vtt[n].take)  # top 5 hit
                vi = 0
                while vi < 5:
                    if (id_space[e[vi]] == val_tag_y[n]):  # pre right
                        break
                    vi += 1
                pre_label_test_1.append(id_space[e[0]])
                pre_label_test_2.append(id_space[e[0]])
                pre_label_test_5.append(id_space[e[0]])

                if (vi <= 1):
                    pre_label_test_2[-1] = val_tag_y[n]
                    pre_label_test_5[-1] = val_tag_y[n]
                elif (vi <= 4):
                    pre_label_test_5[-1] = val_tag_y[n]

        acc_test_1 = macro_acc(real_label_test, pre_label_test_1)
        acc_test_2 = macro_acc(real_label_test, pre_label_test_2)
        acc_test_5 = macro_acc(real_label_test, pre_label_test_5)

        return acc_test_1, acc_test_2, acc_test_5, loss_total_test

**7. Model Training**

In [22]:
def train():
    """
    Train the DeViSE model on the seen classes and evaluate it after every epoch.

    All settings are read from the module-level `args`. After each epoch the
    function prints the summed training loss and training macro accuracy,
    the standard ZSL results (unseen samples ranked against unseen classes
    only) and the generalized ZSL results (seen and unseen samples ranked
    against all test classes) together with their harmonic mean.

    Raises
    ------
    ValueError
        If `args.loss_fn` is neither 'mse' nor 'margin'.
    """
    if args.loss_fn not in ('mse', 'margin'):
        raise ValueError("unknown loss function {!r}; expected 'mse' or 'margin'".format(args.loss_fn))

    # use the current CUDA device when there is one, otherwise the CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # load training data (seen classes)
    tr_img = DATAReader(args, 'train_seen')
    tr = DataLoader(tr_img, batch_size=args.batch_size, shuffle=True, num_workers=8)

    model = devise(tr_img.x.shape[1], tr_img.y_vec.shape[1], args.p).to(device)

    optimizer_tag = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wds)
    print('using {} as criterion'.format(args.loss_fn))

    # load unseen testing data
    te_img_unseen = DATAReader(args, 'test_unseen')
    te_unseen = DataLoader(te_img_unseen, batch_size=50, num_workers=8)

    # load seen testing data
    te_img_seen = DATAReader(args, 'test_seen')
    te_seen = DataLoader(te_img_seen, batch_size=50, num_workers=8)

    # Loop-invariant pieces, built once: the joint (unseen + seen) candidate
    # space for generalized ZSL, the transposed training class embeddings and
    # the MSE criterion.
    gzsl_ids = te_img_unseen.ids + te_img_seen.ids
    gzsl_sem_vec = np.vstack((te_img_unseen.all_sem_vec, te_img_seen.all_sem_vec))
    train_sem_t = tr_img.all_sem_vec.transpose()
    mse_criterion = nn.MSELoss()

    print('Begin Training ...')

    for epoch in range(args.epoch_num):
        model.train()
        loss_total = 0

        real_label = []
        pre_label_1 = []
        for x, y in tr:
            vec_y, vec_y_neg, tag_y = y  # vec  tag
            x = x.to(device)
            vec_y = vec_y.to(device)
            vec_y_neg = vec_y_neg.to(device)
            model.zero_grad()
            y_pred = model(x)

            if args.loss_fn == 'mse':
                loss = mse_criterion(y_pred, vec_y)
            else:
                # hinge loss on cosine similarities with a margin of 1
                pos_score = F.cosine_similarity(y_pred, vec_y)
                neg_score = F.cosine_similarity(y_pred, vec_y_neg)
                loss = torch.mean(torch.max(1.0 - pos_score + neg_score,
                                            torch.zeros_like(pos_score)))

            loss.backward()
            optimizer_tag.step()

            real_label.extend(t.item() for t in tag_y)  # batch_size

            # Training accuracy only needs the best-scoring class per sample
            # (argmax returns the first maximum on ties).
            y_pred_cpu = y_pred.cpu().detach().numpy()
            tt = np.dot(y_pred_cpu, train_sem_t)  # judge by dot Multiplication
            pre_label_1.extend(tr_img.ids[best] for best in tt.argmax(axis=1))

            loss_total += loss.item()

        acc_1 = macro_acc(real_label, pre_label_1)  # hit 1
        print()
        print('Epoch {:2d}/{:2d}; ----- total_loss:{:06.5f}; macro_acc_1: {:04.2f} -----'.format(epoch, args.epoch_num, loss_total,acc_1*100))

        # standard zsl: unseen samples ranked against the unseen classes only
        acc_test_1, acc_test_2, acc_test_5, loss_total_test = \
            dtest(te_unseen, model, te_img_unseen.ids, te_img_unseen.all_sem_vec, args.loss_fn)

        print('Test Standard ZSL  | Hit 1: {:04.2f}%; Hit 2: {:04.2f}%; Hit 5: {:04.2f}%'.format(acc_test_1 * 100, acc_test_2 * 100, acc_test_5 * 100))

        # gzsl: unseen and seen samples ranked against unseen + seen classes
        acc_unseen, _, _, loss_total_test = \
            dtest(te_unseen, model, gzsl_ids, gzsl_sem_vec, args.loss_fn)

        acc_seen, _, _, loss_total_test = \
            dtest(te_seen, model, gzsl_ids, gzsl_sem_vec, args.loss_fn)

        # harmonic mean of both accuracies; defined as 0 when both are 0
        acc_sum = acc_seen + acc_unseen
        mean = 2 * acc_seen * acc_unseen / acc_sum if acc_sum > 0 else 0.0
        print('Test Generalized ZSL | Acc Seen: {:04.2f}%; Acc Unseen: {:04.2f}%; Mean: {:04.2f}%'.format(acc_seen * 100,
                                                                                                 acc_unseen * 100,
                                                                                              mean * 100))
        

In [None]:
# Run training; per-epoch training loss/accuracy and the ZSL / GZSL test results are printed below.
train()



features data size:  (35150, 2048)
tag data len:  35150
vec data size:  (35150, 100)


  cpuset_checked))


using mse as criterion
features data size:  (39523, 2048)
tag data len:  39523
vec data size:  (39523, 100)
features data size:  (1400, 2048)
tag data len:  1400
vec data size:  (1400, 100)
Begin Training ...
Epoch  0/60; ----- total_loss:38.96863; macro_acc_1: 65.08 -----
Test ZSL  | Hit 1: 29.74%; Hit 2: 59.50%; Hit 5: 94.11%
Test GZSL | Acc Seen: 63.29%; Acc Unseen: 1.67%; Mean: 3.25%

Epoch  1/60; ----- total_loss:10.64372; macro_acc_1: 67.14 -----
Test ZSL  | Hit 1: 30.49%; Hit 2: 59.30%; Hit 5: 93.73%
Test GZSL | Acc Seen: 59.36%; Acc Unseen: 0.64%; Mean: 1.27%

Epoch  2/60; ----- total_loss:9.32034; macro_acc_1: 68.09 -----
Test ZSL  | Hit 1: 29.98%; Hit 2: 59.53%; Hit 5: 94.31%
Test GZSL | Acc Seen: 63.93%; Acc Unseen: 1.10%; Mean: 2.17%

Epoch  3/60; ----- total_loss:7.30298; macro_acc_1: 68.96 -----
Test ZSL  | Hit 1: 30.45%; Hit 2: 60.65%; Hit 5: 94.08%
Test GZSL | Acc Seen: 62.79%; Acc Unseen: 1.61%; Mean: 3.15%

Epoch  4/60; ----- total_loss:5.79971; macro_acc_1: 69.22 ---