In [6]:
# Mount Google Drive under /content/drive/ (the data paths used in this
# notebook live below /content/drive/MyDrive). Colab-only: requires the
# google.colab package.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)

Mounted at /content/drive/


In [7]:
import os
import json
import numpy as np
import scipy.io as scio

**ZSL DATA**
---
load images

In [8]:
def load_classes(file_name):
  """
  Function: Read a class list file that stores one class identifier per line

  Parameter
  --------------------------
  file_name: str
    Path of the text file to read.

  Returns
  --------------------------
    List of str, one entry per line of the file (line terminators removed,
    blank lines kept as empty strings)
  """
  # Plain 'r' already performs universal-newline translation on Python 3;
  # the legacy 'rU' mode is deprecated and rejected by Python 3.11+.
  with open(file_name, 'r') as wnids:
    # Strip only the line terminator: slicing off the last character would
    # truncate a final line that has no trailing newline.
    return [line.rstrip('\n') for line in wnids]
  
def read_images(dataname, type, data_dir="/content/drive/MyDrive/ISWC_demo/ZS_IMGC/data"):
  """
  Function: Load images for training and testing

  Parameter
  --------------------------
  dataname: str
    Dataset Name. "AwA2" selects the AwA2 layout; any other value is treated
    as the name of an ImageNet subset stored under <data_dir>/ImageNet.
  type: str
    type, including "
    train_seen (short for the training data that all belong to seen classes),
    test_seen (short for testing data of seen classes),
    test_unseen (short for testing data of unseen classes)"
  data_dir: str
    Root directory of the datasets. Defaults to the Google Drive location
    used by this notebook.

  Returns
  --------------------------
    A matrix of shape [image numbers, feature dimension per image].
    For ImageNet subsets the matrix is cast to float32; an empty class list
    yields an empty float32 array of shape (0, 0).

  Raises
  --------------------------
    ValueError if `type` is not one of the three supported values.
  """
  # Reject unknown split names before any file is read; otherwise the
  # failure would only surface later as an unbound local variable.
  valid_types = ('train_seen', 'test_seen', 'test_unseen')
  if type not in valid_types:
    raise ValueError("type must be one of %s, got %r" % (valid_types, type))

  if dataname == "AwA2":
    data_path = os.path.join(data_dir, dataname)
    # load cnn features; the file stores them as [feature dimension, images]
    matcontent = scio.loadmat(os.path.join(data_path, 'res101.mat'))
    features = matcontent['features'].T

    split_matcontent = scio.loadmat(os.path.join(data_path, 'binaryAtt_splits.mat'))

    loc_key = {
        'train_seen': 'trainval_loc',
        'test_seen': 'test_seen_loc',
        'test_unseen': 'test_unseen_loc',
    }[type]
    # stored locations are 1-based
    loc = split_matcontent[loc_key].squeeze() - 1
    x = features[loc]

  else:
    data_path = os.path.join(data_dir, 'ImageNet')

    # (feature sub-directory, class list file) for each split
    feat_subdir, class_file = {
        'train_seen': ('ILSVRC2012_train', 'seen.txt'),
        'test_seen': ('ILSVRC2012_val', 'seen.txt'),
        'test_unseen': ('ILSVRC2011', 'unseen.txt'),
    }[type]
    feat_path = os.path.join(data_path, 'Res101_Features', feat_subdir)
    classes = load_classes(os.path.join(data_path, dataname, class_file))

    matcontent = scio.loadmat(os.path.join(data_path, 'split.mat'))
    wnids = matcontent['allwnids'].squeeze().tolist()

    # Collect the per-class matrices and concatenate once: concatenating
    # inside the loop re-copies everything gathered so far on each step.
    chunks = []
    for cls in classes:
      # feature files are named by the 1-based position of the class in allwnids
      idx = wnids.index(cls) + 1

      feat_file = os.path.join(feat_path, str(idx) + '.mat')
      chunks.append(np.array(scio.loadmat(feat_file)['features']))

    if chunks:
      x = np.concatenate(chunks, axis=0).astype(np.float32)
    else:
      # no classes listed: return an empty matrix instead of failing on a list
      x = np.empty((0, 0), dtype=np.float32)
  return x

In [9]:
def get_labels(wnids, feat_path, classes):
  """
  Function: Build one label per image for the given classes

  Parameter
  --------------------------
  wnids: list
    Ordered class identifiers; the 1-based position of a class in this list
    is both its feature file number and its label.
  feat_path: str
    Directory holding the per-class feature files "<number>.mat".
  classes: list
    Classes to collect labels for, in output order.

  Returns
  --------------------------
    Array with, for every class in turn, its number repeated once per row of
    that class's 'features' matrix
  """
  tags = []

  for wnid in classes:
    file_no = wnids.index(wnid) + 1
    mat = scio.loadmat(os.path.join(feat_path, str(file_no) + '.mat'))
    # one tag per feature row of this class
    row_count = np.array(mat['features']).shape[0]
    tags.extend([file_no] * row_count)

  return np.array(tags)

def read_images_per_class(dataname, class_name, data_dir="/content/drive/MyDrive/ISWC_demo/ZS_IMGC/data"):
  """
  Function: Count the images of one specified class in every data split

  Parameter
  --------------------------
  dataname: str
    Dataset Name. "AwA2" selects the AwA2 layout; any other value is treated
    as the name of an ImageNet subset stored under <data_dir>/ImageNet.
  class_name: str
    Name of the class, as listed in the dataset's class name table
    ('allclasses_names' for AwA2, 'allwords' for ImageNet).
  data_dir: str
    Root directory of the datasets. Defaults to the Google Drive location
    used by this notebook.

  Returns
  --------------------------
    number_train_seen, number_test_seen, number_test_unseen, class_id
    where class_id is the 0-based position of class_name in the class name table

  Raises
  --------------------------
    KeyError if class_name is not in the class name table.
  """
  if dataname == "AwA2":
    data_path = os.path.join(data_dir, dataname)
    # load image labels (stored 1-based, shifted here to 0-based)
    matcontent = scio.loadmat(os.path.join(data_path, 'res101.mat'))
    labels = matcontent['labels'].astype(int).squeeze() - 1

    split_matcontent = scio.loadmat(os.path.join(data_path, 'binaryAtt_splits.mat'))

    allclasses_names = scio.loadmat(os.path.join(data_path, 'att_splits.mat'))['allclasses_names']
    name2id = {allclasses_names[i][0][0]: i for i in range(len(allclasses_names))}
    if class_name not in name2id:
      raise KeyError("unknown class name %r for dataset %r" % (class_name, dataname))
    class_id = name2id[class_name]
    # labels were shifted to 0-based above, so they compare directly with class_id
    target_label = class_id

    # stored locations are 1-based
    labels_train_seen = labels[split_matcontent['trainval_loc'].squeeze() - 1]
    labels_test_seen = labels[split_matcontent['test_seen_loc'].squeeze() - 1]
    labels_test_unseen = labels[split_matcontent['test_unseen_loc'].squeeze() - 1]

  else:
    data_path = os.path.join(data_dir, 'ImageNet')
    seen_classes = load_classes(os.path.join(data_path, dataname, 'seen.txt'))
    unseen_classes = load_classes(os.path.join(data_path, dataname, 'unseen.txt'))

    matcontent = scio.loadmat(os.path.join(data_path, 'split.mat'))
    wnids = matcontent['allwnids'].squeeze().tolist()
    allclasses_names = matcontent['allwords']
    name2id = {allclasses_names[i][0][0]: i for i in range(len(allclasses_names))}
    if class_name not in name2id:
      raise KeyError("unknown class name %r for dataset %r" % (class_name, dataname))
    class_id = name2id[class_name]
    # get_labels tags every image with the 1-based position of its class in
    # allwnids, whereas class_id is a 0-based position in allwords; shift by
    # one so the same class is matched.
    # NOTE(review): assumes allwords and allwnids are parallel arrays in split.mat.
    target_label = class_id + 1

    feat_root = os.path.join(data_path, 'Res101_Features')
    # 'train_seen'
    labels_train_seen = get_labels(wnids, os.path.join(feat_root, 'ILSVRC2012_train'), seen_classes)
    # 'test_seen'
    labels_test_seen = get_labels(wnids, os.path.join(feat_root, 'ILSVRC2012_val'), seen_classes)
    # 'test_unseen'
    labels_test_unseen = get_labels(wnids, os.path.join(feat_root, 'ILSVRC2011'), unseen_classes)

  number_train_seen = np.sum(labels_train_seen == target_label)
  number_test_seen = np.sum(labels_test_seen == target_label)
  number_test_unseen = np.sum(labels_test_unseen == target_label)
  return number_train_seen, number_test_seen, number_test_unseen, class_id

In [10]:
# Demo: load the feature matrix of one split of one dataset and report its
# shape (rows are images, columns are feature dimensions).
data_name = "ImNet_A" # ImNet_A or ImNet_O or AwA2
data_type = "train_seen" # "train_seen" or 'test_seen' or 'test_unseen'

x = read_images(data_name, data_type)
print("features data size: ", x.shape)

  This is separate from the ipykernel package so we can avoid doing imports until


features data size:  (35150, 2048)


In [11]:
# Demo: count how many images of one class fall into each data split.
data_name = "AwA2" # ImNet_A or ImNet_O or AwA2
class_name = "lion"  # class name as listed in the dataset (the numeric class_id is returned)

number_train_seen, number_test_seen, number_test_unseen, class_id = read_images_per_class(data_name, class_name)
print("number_train_seen in class {} —— {}: {}".format(class_id, class_name, number_train_seen))
print("number_test_seen in class {} —— {}: {}".format(class_id, class_name, number_test_seen))
# a space after "class" was missing here, which glued the id to the word in the output
print("number_test_unseen in class {} —— {}: {}".format(class_id, class_name, number_test_unseen))

number_train_seen in class 42 —— lion: 821
number_test_seen in class 42 —— lion: 198
number_test_unseen in class42 —— lion: 0


**KG DATA**
---
load KG triples

In [12]:
!pip install rdflib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdflib
  Downloading rdflib-6.2.0-py3-none-any.whl (500 kB)
[K     |████████████████████████████████| 500 kB 13.2 MB/s 
Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[K     |████████████████████████████████| 41 kB 652 kB/s 
Installing collected packages: isodate, rdflib
Successfully installed isodate-0.6.1 rdflib-6.2.0


In [13]:
def load_kg_triples(dataname, triple_type="all", read_format="list",
                    data_dir="/content/drive/MyDrive/ISWC_demo/ZS_IMGC/data"):
  """
  Function: load KG triples with different type

  Parameter
  --------------------------
  dataname: str
    Dataset Name. "AwA2" is read from <data_dir>/AwA2/KG_triples; any other
    value is read from <data_dir>/ImageNet/<dataname>/KG_triples.
  triple_type: str
    Triple Type, including "
    cls_hie (short for class hierarchy),
    att_hie (short for attribute hierarchy),
    cls_att (short for class attribute annotation triples),
    cn (short for ConceptNet triples),
    literal (short for entities' literal name triples),
    same_as (short for triples that connect entities in Basic KG to ConceptNet entities),
    disjoint (short for class-class disjointness and class-attribute disjointness) [only loaded for AwA2]
    ",
    the default value "all" means the triples of all the above type

  read_format: str
    Return Format. "turtle" or "xml" return the serialized RDF graph; any
    other value (default "list") returns the triple list.
  data_dir: str
    Root directory of the datasets. Defaults to the Google Drive location
    used by this notebook.

  Returns
  --------------------------
    List of (head, relation, tail) tuples, or the serialized graph (for xml and turtle)

  Raises
  --------------------------
    ValueError if triple_type is not one of the values listed above, or if
    a term that must be turned into a URI has no "prefix:" part.
  """
  valid_types = ("all", "cls_hie", "att_hie", "cls_att", "cn", "literal", "same_as", "disjoint")
  if triple_type not in valid_types:
    # a misspelled type used to return an empty result without any hint
    raise ValueError("triple_type must be one of %s, got %r" % (valid_types, triple_type))

  if dataname == 'AwA2':
    data_path = os.path.join(data_dir, dataname, "KG_triples")
  else:
    data_path = os.path.join(data_dir, "ImageNet", dataname, "KG_triples")

  build_graph = read_format in ["turtle", "xml"]
  if build_graph:
    # rdflib is imported only here, so the list format works without it
    from rdflib import Graph, Literal, URIRef, Namespace #basic RDF handling
    from rdflib.namespace import RDFS, OWL, XSD #most common namespaces
    namespace = Namespace('http://www.semanticweb.org/ontologies/' + dataname + '#')
    cn_namespace = Namespace('http://www.semanticweb.org/ontologies/ConceptNet#')
    g = Graph()
    # relations that map onto standard RDFS / OWL predicates
    standard_relations = {
        'rdfs:subClassOf': RDFS.subClassOf,
        'owl:disjointWith': OWL.disjointWith,
        'owl:sameAs': OWL.sameAs,
    }

    def toURI(term):
      # Split on the first colon only, so a local name that itself contains
      # ':' is kept whole instead of being cut at the second colon.
      prefix, sep, local = term.partition(':')
      if not sep:
        raise ValueError("expected a 'prefix:name' term, got %r" % (term,))
      base = cn_namespace if prefix == 'cn' else namespace
      return URIRef(base + local)

    def addToGraph(h, r, t):
      if r == 'rdfs:label':
        # the tail of a label triple is a plain string, not an entity
        g.add((toURI(h), RDFS.label, Literal(t, datatype=XSD.string)))
      else:
        predicate = standard_relations.get(r)
        if predicate is None:
          predicate = toURI(r)
        g.add((toURI(h), predicate, toURI(t)))

  def readTriples(file_name):
    # Parse one tab-separated triple file; returns (unique entities, unique relations, triples).
    triples, entities, relations = [], [], []
    with open(file_name, 'r', encoding='utf-8') as file:
      for line in file:
        # Strip only the terminator: slicing off the last character would
        # truncate a final line that has no trailing newline.
        line = line.rstrip('\n')
        if not line:
          # tolerate blank lines (e.g. at the end of the file)
          continue
        fields = line.split('\t')
        h, r, t = fields[0], fields[1], fields[2]
        entities.append(h)
        entities.append(t)
        relations.append(r)
        triples.append((h, r, t))
        if build_graph:
          addToGraph(h, r, t)
    return list(set(entities)), list(set(relations)), triples

  results = list()
  res_ents, res_rels = [], []

  def collect(file_name, summary, count_entities=True):
    # Load one triple file into the overall result and print its summary line.
    ents, rels, triples = readTriples(os.path.join(data_path, file_name))
    results.extend(triples)
    res_rels.extend(rels)
    if count_entities:
      res_ents.extend(ents)
      print(summary % (len(ents), len(rels), len(triples)))
    else:
      print(summary % (len(rels), len(triples)))

  def wanted(key):
    return triple_type == 'all' or triple_type == key

  if wanted('cls_hie'):
    # load triples of class hierarchy
    collect('class_hierarchy_triples.txt', "CLASS HIERARCHY #entities: %d, #relations: %d, #triples:%d")

  if wanted('cls_att'):
    # load triples of class attribute annotations
    collect('class_attribute_triples.txt', "CLASS ATTRIBUTE #entities: %d, #relations: %d, #triples:%d")

  if wanted('att_hie'):
    # load triples of attribute hierarchy
    collect('attribute_hierarchy_triples.txt', "ATTRIBUTE HIERARCHY #entities: %d, #relations: %d, #triples:%d")

  if wanted('cn'):
    # load triples of ConceptNet subset
    collect('conceptnet_triples_filter.txt', "CONCEPTNET SUBSET #entities: %d, #relations: %d, #triples:%d")

  if wanted('literal'):
    # load triples of literal name; these do not contribute to the overall entity list
    collect('literals.txt', "LITERAL #relations: %d, #triples:%d", count_entities=False)

  if wanted('same_as'):
    # load triples of sameAs relationship
    collect('sameAs_triples.txt', "SAME AS #entities: %d, #relations: %d, #triples:%d")

  if dataname == 'AwA2' and wanted('disjoint'):
    # load triples of class-class disjointness and class-attribute disjointness
    collect('disjoint_cls_cls_triples.txt', "CLASS DISJOINT #entities: %d, #relations: %d, #triples: %d")
    collect('disjoint_cls_att_triples.txt', "ATTRIBUTE DISJOINT #entities: %d, #relations: %d, #triples:%d")

  if triple_type == 'all':
    # the first number is the count of distinct entities (it was mislabelled "#triples")
    print("ALL #entities: %d, #relations: %d, #triples:%d"%(len(set(res_ents)), len(set(res_rels)), len(results)))

  if build_graph:
    g.bind("owl", OWL)
    g.bind("cn", cn_namespace)
    g.bind(dataname.lower(), namespace)
    return g.serialize(format=read_format)
  else:
    return results

In [14]:
# Demo: load only the class hierarchy triples of ImNet_A; with the default
# read_format the result is a list of (head, relation, tail) tuples.
results = load_kg_triples("ImNet_A", triple_type="cls_hie")

CLASS HIERARCHY #entities: 111, #relations: 1, #triples:111


In [15]:
# Show the loaded triples (prints the whole list).
print(results)

[('ImNet-A:n01661091', 'rdfs:subClassOf', 'ImNet-A:n01471682'), ('ImNet-A:n02443484', 'rdfs:subClassOf', 'ImNet-A:n02441326'), ('ImNet-A:n01734418', 'rdfs:subClassOf', 'ImNet-A:n01727646'), ('ImNet-A:n02220225', 'rdfs:subClassOf', 'ImNet-A:n02219486'), ('ImNet-A:n02208280', 'rdfs:subClassOf', 'ImNet-A:n02206856'), ('ImNet-A:n02206856', 'rdfs:subClassOf', 'ImNet-A:n02206270'), ('ImNet-A:n01473806', 'rdfs:subClassOf', 'ImNet-A:n01471682'), ('ImNet-A:n01471682', 'rdfs:subClassOf', 'ImNet-A:n01466257'), ('ImNet-A:n02441942', 'rdfs:subClassOf', 'ImNet-A:n02441326'), ('ImNet-A:n01798706', 'rdfs:subClassOf', 'ImNet-A:n01798484'), ('ImNet-A:n02492035', 'rdfs:subClassOf', 'ImNet-A:n02489589'), ('ImNet-A:n01737875', 'rdfs:subClassOf', 'ImNet-A:n01737021'), ('ImNet-A:n01494475', 'rdfs:subClassOf', 'ImNet-A:n01482330'), ('ImNet-A:n02006985', 'rdfs:subClassOf', 'ImNet-A:n02006656'), ('ImNet-A:n02448633', 'rdfs:subClassOf', 'ImNet-A:n02447366'), ('ImNet-A:n02153203', 'rdfs:subClassOf', 'ImNet-A:n017

In [16]:
# Demo: load the class attribute triples of AwA2 and get them back as the
# Turtle serialization of the RDF graph instead of a list (needs rdflib).
results = load_kg_triples("AwA2", triple_type="cls_att", read_format="turtle")

CLASS ATTRIBUTE #entities: 135, #relations: 15, #triples:1562


In [17]:
# Show the serialized Turtle document.
print(results)

@prefix awa2: <http://www.semanticweb.org/ontologies/AwA2#> .

awa2:n01889520 awa2:actRole awa2:a057 ;
    awa2:canMoveeBy awa2:a038,
        awa2:a039 ;
    awa2:eat awa2:a055,
        awa2:a056 ;
    awa2:hasBehavior awa2:a049,
        awa2:a050,
        awa2:a083 ;
    awa2:hasBodyPart awa2:a023,
        awa2:a032,
        awa2:a046 ;
    awa2:hasBodyShape awa2:a017,
        awa2:a018 ;
    awa2:hasCharacter awa2:a047,
        awa2:a051,
        awa2:a080 ;
    awa2:hasColor awa2:a001,
        awa2:a004,
        awa2:a005 ;
    awa2:hasHabitat awa2:a063,
        awa2:a064,
        awa2:a069,
        awa2:a070,
        awa2:a071,
        awa2:a075,
        awa2:a084 ;
    awa2:hasTeeth awa2:a027,
        awa2:a029 ;
    awa2:hasTexture awa2:a012 ;
    awa2:is awa2:a016 ;
    awa2:looks awa2:a043 ;
    awa2:moves awa2:a040,
        awa2:a041 .

awa2:n02064816 awa2:actRole awa2:a061 ;
    awa2:canMoveeBy awa2:a037 ;
    awa2:eat awa2:a052,
        awa2:a054 ;
    awa2:hasBehavior awa2: