In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this runtime (google.colab is only available inside Colab).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd "drive/MyDrive/src"

/content/drive/.shortcut-targets-by-id/19SQ4Q8TMprp6D7i0D29mHcTJOj0FYWuu/src


In [None]:
# List the working directory to confirm the data files and helper modules are present.
!ls

embedding_2.npy		      reptile.py
embedding_label.npz	      split_dataset.py
entity_embedding_mapping.txt  type_dict.txt
entity_type_embedding.txt     YAGO4-class.txt
meta.py			      YAGO4ET20-test.txt
__pycache__		      YAGO4ET20-train.txt
Reptile.ipynb		      YAGO4-types-freq20-single-mapping.txt


In [None]:
import sys
# Put this Drive folder first on the module search path (presumably so
# project modules stored there can be imported).
# NOTE(review): this is a different folder from the one the earlier %cd cell
# switches to ("MyDrive/src") — confirm it is still needed.
sys.path.insert(0,'/content/drive/My Drive/Courses/CS 543/Final Proj/')


In [None]:
#@formatter:off
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to local .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
#@formatter:on

# Read in dataset

In [None]:
import numpy as np
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import Linear
from copy import deepcopy

# X = np.load('entity_embedding_200.npy')
# y = np.load('type.npy')


# Load type_dict.txt into two lookup dictionaries:
#   type_dict_idxtoname: index (int) -> type name
#   type_dict_nametoidx: type name -> index (int)
# Every non-blank line is expected to hold "<index>\t<type name>".
type_dict_idxtoname = {}
type_dict_nametoidx = {}
with open("type_dict.txt", encoding="UTF-8") as f:
    for raw_line in f:
        record = raw_line.strip()
        if not record:
            # Blank lines are skipped rather than treated as end-of-file, so
            # entries that follow one are not silently lost.
            continue
        type_idx, type_name = record.split('\t')
        type_dict_idxtoname[int(type_idx)] = type_name
        type_dict_nametoidx[type_name] = int(type_idx)
        

# Entity name -> integer index, read from entity_embedding_mapping.txt
# (one "<entity>\t<index>" pair per line). The indices are used elsewhere in
# this notebook to select rows of the embedding matrix X.
entity_to_index = {}
with open('entity_embedding_mapping.txt', 'r', encoding='utf-8') as f:
    # Stream the file line by line; there is no need to hold it all in memory.
    for line in f:
        line = line.strip()
        if not line:
            # A blank (e.g. trailing) line cannot be unpacked into two fields.
            continue
        e, i = line.split('\t')
        entity_to_index[e] = int(i)

# Distinct type names that occur in the training split
# (one "<entity>\t<type>" pair per line).
types = set()
with open('YAGO4ET20-train.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue  # tolerate blank lines
        e, t = line.split('\t')
        types.add(t)
# Sort so the list has the same order in every kernel session. Iteration
# order of a set of strings changes with hash randomization, which would make
# random.choice(types) irreproducible even under a fixed random seed.
types = sorted(types)

# load data
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the context manager closes it once both arrays have been read into memory.
# (data_npz itself is closed after this block; use X and y from here on.)
with np.load('embedding_label.npz') as data_npz:
    X = data_npz['embedding']  # rows are selected via entity_to_index values
    y = data_npz['label']

# meta task generation

import random 
import meta

def sample_meta_task(n_shots=5, type_name=None):
    """Draw one balanced binary classification task for a single type.

    Args:
        n_shots: upper bound on the number of examples drawn per class; it is
            reduced to the size of the smaller candidate pool when needed.
        type_name: type to build the task for. When None, a type is picked
            uniformly at random from the global ``types`` list.

    Returns:
        (features, labels): ``features`` stacks the rows of ``X`` for the
        sampled positive entities followed by the sampled negative entities;
        ``labels`` is a float array of ones (positives) then zeros (negatives).
    """
    task_type = random.choice(types) if type_name is None else type_name
    positives, negatives = meta.generate_meta_task(task_type)

    # Same number of examples from each class, capped by the smaller pool.
    k = min(n_shots, len(positives), len(negatives))
    positives = random.sample(positives, k)
    negatives = random.sample(negatives, k)

    # Translate entity names into row indices of the embedding matrix.
    pos_rows = np.array([entity_to_index[name] for name in positives])
    neg_rows = np.array([entity_to_index[name] for name in negatives])

    features = np.vstack((X[pos_rows], X[neg_rows]))
    labels = np.concatenate((np.ones(len(pos_rows)), np.zeros(len(neg_rows))))
    return features, labels

def get_test_task(type_name, test_compatible_types=False):
    """Build an adapt-then-test task for ``type_name``.

    Args:
        type_name: type to evaluate.
        test_compatible_types: forwarded unchanged to
            ``meta.get_test_positive_examples``.

    Returns:
        (train_features, train_labels, test_features): the first two come
        from ``sample_meta_task`` for this type; ``test_features`` holds the
        rows of ``X`` for the positive test entities reported by ``meta``.
    """
    support_features, support_labels = sample_meta_task(type_name=type_name)
    held_out_entities = meta.get_test_positive_examples(
        type_name,
        test_compatible_types=test_compatible_types)
    held_out_rows = [entity_to_index[name] for name in held_out_entities]
    return support_features, support_labels, X[held_out_rows]


In [None]:
# model 


# Layer widths of the classifier: input features, hidden units, output units.
INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE = 400, 1024, 1


class Model(nn.Module):
    """Fully connected binary classifier with three ReLU hidden layers.

    ``forward`` returns one sigmoid probability per input row as a flat
    tensor.
    """

    def __init__(self, weights=None):
        """Create the layers and optionally initialise them from ``weights``.

        Args:
            weights: optional state dict; a deep copy of it is loaded into the
                freshly created layers.
        """
        super().__init__()
        self.fc1 = Linear(INPUT_SIZE, HIDDEN_SIZE)
        self.fc2 = Linear(HIDDEN_SIZE, HIDDEN_SIZE)
        self.fc3 = Linear(HIDDEN_SIZE, HIDDEN_SIZE)
        self.out = Linear(HIDDEN_SIZE, OUTPUT_SIZE)

        # Loading has to happen after the layers exist so that the keys of
        # the state dict have parameters to be matched against.
        if weights is not None:
            self.load_state_dict(deepcopy(weights))

    def forward(self, x):
        """Map a batch of inputs to a flat tensor of probabilities."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        logits = self.out(hidden).view(-1)
        return F.sigmoid(logits)

def inner_train_func(model, x, y, n_iter, log_period=1, **kwargs):
    """Fit ``model`` to a single task with full-batch SGD on binary cross-entropy.

    The model is updated in place; nothing is returned.

    Args:
        model: module whose output for ``x`` is a tensor of probabilities with
            the same shape as ``y`` (as required by ``nn.BCELoss``).
        x: input batch, fed to the model unchanged on every step.
        y: float targets for ``x``.
        n_iter: number of gradient steps to take.
        log_period: print the mean loss of the last ``log_period`` steps every
            ``log_period`` steps; ``None`` disables logging.
        **kwargs: accepted for call compatibility and ignored.
    """
    criterion = nn.BCELoss(reduction='mean')
    # A single optimizer for the whole inner loop: SGD keeps its momentum
    # buffers in the optimizer state, so building a new optimizer on every
    # step would discard them and leave momentum=0.9 without any effect.
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

    # Accumulated across steps and reset only after each log line, so the
    # printed value is a true mean over the logging window.
    running_loss = 0.0
    for epoch in range(n_iter):
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()

        # print statistics (loss.item() is only needed when logging)
        if log_period is not None:
            running_loss += loss.item()
            if epoch % log_period == log_period - 1:
                print(f'[{epoch + 1}, {epoch + 1:5d}] loss: {running_loss / log_period:.3f}')
                running_loss = 0.0

def copy_model(original_model):
    """Return an independent deep copy of ``original_model``."""
    from copy import deepcopy as _clone
    return _clone(original_model)

In [None]:

def parse_args(args=None):
    """Parse the meta-learning command-line options.

    Args:
        args: list of argument strings to parse; ``None`` is passed through to
            ``ArgumentParser.parse_args`` unchanged.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    cli = argparse.ArgumentParser(
        description='Meta Learning Arguments',
        usage='train.py [<args>] [-h | --help]',
    )

    # (flags, add_argument keyword options), registered in this order.
    option_specs = [
        (('--cuda',), dict(action='store_true', help='use GPU')),
        (('--data_path',), dict(type=str, default=None)),
        (('--model',), dict(default='Reptile', type=str)),
        # (('-b', '--batch_size'), dict(default=1024, type=int)),
        (('-lr', '--learning_rate'), dict(default=0.0001, type=float)),
        (('--n_shot',), dict(default=5, type=int)),
        (('-save', '--save_path'), dict(default=None, type=str)),
        (('--outer_loops',), dict(default=10, type=int)),
        (('--inner_loops',), dict(default=1000, type=int)),
        (('--log_steps',), dict(default=100, type=int, help='train log every xx steps')),
        (('--test_log_steps',), dict(default=1000, type=int, help='valid/test log every xx steps')),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args(args)
def reptile_train(model, device, n_shot,
                  n_iter_meta, meta_step_size,
                  inner_train_func, n_iter_inner=1000,
                  log_period_meta=10, log_period_inner=1):
    """Meta-train ``model`` in place with the Reptile update rule.

    Each meta-iteration samples a task, lets ``inner_train_func`` train the
    model on it, and then moves the pre-task weights a fraction of the way
    towards the task-trained weights.

    Args:
        model: module to meta-train (modified in place).
        device: torch device the task tensors are moved to.
        n_shot: passed to ``sample_meta_task`` as the per-task shot count.
        n_iter_meta: number of meta-iterations.
        meta_step_size: initial interpolation factor; it decays linearly
            towards zero over the meta-iterations.
        inner_train_func: callable invoked as
            ``inner_train_func(model, x, y, n_iter=..., log_period=...)``.
        n_iter_inner: ``n_iter`` handed to ``inner_train_func``.
        log_period_meta: every this many meta-iterations a header line is
            printed and inner-loop logging is switched on.
        log_period_inner: ``log_period`` handed to ``inner_train_func`` on
            logged meta-iterations (``None`` is passed on all others).
    """
    for iteration in range(n_iter_meta):
        verbose = iteration % log_period_meta == 0

        # Snapshot of the weights before adapting to the sampled task.
        start_weights = deepcopy(model.state_dict())

        task_x, task_y = sample_meta_task(n_shot)
        task_x = torch.from_numpy(task_x.astype(np.float32)).to(device)
        task_y = torch.from_numpy(task_y.astype(np.float32)).to(device)

        if verbose:
            print('Meta iter', iteration, ': ')
        inner_train_func(model, task_x, task_y, n_iter=n_iter_inner,
                         log_period=log_period_inner if verbose else None)

        # (adapted - start) plays the role of the meta-gradient; step along
        # it with a linearly decaying step size.
        adapted_weights = model.state_dict()
        step_size = meta_step_size * (1 - iteration / n_iter_meta)
        blended = {}
        for name, start in start_weights.items():
            blended[name] = start + (adapted_weights[name] - start) * step_size
        model.load_state_dict(blended)


In [None]:

# Seed every RNG this run may draw from so that task sampling and weight
# initialisation repeat across kernel restarts. (Some GPU kernels can still
# be non-deterministic; NumPy is seeded in case helper code draws from it.)
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Model().to(device)
#meta_weights = model.state_dict()
reptile_train(
    model, device,
    n_shot=100,         # upper bound on examples drawn per class for each task
    n_iter_meta=10000,  # number of outer (meta) iterations
    meta_step_size=0.5,
    inner_train_func=inner_train_func,
    n_iter_inner=20,
    log_period_meta=500, log_period_inner=5)


Meta iter 0 : 
[5,     5] loss: 0.139
[10,    10] loss: 0.138
[15,    15] loss: 0.138
[20,    20] loss: 0.138




Meta iter 500 : 
[5,     5] loss: 0.175
[10,    10] loss: 0.058
[15,    15] loss: 0.031
[20,    20] loss: 0.022
Meta iter 1000 : 
[5,     5] loss: 0.001
[10,    10] loss: 0.001
[15,    15] loss: 0.001
[20,    20] loss: 0.001
Meta iter 1500 : 
[5,     5] loss: 0.005
[10,    10] loss: 0.003
[15,    15] loss: 0.002
[20,    20] loss: 0.002
Meta iter 2000 : 
[5,     5] loss: 0.182
[10,    10] loss: 0.018
[15,    15] loss: 0.011
[20,    20] loss: 0.008
Meta iter 2500 : 
[5,     5] loss: 0.005
[10,    10] loss: 0.003
[15,    15] loss: 0.002
[20,    20] loss: 0.002
Meta iter 3000 : 
[5,     5] loss: 0.004
[10,    10] loss: 0.003
[15,    15] loss: 0.002
[20,    20] loss: 0.002
Meta iter 3500 : 
[5,     5] loss: 0.000
[10,    10] loss: 0.000
[15,    15] loss: 0.000
[20,    20] loss: 0.000
Meta iter 4000 : 
[5,     5] loss: 0.001
[10,    10] loss: 0.001
[15,    15] loss: 0.001
[20,    20] loss: 0.001
Meta iter 4500 : 
[5,     5] loss: 0.156
[10,    10] loss: 0.008
[15,    15] loss: 0.005
[20,    

In [None]:
# Few-shot evaluation, one task per type: adapt a copy of the meta-trained
# model on a small sampled task, then score it on that type's positive test
# entities (test_compatible_types=False). Every test example is a positive,
# so the "accuracy" printed below is the fraction predicted positive.
# NOTE(review): the next evaluation cell repeats this loop with
# test_compatible_types=True; consider factoring both into one helper.
correct = 0
total = 0
acc = 0
n_scored_types = 0

for t in types:
    train_features, train_labels, test_features = get_test_task(t, False)
    n_examples = test_features.shape[0]
    if n_examples == 0:
        # No test positives for this type: the mean over an empty prediction
        # tensor is NaN and would poison the running average.
        continue

    # Local names on purpose: rebinding `x` / `y` here would overwrite the
    # dataset label array `y` loaded earlier in the notebook.
    support_x = torch.from_numpy(train_features.astype(np.float32)).to(device)
    support_y = torch.from_numpy(train_labels.astype(np.float32)).to(device)

    # Adapt a throwaway copy so the meta-trained weights stay untouched.
    tmp_model = copy_model(model).to(device)
    inner_train_func(tmp_model, support_x, support_y, n_iter=20,
                     log_period=None)

    # again no gradients needed
    with torch.no_grad():
        # Cast like the training features so the dtype matches the model.
        query_x = torch.from_numpy(test_features.astype(np.float32)).to(device)
        predictions = tmp_model(query_x) > 0.5
        correct += torch.sum(predictions.double()).item()
        total += n_examples
        acc += torch.mean(predictions.double()).item()  # ground truth are all positive
    n_scored_types += 1

# Per-type average over the types that actually had test examples.
print(f'Accuracy of the network: {100 * acc / max(n_scored_types, 1)} %')
print(f'Accuracy of the network (by data count): {100 * correct / max(total, 1)} %')
print(len(types), total)

Accuracy of the network: 15.24202393716967 %
Accuracy of the network (by data count): 64.9197230814553 %
311 33945


In [None]:
# Few-shot evaluation, one task per type, with test_compatible_types=True:
# adapt a copy of the meta-trained model on a small sampled task, then score
# it on the positive test entities returned for that setting. Every test
# example is a positive, so the "accuracy" printed below is the fraction
# predicted positive.
# NOTE(review): the previous evaluation cell is the same loop with
# test_compatible_types=False; consider factoring both into one helper.
correct = 0
total = 0
acc = 0
n_scored_types = 0

for t in types:
    train_features, train_labels, test_features = get_test_task(
        t, test_compatible_types=True)
    n_examples = test_features.shape[0]
    if n_examples == 0:
        # No test positives for this type: the mean over an empty prediction
        # tensor is NaN and would poison the running average.
        continue

    # Local names on purpose: rebinding `x` / `y` here would overwrite the
    # dataset label array `y` loaded earlier in the notebook.
    support_x = torch.from_numpy(train_features.astype(np.float32)).to(device)
    support_y = torch.from_numpy(train_labels.astype(np.float32)).to(device)

    # Adapt a throwaway copy so the meta-trained weights stay untouched.
    tmp_model = copy_model(model).to(device)
    inner_train_func(tmp_model, support_x, support_y, n_iter=20,
                     log_period=None)

    # again no gradients needed
    with torch.no_grad():
        # Cast like the training features so the dtype matches the model.
        query_x = torch.from_numpy(test_features.astype(np.float32)).to(device)
        predictions = tmp_model(query_x) > 0.5
        correct += torch.sum(predictions.double()).item()
        total += n_examples
        acc += torch.mean(predictions.double()).item()  # ground truth are all positive
    n_scored_types += 1

# Per-type average over the types that actually had test examples.
print(f'Accuracy of the network: {100 * acc / max(n_scored_types, 1)} %')
print(f'Accuracy of the network (by data count): {100 * correct / max(total, 1)} %')
print(len(types), total)

Accuracy of the network: 15.614925726322706 %
Accuracy of the network (by data count): 38.11165120047135 %
311 33945


In [None]:
# Sanity check: shape of the embedding matrix next to the largest entity index
# and the number of mapped entities.
print(X.shape, max(entity_to_index.values()), len(entity_to_index))

In [None]:
# Sanity check: every entity in both groups returned for the Human type must
# have an entry in entity_to_index.
a, b = meta.generate_meta_task('http://yago-knowledge.org/resource/Human')
for group in (a, b):
    for t in group:
        assert t in entity_to_index