In [28]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V
import pandas as pd
import random

class SineTask:
    def __init__(self):
        self.amplitude = np.random.uniform(0.1, 5.0)
        self.phase = np.random.uniform(0, 2*np.pi)
        self.hold_x = None
        
    def sin(self, x):
        return self.amplitude * np.sin(x + self.phase)
    
    def training_data(self, n=10, fresh = False):
        if self.hold_x is None:
            self.hold_x = np.random.uniform(-5, 5, size=(n,1))
            x = self.hold_x
        else:
            if fresh:
                x = np.random.uniform(-5, 5, size=(n,1))
            else:
                x = self.hold_x
        y = self.sin(x)
        return torch.Tensor(x), torch.Tensor(y)
            
        
    def test_data(self, n=50):
        x = np.linspace(-5, 5, num=n).reshape(n,1)
        y = self.sin(x)
        return torch.Tensor(x), torch.Tensor(y)

    
class SineModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = torch.nn.Linear(1, 40)
        self.layer2 = torch.nn.Linear(40, 40)
        self.layer3 = torch.nn.Linear(40, 1)
    
    def forward(self, x, parameters=None):
        x = F.linear(x, parameters['layer1.weight'], parameters['layer1.bias'])
        x = F.relu(x)
        x = F.linear(x, parameters['layer2.weight'], parameters['layer2.bias'])
        x = F.relu(x)
        x = F.linear(x, parameters['layer3.weight'], parameters['layer3.bias'])
        return x
    
    def copy(self, model):
        self.load_state_dict(model.state_dict())
        


In [None]:
    def forward(self, x, parameters=None):
        #probably this if.
        if parameters is None:
            x = self.layer1(x)
            x = F.relu(x)
            x = self.layer2(x)
            x = F.relu(x)
            x = self.layer3(x)
            return x
        else:
            x = F.linear(x, parameters['layer1.weight'], parameters['layer1.bias'])
            x = F.relu(x)
            x = F.linear(x, parameters['layer2.weight'], parameters['layer2.bias'])
            x = F.relu(x)
            x = F.linear(x, parameters['layer3.weight'], parameters['layer3.bias'])
            return x

In [44]:
#hold_parameters = None 
torch.autograd.set_detect_anomaly(True)
# be carefull of not ImplementError setting in example code. probably a reason.

model = SineModel()
global_parameters = { name: par for name, par in model.named_parameters()}
hold_parameters = global_parameters
optimizer = torch.optim.Adam(model.parameters())
optimizer.zero_grad()
metric = torch.nn.MSELoss()
history = []
tasks = [SineTask() for m in range(10)]
gradient_steps = 3
lr_inner = 0.01

for task in tasks:
    task_model = SineModel()
    task_model.copy(model)

    for step in range(gradient_steps):
        x, y = task.training_data()
        y_hat = task_model.forward(x, hold_parameters)
        loss = metric(y_hat, y)
        grads = torch.autograd.grad(loss, hold_parameters.values(), create_graph = True )

        # note that now you only use manual parameters, graph parameters not updated
        hold_parameters = {name:(parameter - lr_inner * grad) for ((name, parameter), grad) in zip(hold_parameters.items(), grads)}

    # task update
    print('Jo')
    x, y = task.training_data(fresh=True)
    y_hat = model.forward(x, hold_parameters)
    meta_loss = metric(y_hat, y)
    print(meta_loss.item())
    meta_loss.backward(retain_graph=True)
    optimizer.step()
    optimizer.zero_grad()
    history.append(loss.item())
    
    



Jo
10.914102554321289
Jo
0.11748529970645905


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [40, 1]], which is output 0 of TBackward, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [31]:
hold_parameters

{'layer1.weight': tensor([[ 5.7750e-02],
         [ 2.9777e-01],
         [ 6.9563e-02],
         [ 8.1815e-01],
         [ 3.8379e-01],
         [-2.9962e-01],
         [ 6.9574e-01],
         [ 4.8989e-04],
         [ 8.0888e-01],
         [-7.2096e-01],
         [ 3.5985e-01],
         [ 7.9879e-02],
         [-5.0667e-01],
         [ 5.0920e-01],
         [ 8.2912e-01],
         [-6.6041e-01],
         [ 9.7944e-03],
         [ 3.0080e-01],
         [-8.0946e-01],
         [ 9.4273e-01],
         [ 5.1069e-01],
         [ 5.8100e-01],
         [-2.3837e-01],
         [ 1.5779e-01],
         [ 1.0778e-01],
         [ 8.1755e-01],
         [ 3.8943e-01],
         [ 1.4335e-01],
         [-2.7332e-01],
         [-1.4913e-01],
         [-8.0380e-01],
         [-9.8003e-01],
         [ 6.7918e-01],
         [ 5.6595e-01],
         [ 3.0601e-01],
         [-3.9063e-01],
         [-2.8660e-01],
         [-5.0420e-01],
         [ 6.2248e-01],
         [ 9.8471e-01]], grad_fn=<SubBackward0>