In [1]:
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch
from torch import nn
from torch import optim

import math

import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm as tqdm

In [2]:
from utils import gradient, jacobian, hessian

In [3]:
# All tensors in this notebook live on the CPU: the original conditional
# selected "cpu" whether or not CUDA was available.
# NOTE(review): if GPU execution was intended, the model must also be built
# with device=device (it defaults to 'cpu') -- confirm before enabling CUDA.
device = torch.device("cpu")
device

device(type='cpu')

# Plot Settings

In [4]:
# Global matplotlib defaults for every figure below: thick lines, large
# fonts and a wide 24 x 12 canvas. Keys are applied in the listed order.
plt.rcParams.update({
    'lines.linewidth': 3,
    'lines.markersize': 6,
    'xtick.labelsize': 36,
    'ytick.labelsize': 36,
    'legend.fontsize': 36,
    'axes.titlesize': 36,
    'axes.labelsize': 36,
    'figure.figsize': (24.0, 12.0),
    'font.size': 40,
})

# Experiment

In [5]:
# Synthetic, noiseless linear-regression problem: targets are X @ w for a
# randomly drawn ground-truth weight vector `w`.
SEED = 0  # fixed seed so Restart & Run All regenerates exactly the same data
rng = np.random.default_rng(SEED)

m = 128  # number of samples in each of the train and test splits
n = 3    # number of input features

w = rng.standard_normal(n)  # ground-truth weights shared by both splits

X_train = rng.standard_normal((m, n))
Y_train = (X_train @ w).reshape(-1, 1)  # column vector, one target per row

X_test = rng.standard_normal((m, n))
Y_test = (X_test @ w).reshape(-1, 1)

In [6]:
# Convert each NumPy split to torch tensors on `device` and pair inputs with
# targets so a DataLoader can batch them together.
train_tensors = [torch.Tensor(array).to(device) for array in (X_train, Y_train)]
test_tensors = [torch.Tensor(array).to(device) for array in (X_test, Y_test)]

train_data = TensorDataset(*train_tensors)
test_data = TensorDataset(*test_tensors)

In [7]:
# Pull every row of each split back out as an (inputs, targets) pair;
# `dataset[:]` indexes all of the dataset's tensors with a full slice.
(X_tr, Y_tr), (X_ts, Y_ts) = train_data[:], test_data[:]

In [8]:
class Neural(nn.Module):
    """Linear model with Gaussian log-prior / log-likelihood helpers.

    The network is a single ``nn.Linear`` layer mapping ``input_dim`` features
    to ``output_dim`` outputs. Besides prediction it exposes the terms of a
    log-joint density (prior over the parameters plus data likelihood) and
    carries an entropy estimate ``S`` that external code may update.

    Attributes:
        D: total number of scalar parameters of the model.
        S: 0-dim tensor with the entropy estimate, initialised to the entropy
            of a D-dimensional standard normal, ``0.5 * D * (1 + log(2*pi))``.
    """

    def __init__(self, input_dim=10, hidden_dim=3, output_dim=1, device='cpu'):
        """
        Args:
            input_dim: number of input features.
            hidden_dim: stored on the instance but not used by any layer.
            output_dim: number of outputs of the linear head.
            device: device the parameters and ``S`` are placed on.
        """
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.device = device

        self.head = nn.Linear(self.input_dim, self.output_dim)

        # Count scalar parameters directly instead of concatenating them.
        self.D = sum(p.numel() for p in self.parameters())

        # Differential entropy of N(0, I_D). The previous expression used
        # (1 + 2*pi), i.e. it was missing the logarithm.
        # S is a plain attribute (not a buffer), so it is placed on `device`
        # explicitly rather than relying on self.to(device).
        self.S = torch.tensor(0.5 * self.D * (1.0 + math.log(2.0 * math.pi)), device=device)

        self.to(device)

    def forward(self, input):
        """Standard ``nn.Module`` entry point; identical to :meth:`predict`."""
        return self.predict(input)

    def predict(self, input):
        """
        Args:
            input: Tensor(batch_size x input_dim) --- the matrix of input data

        Returns:
            Tensor(batch_size x output_dim) --- the matrix of output data

        """
        return self.head(input)

    def log_prior_w(self, w):
        """Log-density of a zero-mean isotropic Gaussian evaluated at ``w``.

        ``sigma`` multiplies the quadratic term as ``sigma**2`` and enters the
        normaliser as ``+log(sigma)``, i.e. it plays the role of an inverse
        standard deviation. With ``sigma = 1`` this is a standard normal.

        Args:
            w: tensor of any shape; it is flattened before evaluation.

        Returns:
            0-dim tensor with the log-density.
        """
        sigma = 1.
        flat = w.reshape(-1)
        count = flat.numel()
        return (-0.5 * (sigma ** 2) * torch.dot(flat, flat)
                - 0.5 * count * math.log(2 * math.pi)
                + count * math.log(sigma))

    def log_prior_all(self):
        """Sum of :meth:`log_prior_w` over every parameter tensor of the model."""
        return sum(self.log_prior_w(p) for p in self.parameters())

    # Original (misspelled) name, kept so existing callers keep working.
    log_priot_all = log_prior_all

    def loglikelihood(self, batch_x, batch_y):
        """Gaussian log-likelihood of ``batch_y`` given the predictions.

        Uses precision ``beta = 1``:
        ``-0.5*beta*SSE - 0.5*N*log(2*pi) + 0.5*N*log(beta)`` where ``SSE`` is
        the summed squared error and ``N = len(batch_y)``.

        Args:
            batch_x: Tensor(batch_size x input_dim) of inputs.
            batch_y: Tensor of targets, broadcast against the predictions.

        Returns:
            0-dim tensor with the log-likelihood of the batch.
        """
        beta = 1
        n_obs = len(batch_y)
        squared_error = torch.sum((self.predict(batch_x) - batch_y) ** 2)
        return (-0.5 * beta * squared_error
                - 0.5 * n_obs * math.log(2 * math.pi)
                + 0.5 * n_obs * math.log(beta))

    def margin_likelihood(self, batch_x, batch_y):
        """Log-joint of parameters and batch: log-prior plus log-likelihood.

        Returns:
            0-dim tensor ``log_prior_all() + loglikelihood(batch_x, batch_y)``.
        """
        return self.log_prior_all() + self.loglikelihood(batch_x, batch_y)

    def loss(self, batch_x, batch_y):
        """Mean squared error between the predictions and ``batch_y``."""
        residual = self.predict(batch_x) - batch_y
        return torch.mean(residual ** 2)
    

In [9]:
# Run hyperparameters.
N_EPOCHS = 1000
BATCH_SIZE = 4
alpha = 0.0001  # SGD learning rate, also the step size in the entropy update

torch.manual_seed(0)  # reproducible parameter init and batch shuffling

model = Neural(input_dim=n, output_dim=1, device=device)
# One entry per epoch: (entropy S, log-prior, log-likelihood of the last batch).
List_of_step = []

# Built once instead of on every batch/epoch: plain SGD keeps no per-step
# state, and a shuffling DataLoader reshuffles each time it is iterated.
# The parameters are collected into `params` rather than through a
# `for w in ...` loop, which used to overwrite the ground-truth weights `w`.
params = list(model.parameters())
optimizer = optim.SGD(params, lr=alpha)
generator = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
identity = torch.eye(model.D, device=device)

for ep in tqdm(range(N_EPOCHS)):
    for batch_x, batch_y in generator:
        # Entropy update: S <- S + log det(I - alpha * H), where H comes from
        # `hessian` applied to the negative log-joint of the batch.
        # Presumably `hessian` returns a (D x D) matrix over all parameters --
        # the helper lives in utils and is not shown here; TODO confirm.
        model.zero_grad()
        neg_log_joint = -model.margin_likelihood(batch_x, batch_y)
        step_matrix = identity - alpha * hessian(neg_log_joint, params)
        model.S = (model.S + torch.logdet(step_matrix)).detach()

        # Network parameter update: one SGD step on the mean squared error.
        model.zero_grad()
        loss = model.loss(batch_x, batch_y)
        loss.backward()
        optimizer.step()

    # NOTE(review): the likelihood is logged on the last mini-batch of the
    # epoch only, so it is a noisy estimate -- confirm whether the full
    # training set (X_tr, Y_tr) was intended.
    with torch.no_grad():  # logging only, no graph needed
        List_of_step.append((
            model.S.item(),
            model.log_priot_all().item(),
            model.loglikelihood(batch_x, batch_y).item(),
        ))
        
        


  7%|▋         | 70/1000 [00:04<00:56, 16.45it/s]


KeyboardInterrupt: 

In [None]:
# Plot the recorded log-joint (log-prior + log-likelihood) per epoch.
# The first BURN_IN epochs are skipped, but only when more than that many
# were recorded; otherwise an interrupted run would give an empty plot.
BURN_IN = 600
first_epoch = BURN_IN if len(List_of_step) > BURN_IN else 0
log_joint = [log_prior + log_lik for _, log_prior, log_lik in List_of_step[first_epoch:]]
epochs = range(first_epoch, first_epoch + len(log_joint))

fig, ax = plt.subplots()
ax.plot(epochs, log_joint, label='Train')
ax.set_xlabel('epoch')
ax.set_ylabel('log prior + log likelihood')
ax.grid()
ax.legend(loc='best')
plt.show()