In [None]:
import torch
from torch import nn
from torch.autograd import Variable
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC


class deepMKL(nn.Module):
    def __init__(self, input_size, output_size, n_layers=3):
        super(deepMKL, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.betas = nn.Parameter(torch.ones(n_layers, 4) / 4)

    def forward(self, x, sig):
        Kf = []
        for i in range(self.n_layers):
            beta = self.betas[i]
            k = torch.exp(-torch.sum((x.unsqueeze(1) - x.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
            Kf.append(torch.matmul(k, beta))
            x = Kf[-1]
        return Kf, torch.matmul(Kf[-1], Kf[-1].t())

    def train(self, x, y, lr=1e-4, max_iter=100, c=10):
        """Fit ``betas`` by alternating precomputed-kernel SVM fits and gradient steps.

        Each iteration runs ``forward``, fits an ``SVC(kernel='precomputed')``
        on the returned ``Ks``, asks the matching ``_grad_<n>_layer`` helper
        for a gradient and a span value, and then takes a projected gradient
        step on ``betas`` (clamped to be non-negative; the last row is
        rescaled to sum to 1 when its sum exceeds 1).

        NOTE(review): this method shadows ``nn.Module.train(mode=True)``, so
        ``Module.eval()`` (which calls ``self.train(False)``) cannot be used
        on this class. The name is kept for backward compatibility.

        Args:
            x: Input tensor passed to ``forward``; ``x @ x.T`` is also used to
                pick the kernel width.
            y: Tensor of targets; converted to NumPy for ``SVC.fit`` and
                passed on to the gradient helpers.
            lr: Step size of the manual update of ``betas``.
            max_iter: Maximum number of iterations.
            c: ``C`` parameter of the ``SVC``.

        Raises:
            ValueError: If ``n_layers`` is not 1, 2 or 3, or if ``betas``
                contains NaN after an update.
        """
        if self.n_layers not in (1, 2, 3):
            # Only three gradient helpers are dispatched below; fail early
            # instead of hitting an unbound ``grad`` after the first SVM fit.
            raise ValueError(
                'train supports n_layers of 1, 2 or 3, got %r' % (self.n_layers,)
            )

        dotx = torch.matmul(x, x.t())
        sig = self._determine_sig(dotx.detach().cpu().numpy())
        # _grad_1_layer reads the kernel width from the instance rather than
        # taking it as an argument, so it has to be stored here.
        self.sig = sig

        y_np = y.detach().cpu().numpy()
        span = 0
        for t in range(max_iter):
            # betas is a Parameter, so without no_grad the outputs would track
            # gradients and could not be converted to NumPy; the update below
            # is applied manually, not through autograd.
            with torch.no_grad():
                Kf, Ks = self.forward(x, sig)
            model = SVC(C=c, kernel='precomputed')
            model.fit(Ks.cpu().numpy(), y_np)

            if self.n_layers == 1:
                grad, span_t = self._grad_1_layer(model, Kf[0], y)
            elif self.n_layers == 2:
                grad, span_t = self._grad_2_layer(model, Kf[0], Kf[1], sig, y)
            else:
                grad, span_t = self._grad_3_layer(model, Kf[0], Kf[1], Kf[2], sig, y)

            # In-place edits of a leaf Parameter (and of views into it) are
            # only permitted while autograd is disabled.
            with torch.no_grad():
                self.betas.sub_(lr * grad)
                self.betas.clamp_(min=0)
                last_row_sum = self.betas[-1].sum()
                if last_row_sum > 1:
                    self.betas[-1].div_(last_row_sum)
                has_nan = bool(torch.isnan(self.betas).any())

            if has_nan:
                raise ValueError('Learning rate is too high')
            if t > 5 and abs(span - span_t) < 1e-4:
                # The span has stopped changing between iterations.
                break
            span = span_t

    def _determine_sig(self, dotx):
        n = dotx.shape[0]
        s = np.median(dotx)
        return np.sqrt(s / 2)

    def _grad_1_layer(self, model, Kf, y):
        K = torch.exp(-torch.sum((Kf.unsqueeze(1) - Kf.unsqueeze(2)) ** 2, dim=-1) / (2 * self.sig ** 2))
        y_pred = torch.from_numpy(model.decision_function(K.numpy())).float()
        grad = torch.zeros_like(self.betas)
        for i in range(self.n_layers):
            grad[i, 0] = torch.sum(Kf[i] * (1 - y * y_pred))
            grad[i, 1] = torch.sum(Kf[i] * (1 -
