In [None]:
import numpy as np
import torch
from torch import nn
from torch.autograd import Variable
import torch.optim as optim

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

In [None]:
def computeFuzzynumber(trainset, delta):
    if trainset.size == 0:
        print('The input dataset is null!')
        return
    
    group1 = trainset[trainset[:, -1] == -1, :-1]
    group2 = trainset[trainset[:, -1] == 1, :-1]
    
    mean_g1 = torch.mean(group1, dim=0)
    mean_g2 = torch.mean(group2, dim=0)
    
    max_g1 = torch.max(torch.norm(group1 - mean_g1, dim=1))
    max_g2 = torch.max(torch.norm(group2 - mean_g2, dim=1))
    
    fms = torch.zeros(trainset.shape[0], 1)
    for i in range(trainset.shape[0]):
        if trainset[i, -1] == -1:
            fms[i, 0] = 1 - (torch.norm(trainset[i, :-1] - mean_g1) / (max_g1 + delta))
        elif trainset[i, -1] == 1:
            fms[i, 0] = 1 - (torch.norm(trainset[i, :-1] - mean_g2) / (max_g2 + delta))
    
    return fms

In [None]:
import torch


def compute_fuzzy_number(trainset, delta):
    if trainset.size == 0:
        print('The input dataset is null!')
        return None
    else:
        group1 = trainset[trainset[:, -1] == -1, :-1]
        group2 = trainset[trainset[:, -1] == 1, :-1]

        mean_g1 = torch.mean(group1, dim=0)
        mean_g2 = torch.mean(group2, dim=0)

        max_g1 = torch.max(torch.norm(group1 - mean_g1, dim=1))
        max_g2 = torch.max(torch.norm(group2 - mean_g2, dim=1))

        fms = torch.zeros(trainset.shape[0])
        for i in range(trainset.shape[0]):
            if trainset[i, -1] == -1:
                fms[i] = 1 - (torch.sqrt(torch.norm(trainset[i, :-1] - mean_g1)) / (max_g1 + delta))
            if trainset[i, -1] == 1:
                fms[i] = 1 - (torch.sqrt(torch.norm(trainset[i, :-1] - mean_g2)) / (max_g2 + delta))

        return fms


def trainFSVC(train, fms, C, kernel_fn, para):
    X = train[:, :-1]
    Y = train[:, -1]
    nrow, ncol = X.shape

    Kmatrix = kernel_fn(X, X, para)

    H = torch.outer(Y, Y) * Kmatrix

    Aeq = torch.tensor(Y).unsqueeze(0)
    beq = torch.tensor([0.])

    lb = torch.zeros(nrow)
    ub = C * fms

    a0 = torch.zeros(nrow) + 0.0001

    x = torch.zeros(nrow, requires_grad=True)

    optimizer = optim.Adadelta([x])

    num_iter = 2000
    for t in range(num_iter):
        loss = torch.sum(torch.matmul(x.unsqueeze(-1), H) * x) / 2 - torch.sum(x)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        x.data = torch.min(torch.max(x.data, lb), ub)

    lamda = x.detach()

    epsilon = 1e-8
    i_sv = torch.where(torch.abs(lamda) > epsilon)[0]
    tmp = kernel_fn(X, X[i_sv, :], para) * (lamda[i_sv] * Y[i_sv])
    b = 1 / Y[i_sv] - torch.mean(tmp, dim=0)
    boundary = torch.mean(b)

    return lamda, boundary


In [None]:

def testFSVC(lamda, boundary, train, test, ker, para):
    nrow, ncol = train.shape
    nrowt, ncolt = test.shape
    
    X = torch.tensor(train[:, :ncol-1], dtype=torch.float)
    Y = torch.tensor(train[:, ncol-1], dtype=torch.float)
    
    Xt = torch.tensor(test[:, :ncolt-1], dtype=torch.float)
    
    Kmatrix = kernel(X, Xt, ker, para)
    
    tmp = torch.mm(Kmatrix, torch.mul(lamda, Y).unsqueeze(1)).squeeze(1)
    py = torch.sign(tmp + boundary).numpy()
    
    predictedY = np.hstack((test[:, ncolt-1].reshape(-1, 1), py.reshape(-1, 1)))
    
    stat = torch.zeros(18)
    for n in range(nrowt):
        if predictedY[n, 0] == -1:
            stat[0] += 1
            if predictedY[n, 1] == -1:
                stat[2] += 1
            else:
                stat[3] += 1
        if predictedY[n, 0] == 1:
            stat[1] += 1
            if predictedY[n, 1] == 1:
                stat[4] += 1
            else:
                stat[5] += 1
    
    stat[6] = stat[3] / (stat[3] + stat[4])
    stat[7] = stat[5] / (stat[5] + stat[2])
    stat[8] = (stat[2] + stat[5]) / (stat[2] + stat[3] + stat[4] + stat[5])
    stat[9] = (stat[3] + stat[4]) / (stat[2] + stat[3] + stat[4] + stat[5])
    stat[10] = (stat[3] + stat[4]) / (stat[2] + stat[3] + stat[4] + stat[5])
    stat[11] = stat[4] / (stat[3] + stat[4])
    stat[12] = stat[4] / (stat[4] + stat[5])
    stat[13] = stat[4] / (stat[4] + stat[5])
    stat[14] = stat[3] / (stat[3] + stat[2])
    stat[15] = 2 * stat[4] / (2 * stat[4] + stat[3] + stat[5])
    stat[16] = (stat[3] * stat[4] - stat[2] * stat[5]) / \
                (torch.sqrt((stat[4] + stat[5]) * (stat[2] + stat[3]) * (stat[3] + stat[5]) * (stat[2] + stat[4])) + 1e-10)

    return predictedY, stat


In [None]:



class deepMKL(nn.Module):
    def __init__(self, input_size, output_size, n_layers=3):
        super(deepMKL, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.betas = nn.Parameter(torch.ones(n_layers, 4) / 4)

    def forward(self, x, sig):
        Kf = []
        for i in range(self.n_layers):
            beta = self.betas[i]
            k = torch.exp(-torch.sum((x.unsqueeze(1) - x.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
            Kf.append(torch.matmul(k, beta))
            x = Kf[-1]
        return Kf, torch.matmul(Kf[-1], Kf[-1].t())

    def train(self, x, y, lr=1e-4, max_iter=100, c=10):
        n_samples = x.shape[0]
        dotx = torch.matmul(x, x.t())
        sig = self._determine_sig(dotx.numpy())
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        span = 0
        for t in range(max_iter):
            Kf, Ks = self.forward(x, sig)
            model = SVC(C=c, kernel='precomputed')
            model.fit(Ks.numpy(), y.numpy())

            if self.n_layers == 1:
                grad, span_t = self._grad_1_layer(model, Kf[0], y)
            elif self.n_layers == 2:
                grad, span_t = self._grad_2_layer(model, Kf[0], Kf[1], sig, y)
            elif self.n_layers == 3:
                grad, span_t = self._grad_3_layer(model, Kf[0], Kf[1], Kf[2], sig, y)

            self.betas.data -= lr * grad
            self.betas.data[self.betas.data < 0] = 0
            if self.betas[-1].sum() > 1:
                self.betas[-1] /= self.betas[-1].sum()

            if np.isnan(self.betas.numpy().sum()):
                raise ValueError('Learning rate is too high')
            elif t > 5 and abs(span - span_t) < 1e-4:
                break
            span = span_t

    def _determine_sig(self, dotx):
        n = dotx.shape[0]
        s = np.median(dotx)
        return np.sqrt(s / 2)

    def _grad_1_layer(self, model, Kf, y):
        K = torch.exp(-torch.sum((Kf.unsqueeze(1) - Kf.unsqueeze(2)) ** 2, dim=-1) / (2 * self.sig ** 2))
        y_pred = torch.from_numpy(model.decision_function(K.numpy())).float()
        grad = torch.zeros_like(self.betas)
        for i in range(self.n_layers):
            grad[i, 0] = torch.sum(Kf[i] * (1 - y * y_pred))
            grad[i, 1] = torch.sum(Kf[i] * (1 - y_pred ** 2))
            grad[i, 2] = torch.sum(Kf[i] * (1 - y * y_pred) * y)
            grad[i, 3] = torch.sum(Kf[i] * (1 - y * y_pred) * y ** 2)
        span = torch.norm(grad)
        return grad, span
    def _grad_2_layer(self, model, Kf1, Kf2, sig, y):
        K1 = torch.exp(-torch.sum((Kf1.unsqueeze(1) - Kf1.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
        K2 = torch.exp(-torch.sum((Kf2.unsqueeze(1) - Kf2.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
        K12 = torch.matmul(Kf1.unsqueeze(-1), Kf2.unsqueeze(-2))
        K12 = torch.exp(-torch.sum((K12 - K12.permute(0, 2, 1)) ** 2, dim=-1) / (2 * sig ** 2))

        K = self.betas[0, 0] * K1 + self.betas[0, 1] * K2 + self.betas[0, 2] * K12 + self.betas[0, 3] * torch.eye(K1.shape[0])

        y_pred = torch.from_numpy(model.decision_function(K.numpy())).float()
        grad = torch.zeros_like(self.betas)
        for i in range(self.n_layers):
            grad[i, 0] = torch.sum(K1 * (1 - y * y_pred))
            grad[i, 1] = torch.sum(K2 * (1 - y * y_pred))
            grad[i, 2] = torch.sum(K12 * (1 - y * y_pred))
            grad[i, 3] = torch.sum(torch.eye(K1.shape[0]) * (1 - y * y_pred))
        span = torch.norm(grad)
        return grad, span

    def _grad_3_layer(self, model, Kf1, Kf2, Kf3, sig, y):
        K1 = torch.exp(-torch.sum((Kf1.unsqueeze(1) - Kf1.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
        K2 = torch.exp(-torch.sum((Kf2.unsqueeze(1) - Kf2.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
        K3 = torch.exp(-torch.sum((Kf3.unsqueeze(1) - Kf3.unsqueeze(2)) ** 2, dim=-1) / (2 * sig ** 2))
        K12 = torch.matmul(Kf1.unsqueeze(-1), Kf2.unsqueeze(-2))
        K12 = torch.exp(-torch.sum((K12 - K12.permute(0, 2, 1)) ** 2, dim=-1) / (2 * sig ** 2))
        K13 = torch.matmul(Kf1.unsqueeze(-1), Kf3.unsqueeze(-2))
        K13 = torch.exp(-torch.sum((K13 - K13.permute(0, 2, 1)) ** 2, dim=-1) / (2 * sig ** 2))
        K23 = torch.matmul(Kf2.unsqueeze(-1), Kf3.unsqueeze(-2))
        K23 = torch.exp(-torch.sum((K23 - K23.permute(0, 2, 1))** 2, dim=-1) / (2 * sig ** 2))


In [None]:

def deepMKL_train(x, y, nLayers, LR=1e-4, maxI=100, C=10):
    """
    Deep Multiple Kernel Learning by Span Bound

    Inputs:
    (1) x = trainng data matrix, where rows are instances and columns are features
    (2) y = training target vector, where rows are instances
    (3) nLayers = number of layers, 1, 2 or 3
    (4) LR = learning rate (default=1E-4)
    (5) maxI = maximum number of iterations (default=100)
    (6) C = SVM penalty constant (default=10)

    Outputs:
    (1) model = LIBSVM model
    (2) net = net parameters

    Citation: Strobl EV & Visweswaran S. Deep Multiple Kernel Learning.
    ICMLA, 2013.
    """
    x = torch.tensor(x, dtype=torch.float32)
    y = torch.tensor(y, dtype=torch.float32)
    r = x.shape[0]

    # initialize weights
    betas = torch.ones(nLayers, 4) / 4

    # initialize kernels
    dotx = torch.matmul(x, x.T)
    sig = determineSig(dotx)
    Kf = computeKernels(dotx, sig, betas, nLayers)

    # alternating opt
    span = 0
    for t in range(maxI):

        # train SVM
        Ks = Kf[:, nLayers - 1].reshape(r, r)
        svc = SVC(kernel='precomputed', C=C)
        svc.fit(Ks.numpy(), y.numpy())
        model = svc

        # kernels
        K, Kf = computeKernels(dotx, sig, betas, nLayers)

        # span gradient
        if nLayers == 1:
            betas, spanT = grad1Layer(model, betas, LR, Kf, K, y)
        elif nLayers == 2:
            betas, spanT = grad2Layer(model, betas, LR, Kf, K, sig, y)
        elif nLayers == 3:
            betas, spanT = grad3Layer(model, betas, LR, Kf, K, sig, y)

        # feasible region projection
        betas[betas < 0] = 0  # non-negative
        if torch.sum(betas[-1, :]) > 1:
            betas[-1, :] = betas[-1, :] / torch.sum(betas[-1, :])  # trace final layer upper bound

        # stopping conditions
        if np.isnan(np.sum(betas)):
            raise ValueError('Learning rate is too high')
        elif torch.abs(span - spanT) < 1e-4 and t > 5:
            break
        span = spanT

    # final model
    net = {'w': betas, 'sig': sig, 'nLayers': nLayers, 'n': r}

    return model, net


In [1]:

# 导入pytorch库
import torch

# 初始化两个随机的张量，形状分别为(5, 3)和(4, 3)
x = torch.randn(5, 3) # x.shape = (5, 3)
y = torch.randn(4, 3) # y.shape = (4, 3)

# 打印x和y
print("x:")
print(x)
print("y:")
print(y)

# 对x和y分别使用unsqueeze()，参数分别为1和0
x_unsqueezed = x.unsqueeze(1) # x_unsqueezed.shape = (5, 1, 3)
y_unsqueezed = y.unsqueeze(0) # y_unsqueezed.shape = (1, 4, 3)

# 打印x_unsqueezed和y_unsqueezed
print("x_unsqueezed:")
print(x_unsqueezed)
print("y_unsqueezed:")
print(y_unsqueezed)

# 对x_unsqueezed和y_unsqueezed进行减法运算，利用广播机制
diff = x_unsqueezed - y_unsqueezed # diff.shape = (5, 4, 3)

# 打印diff
print("diff:")
print(diff)


x:
tensor([[-0.4686, -1.0732, -1.4533],
        [ 0.2133, -1.4165,  0.4282],
        [ 0.4139, -2.4091,  0.7029],
        [ 0.2786, -0.6810, -1.0052],
        [ 1.9398,  0.7333,  0.4589]])
y:
tensor([[-1.4241,  0.2257,  1.1952],
        [ 0.9051,  1.0643, -1.3562],
        [-1.8180,  1.1094, -0.3707],
        [ 2.1814,  0.9993,  1.5191]])
x_unsqueezed:
tensor([[[-0.4686, -1.0732, -1.4533]],

        [[ 0.2133, -1.4165,  0.4282]],

        [[ 0.4139, -2.4091,  0.7029]],

        [[ 0.2786, -0.6810, -1.0052]],

        [[ 1.9398,  0.7333,  0.4589]]])
y_unsqueezed:
tensor([[[-1.4241,  0.2257,  1.1952],
         [ 0.9051,  1.0643, -1.3562],
         [-1.8180,  1.1094, -0.3707],
         [ 2.1814,  0.9993,  1.5191]]])
diff:
tensor([[[ 0.9556, -1.2989, -2.6486],
         [-1.3736, -2.1375, -0.0971],
         [ 1.3495, -2.1827, -1.0826],
         [-2.6500, -2.0726, -2.9724]],

        [[ 1.6374, -1.6421, -0.7670],
         [-0.6918, -2.4807,  1.7844],
         [ 2.0313, -2.5259,  0.7989],
   

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from MKLpy.algorithms import EasyMKL
from MKLpy.utils.misc import identity_kernel
from MKLpy.preprocessing import normalization
import numpy as np

class IrisDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

    def __len__(self):
        return len(self.y)

data = load_iris()
X, Y = data.data, data.target
num_classes = len(np.unique(Y))
Yh = torch.tensor(to_categorical(Y), dtype=torch.float32)
Xtr, Xva, Ytr, Yva, Ytr_1h, Yva_1h = train_test_split(X, Y, Yh, random_state=42, shuffle=True, test_size=.3)


learning_rate = 1e-5
batch_size    = 32
activation    = nn.Sigmoid()
num_hidden    = 10
num_neurons   = 128
max_epochs    = 100

train_data = IrisDataset(Xtr, Ytr_1h)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)






NameError: name 'to_categorical' is not defined

In [None]:
class MLP(nn.Module):
    def __init__(self, num_hidden, num_neurons, num_classes, activation):
        super(MLP, self).__init__()
        self.num_hidden = num_hidden
        self.num_neurons = num_neurons
        self.num_classes = num_classes
        self.activation = activation
        self.layers = nn.ModuleList()
        for l in range(1, self.num_hidden+1):
            layer = nn.Linear(num_neurons, num_neurons)
            self.layers.append(layer)
        self.classification_layer = nn.Linear(num_neurons, num_classes)

    def forward(self, x):
        for layer in self.layers:
            x = self.activation(layer(x))
        x = self.classification_layer(x)
        return x

model = MLP(num_hidden=num_hidden, num_neurons=num_neurons, num_classes=num_classes, activation=activation)

train_data = IrisDataset(Xtr, Ytr_1h)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_data = IrisDataset(Xva, Yva_1h)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

optimizer = optim.SGD(model.parameters(), lr=learning_rate)
reduce_lr  = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.2, patience=5, min_lr=0.001)
earlystop  = callb.EarlyStopping(
    monitor='val_loss',patience=10, mode='min',verbose=1)

In [None]:
for epoch in range(max_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = model(data)
        loss = nn.MSELoss()(output, target)
        loss.backward()
        optimizer.step()
    model.eval()
    with torch.no_grad():
        val_loss = nn.MSELoss()(model(torch.tensor(Xva, dtype=torch.float32)), Yva_1h)
        reduce_lr.step(val_loss)

#representations extraction and kernels definition
train_representations = []
test_representations = []

model.eval()

with torch.no_grad():
    for data in train_loader:
        inputs, _ = data
        representations = model.layers[0](inputs)
        for i in range(1, len(model.layers)):
            representations = model.layers[i](model.activation(representations))
        train_representations.append(representations)

    for data in test_loader:
        inputs, _ = data
        representations = model.layers[0](inputs)
        for i in range(1, len(model.layers)):
            representations = model.layers[i](model.activation(representations))
        test_representations.append(representations)

train_representations = torch.cat(train_representations).numpy()
test_representations = torch.cat(test_representations).numpy()

weights = EasyMKL().fit(normalization(train_representations, axis=0), Ytr)
kernel = lambda X1, X2: weights.dot(identity_kernel(X1, X2))
Ktr = kernel(train_representations, train_representations)
Kva = kernel(test_representations, train_representations)


In [None]:
接下来，我们需要将从MLP的最后一层（也就是输出层）获取的特征表示提取出来，然后将这些特征表示作为EasyMKL算法的输入，得到最终的预测结果。

我们可以通过在MLP模型中添加一层来提取特征表示，然后将这些特征表示输入EasyMKL算法。具体地，我们可以在MLP模型的最后一层（输出层）之前添加一层，使其输出的是特征表示而不是最终的预测结果。这一层可以是任何合适的层，例如全连接层或池化层。

在本例中，我们可以添加一个全连接层作为特征提取层。具体来说，我们可以在MLP类的构造函数中添加以下代码：

self.feature_layer = nn.Linear(num_neurons, num_neurons)

#在前向传递函数中，我们在输出层之前将数据传递到特征提取层，并将其结果用于EasyMKL算法。修改后的前向传递函数如下所示：

def forward(self, x):
    for layer in self.layers:
        x = self.activation(layer(x))
        features = self.feature_layer(x)
        x = self.classification_layer(x)
    return x, features

#我们还需要修改训练循环，以便我们可以在训练过程中提取特征表示，并使用EasyMKL算法进行预测。具体来说，我们需要在每个训练循环中，使用MLP模型来预测训练数据和测试数据的特征表示，并将这些特征表示传递给EasyMKL算法进行训练和预测。修改后的训练循环如下所示：

train_kernel = None
test_kernel = None
for epoch in range(max_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output, features = model(data)
        loss = nn.MSELoss()(output, target)
        loss.backward()
        optimizer.step()

        # Compute kernel matrices
        if train_kernel is None:
            train_kernel = identity_kernel(features.detach().numpy())
        else:
            train_kernel += identity_kernel(features.detach().numpy())

    # Compute kernel matrices for test set
    test_features = model(torch.tensor(Xte, dtype=torch.float32))[1].detach().numpy()
    if test_kernel is None:
        test_kernel = identity_kernel(test_features)
    else:
        test_kernel += identity_kernel(test_features)
    #Normalize kernel matrices
    train_kernel = normalization(train_kernel)
    test_kernel = normalization(test_kernel)

    #Train and predict using EasyMKL
    clf = EasyMKL(lam=0.1)
    clf.fit(train_kernel, Ytr)
    Yte_pred = clf.predict(test_kernel)

最终的预测结果存储在Yte_pred中。

In [3]:
!conda info -e

# conda environments:
#
base                  *  D:\Software\Anaconda
dlenv                    D:\Software\Anaconda\envs\dlenv

