In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the air-pollution spreadsheet into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where that exact file exists.
df = pd.read_excel(r"C:\Users\duduu\Desktop\2014-2022年8月空气污染数据(均值填充).xlsx")
# Bare expression so the notebook renders the frame as the cell output.
df

Unnamed: 0,日期,AQI,质量等级,PM2.5,PM10,SO2,CO,NO2,O3_8h,Unnamed: 9,Unnamed: 10,Unnamed: 11,年份,AQI平均值,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19
0,2013-12-02,180,中度污染,136,253,87,2.0,120,25,,,,,,,,,,,
1,2013-12-03,234,重度污染,184,297,108,2.5,133,17,,,,,,,,,,,
2,2013-12-04,386,严重污染,336,455,51,2.5,130,43,,,,,,,,,,,
3,2013-12-05,328,严重污染,278,386,52,2.3,110,59,,,,,,,,,,,
4,2013-12-06,319,严重污染,269,389,61,2.2,92,36,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3187,2022-08-24,40,优,8,33,5,0.4,13,79,,,,,,,,,,,
3188,2022-08-25,59,良,18,48,6,0.6,21,110,,,,,,,,,,,
3189,2022-08-26,68,良,19,40,6,0.5,22,121,,,,,,,,,,,
3190,2022-08-27,49,优,28,36,5,0.6,15,97,,,,,,,,,,,


In [5]:
#划分数据
from sklearn.model_selection import train_test_split
def data_one(data):
    """Min-max scale every column of a 2-D tensor to [0, 1], in place.

    Args:
        data: 2-D tensor of shape (rows, columns). It is overwritten with the
            scaled values.

    Returns:
        The same tensor object that was passed in, now scaled column-wise as
        (value - column_min) / (column_max - column_min).

    A column whose values are all equal has a zero range; it is mapped to 0
    instead of producing NaN from 0/0.
    """
    if data.numel() == 0:
        # Nothing to scale; min/max of an empty tensor would raise.
        return data
    col_min = data.min(dim=0).values
    col_range = data.max(dim=0).values - col_min
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN.
    safe_range = torch.where(col_range == 0, torch.ones_like(col_range), col_range)
    # copy_ keeps the in-place contract of the element-wise version.
    data.copy_((data - col_min) / safe_range)
    return data
# Inputs are the SO2, CO, NO2 and PM10 columns; targets are PM2.5 and O3_8h.
x = df.loc[:,("SO2","CO","NO2","PM10")].values
y = df.loc[:,("PM2.5","O3_8h")].values
# Convert both arrays to float32 tensors.
x = torch.tensor(x,dtype=torch.float32)
y = torch.tensor(y,dtype=torch.float32)
# Min-max scale every column of the inputs and of the targets.
x = data_one(x)
y = data_one(y)
# NOTE(review): scaling uses the min/max of the whole data set before the
# split, and no random_state is passed, so the partition changes between
# runs — confirm both are intended.
x_train,x_test,y_train,y_test = train_test_split(x,y)

In [None]:
class share_model:
    """Two-layer ReLU network whose hidden units are shared by all outputs.

    The network computes relu(relu(x @ w1 + b1) @ w2 + b2) and is trained
    with one gradient step per sample on the mean-squared error.

    Args:
        lr: learning rate; every per-sample step is additionally divided by
            the number of training samples.
        hidden_layer: number of hidden units.
        epoches: number of passes over the training data.
    """

    def __init__(self,lr=0.1,hidden_layer=3,epoches=50):
        self.lr = lr
        self.hidden_layer = hidden_layer
        self.epoches = epoches

    def fit(self,x,y):
        """Train on x (samples, features) and y (samples, outputs).

        Returns:
            dict with the trained tensors under "w1", "b1", "w2" and "b2";
            the same dict is stored on self.parameters.
        """
        m = x.shape[0]
        input_feature = x.shape[1]
        output_feature = y.shape[1]
        # Creation order is kept so a fixed seed yields the same initial weights.
        w1 = torch.randn(input_feature,self.hidden_layer,requires_grad=True)
        b1 = torch.zeros(self.hidden_layer,requires_grad=True)
        w2 = torch.randn(self.hidden_layer,output_feature,requires_grad=True)
        b2 = torch.randn(output_feature,requires_grad=True)
        weights = (w1,b1,w2,b2)
        loss = None
        for epoch in range(self.epoches):
            for x1,y1 in TensorDataset(x,y):
                y_hat = torch.relu(torch.relu(x1@w1+b1)@w2+b2)
                loss = F.mse_loss(y_hat,y1)
                loss.backward()
                # Update under no_grad instead of writing through `.data`,
                # then clear the gradient for the next sample.
                with torch.no_grad():
                    for p in weights:
                        p -= self.lr*p.grad/m
                        p.grad.zero_()
            # `loss` stays None when the data set is empty; skip the report then.
            if epoch%10==0 and loss is not None:
                print("epoch:%d ,loss:%f"%(epoch,loss))
        self.parameters = {
                    "w1":w1,
                    "b1":b1,
                    "w2":w2,
                    "b2":b2}

        return self.parameters

    def predict(self,x):
        """Return the network output for x using the parameters stored by fit.

        The forward pass runs without gradient tracking, so the result does
        not require grad.
        """
        w1 = self.parameters["w1"]
        w2 = self.parameters["w2"]
        b1 = self.parameters["b1"]
        b2 = self.parameters["b2"]
        with torch.no_grad():
            y_pred = torch.relu(torch.relu(x@w1+b1)@w2+b2)
        return y_pred

            


In [None]:
class new_share_model:
    """Two-task ReLU network that starts fully shared and may split one hidden node.

    Phase 1 trains relu(relu(x @ w1 + b1) @ w2 + b2) with mini-batch SGD and,
    for every batch, compares the sign of the task-1 and task-2 gradients of
    each w1 entry. When the largest per-batch conflict count reaches
    `threshold`, the hidden node owning that entry is duplicated and phase 2
    trains the split network, in which task 1 reads the original hidden nodes
    and task 2 reads the same nodes with the selected one replaced by its copy.

    Only the first two target columns are treated as tasks.

    NOTE(review): the conflict count is rebuilt for every batch, so each entry
    is 0 or 1 and the default threshold=2 can never be reached — confirm
    whether the count was meant to accumulate across batches.

    Args:
        lr: learning rate; each step is scaled by batch_size / n_samples.
        hidden_layer: number of hidden units before any split.
        epoches: maximum epochs for phase 1, and the epoch count of phase 2.
        threshold: conflict count that triggers the split.
    """

    def __init__(self,lr=0.1,hidden_layer=3,epoches=100,threshold=2):
        self.lr = lr
        self.hidden_layer = hidden_layer
        self.epoches = epoches
        self.threshold = threshold

    def _sgd_step(self,params,scale):
        """Apply one scaled gradient step to every tensor in params and clear its grad."""
        with torch.no_grad():
            for p in params:
                p -= self.lr*p.grad*scale
                p.grad.zero_()

    @staticmethod
    def _split_forward(x,w1,b1,w21,w22,b2,index):
        """Forward pass of the split network; returns a (batch, 2) tensor."""
        hidden = torch.relu(x@w1+b1)
        # Task 1 sees the original nodes, i.e. every column except the appended copy.
        y1_hat = torch.relu(hidden[:,:-1]@w21+b2[0]).reshape(-1,1)
        # Task 2 sees every column except the original node `index`, so it uses the copy.
        task2_hidden = torch.cat([hidden[:,:index],hidden[:,index+1:]],1)
        y2_hat = torch.relu(task2_hidden@w22+b2[1]).reshape(-1,1)
        return torch.cat([y1_hat,y2_hat],1)

    def fit(self,x,y):
        """Train on x (samples, features) and y (samples, >=2 targets).

        Returns:
            self.parameters: keys "w1", "b1", "w2", "b2" when no split
            happened, or "w1", "b1", "w21", "w22", "b2" after a split (in
            which case self.index holds the duplicated node).
        """
        m = x.shape[0]
        # int(m*0.1) is 0 for fewer than 10 samples, which DataLoader rejects.
        batch_size = max(1,int(m*0.1))
        input_feature = x.shape[1]
        output_feature = y.shape[1]
        w1 = torch.randn(input_feature,self.hidden_layer,requires_grad=True)
        b1 = torch.zeros(self.hidden_layer,requires_grad=True)
        w2 = torch.randn(self.hidden_layer,output_feature,requires_grad=True)
        b2 = torch.randn(output_feature,requires_grad=True)
        dataset = TensorDataset(x,y)
        split_index = None
        loss = None
        for epoch in range(self.epoches):
            for x_batch,y_batch in DataLoader(dataset,batch_size,shuffle=True):
                m1 = x_batch.shape[0]
                y_hat = torch.relu(torch.relu(x_batch@w1+b1)@w2+b2)
                loss = F.mse_loss(y_hat,y_batch)
                loss1 = F.mse_loss(y_hat[:,0],y_batch[:,0])
                loss2 = F.mse_loss(y_hat[:,1],y_batch[:,1])
                # Take the per-task gradients before the weights are updated so
                # they refer to the same parameters as the forward pass.
                grad1_w1, = torch.autograd.grad(loss1,w1,retain_graph=True)
                grad2_w1, = torch.autograd.grad(loss2,w1,retain_graph=True)
                loss.backward()
                self._sgd_step((w1,b1,w2,b2),m1/m)
                # 1 where the two task gradients disagree in sign, 0 elsewhere.
                count = ((grad1_w1>=0) != (grad2_w1>=0)).to(torch.float32)
                if torch.max(count) >= self.threshold:
                    # Flat argmax over (features, hidden): the column is the node.
                    split_index = int(torch.argmax(count)) % self.hidden_layer
                    break
            if epoch%10==0 and loss is not None:
                print("epoch:%d , loss:%f"%(epoch,loss))
            if split_index is not None:
                break

        if split_index is None:
            self.parameters = {
                    "w1":w1,
                    "b1":b1,
                    "w2":w2,
                    "b2":b2}
            return self.parameters

        index = split_index
        self.index = index
        kept_rows = [r for r in range(self.hidden_layer+1) if r != index]
        with torch.no_grad():
            # Append a copy of hidden node `index` as the last node.
            w1_split = torch.cat([w1,w1[:,index:index+1]],1)
            b1_split = torch.cat([b1,b1[index:index+1]])
            w2_split = torch.cat([w2,w2[index:index+1,:]],0)
            # Row order matches the columns each task reads in _split_forward,
            # so the split network starts from the same function as before.
            w21 = w2_split[:-1,0].clone()
            w22 = w2_split[kept_rows,1].clone()
        # Built under no_grad, these are leaf tensors, so .grad gets populated.
        w1 = w1_split.requires_grad_()
        b1 = b1_split.requires_grad_()
        w21.requires_grad_()
        w22.requires_grad_()
        split_params = (w1,b1,w21,w22,b2)
        for epoch1 in range(self.epoches):
            for x_batch,y_batch in DataLoader(dataset,batch_size,shuffle=True):
                m1 = x_batch.shape[0]
                y_hat = self._split_forward(x_batch,w1,b1,w21,w22,b2,index)
                loss = F.mse_loss(y_hat,y_batch[:,:2])
                loss.backward()
                self._sgd_step(split_params,m1/m)
            if epoch1%10==0 and loss is not None:
                print("更新节点epoch:%d , loss:%f"%(epoch1,loss))

        self.parameters = {
                "w1":w1,
                "b1":b1,
                "w21":w21,
                "w22":w22,
                "b2":b2}
        return self.parameters

    def predict1(self,x):
        """Predict with the unsplit network (parameters "w1", "b1", "w2", "b2")."""
        w1 = self.parameters["w1"]
        w2 = self.parameters["w2"]
        b1 = self.parameters["b1"]
        b2 = self.parameters["b2"]
        with torch.no_grad():
            y_pred = torch.relu(torch.relu(x@w1+b1)@w2+b2)
        return y_pred

    def predict2(self,x):
        """Predict with the split network; x must be 2-D and the result is (batch, 2)."""
        w1 = self.parameters["w1"]
        w21 = self.parameters["w21"]
        w22 = self.parameters["w22"]
        b1 = self.parameters["b1"]
        b2 = self.parameters["b2"]
        with torch.no_grad():
            y_pred = self._split_forward(x,w1,b1,w21,w22,b2,int(self.index))
        return y_pred
        
            

In [88]:
# Simulated data drawn from a uniform distribution
n = 3000  # n is the sample size; the later data cells reuse the name the same way
from sklearn.model_selection import train_test_split
def data_one(data):
    """Min-max scale every column of a 2-D tensor to [0, 1], in place.

    Args:
        data: 2-D tensor of shape (rows, columns). It is overwritten with the
            scaled values.

    Returns:
        The same tensor object that was passed in, now scaled column-wise as
        (value - column_min) / (column_max - column_min).

    A column whose values are all equal has a zero range; it is mapped to 0
    instead of producing NaN from 0/0.
    """
    if data.numel() == 0:
        # Nothing to scale; min/max of an empty tensor would raise.
        return data
    col_min = data.min(dim=0).values
    col_range = data.max(dim=0).values - col_min
    # Replace zero ranges by 1 so constant columns become 0 rather than NaN.
    safe_range = torch.where(col_range == 0, torch.ones_like(col_range), col_range)
    # copy_ keeps the in-place contract of the element-wise version.
    data.copy_((data - col_min) / safe_range)
    return data
# Draw n samples with 5 uniform features and push them through two random
# linear maps (5 -> 4 -> 2) plus Gaussian noise to obtain two targets.
# The draw order is kept so a fixed seed reproduces the same data.
x = torch.tensor(np.random.uniform(0,8,(n,5)),dtype=torch.float32)
w1 = torch.tensor(np.random.uniform(0,4,(5,4)),dtype=torch.float32)
w2 = torch.tensor(np.random.uniform(0,4,(4,2)),dtype=torch.float32)
noise = torch.randn(n,2)
# Scale targets first (they are computed from the unscaled x), then the inputs.
y = data_one(x@w1@w2+noise)
x = data_one(x)
x_train,x_test,y_train,y_test = train_test_split(x,y)

# Sanity check: number of scaled target values (first two columns) above 1.
# One vectorised comparison replaces the 2*n element-by-element loop.
count = int((y[:, :2] > 1).sum())

In [7]:
# Simulated data drawn from a normal distribution
n = 1000
# Standard-normal features and mixing weights (5 -> 4 -> 2), noise with std 0.2.
x = torch.tensor(np.random.normal(0,1,(n,5)),dtype=torch.float32)
w1 = torch.tensor(np.random.normal(0,1,(5,4)),dtype=torch.float32)
w2 = torch.tensor(np.random.normal(0,1,(4,2)),dtype=torch.float32)
noise = torch.normal(0,0.2,(n,2))
# Targets are built from the unscaled x, then both tensors are min-max scaled.
y = data_one(x@w1@w2+noise)
x = data_one(x)
# NOTE(review): no random_state is passed, so the split differs between runs.
x_train,x_test,y_train,y_test = train_test_split(x,y)

In [76]:
# Simulated data drawn from an exponential distribution
n = 3000
# Exponential features (scale 10); mixing weights and noise are standard normal.
x = torch.tensor(np.random.exponential(10,(n,5)),dtype=torch.float32)
w1 = torch.normal(0,1,(5,4),dtype=torch.float32)
w2 = torch.normal(0,1,(4,2),dtype=torch.float32)
noise = torch.normal(0,1,(n,2))
# Targets are built from the unscaled x, then both tensors are min-max scaled.
y = data_one(x@w1@w2+noise)
x = data_one(x)
# NOTE(review): no random_state is passed, so the split differs between runs.
x_train,x_test,y_train,y_test = train_test_split(x,y)

In [6]:
#硬参数网络搭建
class MTLnet(nn.Module):
    """Hard-parameter-sharing network for two regression tasks.

    A shared trunk (4 -> 4 -> 3, sigmoid activations) feeds two task heads
    (3 -> 1, sigmoid). Parameter names and order are unchanged:
    sharedlayer.0, sharedlayer.2, y1.0, y2.0 (weight then bias each).
    """

    def __init__(self):
        super(MTLnet, self).__init__()

        self.sharedlayer = nn.Sequential(
            nn.Linear(4, 4),
            nn.Sigmoid(),
            nn.Linear(4,3),
            nn.Sigmoid(),
        )
        self.y1 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )
        self.y2 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return both task outputs joined on the last dimension.

        Args:
            x: a single sample of shape (4,) or a batch of shape (n, 4).

        Returns:
            Tensor of shape (2,) for a single sample, (n, 2) for a batch.
        """
        h_shared = self.sharedlayer(x)
        y1 = self.y1(h_shared)
        y2 = self.y2(h_shared)
        # Joining on the last dim handles 1-D and 2-D inputs alike; testing
        # len(y1)==1 mistook a batch of one row for a single sample and
        # returned shape (2, 1) instead of (1, 2).
        return torch.cat([y1, y2], dim=-1)

def _third_param_grad_T(net):
    """Return a transposed copy of the gradient of net's third parameter.

    For the MTLnet-style modules in this notebook the third parameter is the
    weight of the second shared Linear layer (sharedlayer.2.weight).
    """
    third = list(net.parameters())[2]
    # detach + clone so later zero_grad()/backward() calls cannot alter the copy.
    return third.grad.detach().clone().T


def param_read():
    """Transposed gradient copy of the third parameter of the global `model`."""
    return _third_param_grad_T(model)


def param_read1():
    """Transposed gradient copy of the third parameter of the global `model1`."""
    return _third_param_grad_T(model1)


def param_read2():
    """Transposed gradient copy of the third parameter of the global `model2`."""
    return _third_param_grad_T(model2)

def matr(m1,m2):
    """Element-wise sign agreement: True where m1 and m2 are both >= 0 or both < 0."""
    first_non_negative = m1 >= 0
    second_non_negative = m2 >= 0
    return first_non_negative == second_non_negative

def index_func(x):
    """Return the hidden-node index lists read by the two task heads.

    The second list is always [1, 2, 3]. The first list is [0, 1, 2, 3]
    without one entry: 1 when x == 0, 2 when x == 1, and 3 for any other x.
    """
    if x == 0:
        left_out = 1
    elif x == 1:
        left_out = 2
    else:
        left_out = 3
    first_head = [node for node in range(4) if node != left_out]
    second_head = [1,2,3]
    return first_head,second_head

                

In [10]:
model = MTLnet()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
# cm1[i, k] counts on how many consecutive samples the task-1 and task-2
# gradients of entry (i, k) of the transposed second shared weight matrix
# had opposite signs.
cm1 = torch.zeros(4,3)
threshold = 10
# The loss object is stateless, so it is built once instead of once per sample.
loss_f = nn.MSELoss(reduction='none')
split_found = False
for epoch in range(200):
    for x_batch,y_batch in TensorDataset(x_train,y_train):
        y_hat = model(x_batch)
        # Total loss and the two per-task losses.
        loss = loss_f(y_batch,y_hat).sum()
        loss1 = loss_f(y_batch[0],y_hat[0])
        loss2 = loss_f(y_batch[1],y_hat[1])
        optimizer.zero_grad()
        # Back-propagate the task-1 loss and record its gradient.
        loss1.backward(retain_graph=True)
        w2_grad1 = param_read()
        optimizer.zero_grad()
        # Back-propagate the task-2 loss and record its gradient.
        loss2.backward(retain_graph=True)
        w2_grad2 = param_read()
        optimizer.zero_grad()
        # Back-propagate the total loss and update the parameters.
        loss.backward()
        optimizer.step()
        # dm is True where both task gradients agree in sign.
        dm = matr(w2_grad1,w2_grad2)
        # Extend the streak where the signs differ, reset it where they agree.
        cm1 = torch.where(dm, torch.zeros_like(cm1), cm1 + 1)
        if torch.max(cm1) >= threshold and epoch+1>=80:
            # Flat argmax over a (4, 3) matrix: the column is the hidden node.
            index = torch.argmax(cm1) % 3
            split_found = True
            break

    if (epoch+1)%10==0:
        # Evaluation needs no autograd graph.
        with torch.no_grad():
            y_pred = model(x_test)
            loss = ((y_pred-y_test)**2).sum()/(len(y_pred)*2)
            y_pred1 = model(x_train)
            loss1 = ((y_pred1-y_train)**2).sum()/(len(y_pred1)*2)
        print("epoch:%d , test_loss:%f , train_loss:%f"%(epoch+1,loss,loss1))

    if split_found:
        print("达到终止条件，迭代训练终止")
        print(f"复制的结点序号为：{index}")
        break

epoch:10 , test_loss:0.015810 , train_loss:0.015401
epoch:20 , test_loss:0.015330 , train_loss:0.014803
epoch:30 , test_loss:0.015099 , train_loss:0.014549
epoch:40 , test_loss:0.014935 , train_loss:0.014370
epoch:50 , test_loss:0.014802 , train_loss:0.014212
epoch:60 , test_loss:0.014668 , train_loss:0.014037
epoch:70 , test_loss:0.014524 , train_loss:0.013861
epoch:80 , test_loss:0.014568 , train_loss:0.013976
达到终止条件，迭代训练终止
复制的结点序号为：1


In [11]:
#第一次复制节点
class MTLnet1(nn.Module):
    """MTLnet after the first node duplication: 4 shared nodes, 3 per task head.

    The trunk is 4 -> 4 -> 4 (sigmoid). Each head (3 -> 1, sigmoid) reads the
    three shared nodes chosen by index_func(index), where `index` is the
    notebook-level variable naming the duplicated node.
    """

    def __init__(self):
        super(MTLnet1, self).__init__()

        self.sharedlayer = nn.Sequential(
            nn.Linear(4, 4),
            nn.Sigmoid(),
            nn.Linear(4,4),
            nn.Sigmoid(),
        )
        self.y1 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )
        self.y2 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return both task outputs joined on the last dimension.

        Args:
            x: a single sample of shape (4,) or a batch of shape (n, 4).

        Returns:
            Tensor of shape (2,) for a single sample, (n, 2) for a batch.
        """
        h_shared = self.sharedlayer(x)
        index_list1,index_list2 = index_func(index)
        # Select nodes on the last dim so 1-D and 2-D inputs take one path.
        # Testing x.shape[0]==4 mistook a batch of four rows for one sample.
        y1 = self.y1(h_shared[..., index_list1])
        y2 = self.y2(h_shared[..., index_list2])
        # dim=-1 also keeps a batch of one row at shape (1, 2).
        return torch.cat([y1, y2], dim=-1)

# Build the 4-node network and initialise it from the trained `model`, with
# hidden node `index` duplicated in front of the existing nodes.
model1 = MTLnet1()
optimizer = torch.optim.Adam(model1.parameters(), lr=0.005)
# Weights are transposed to (in_features, out_features) for the edits below.
# NOTE(review): state_dict() tensors (and their .T views) appear to share
# storage with `model`; the ones assigned below without torch.cat may keep
# aliasing it while model1 trains — confirm, or clone them.
w0 = model.state_dict()["sharedlayer.0.weight"].T
b0 = model.state_dict()["sharedlayer.0.bias"]
w1 = model.state_dict()["sharedlayer.2.weight"].T
b1 = model.state_dict()["sharedlayer.2.bias"]
w21 = model.state_dict()["y1.0.weight"].T
w22 = model.state_dict()["y2.0.weight"].T
b21 = model.state_dict()["y1.0.bias"]
b22 = model.state_dict()["y2.0.bias"]
# index_list holds the two node positions other than `index`.
index_list = [0,1,2]
index_list.pop(index)
# Prepend a copy of column/entry `index`: the copy becomes node 0 and the
# original nodes move to positions 1..3.
w1 = torch.cat([w1[:,index].reshape(4,1),w1],1)
b1 = torch.cat([b1[index].reshape(1),b1])
# Task-1 head weights reordered as [weight of `index`, remaining two weights].
w21 = torch.cat([w21[index].reshape(1,1),w21[index_list]])
# Overwrite model1's parameters by position, in parameters() order.
count = 0
for m in model1.parameters():
    if count == 0:
        m.data = w0.T
    if count == 1:
        m.data = b0
    if count == 2:
        m.data = w1.T
    if count == 3:
        m.data = b1
    if count == 4:
        m.data = w21.T
    if count == 5:
        m.data = b21
    if count == 6:
        m.data = w22.T
    if count == 7:
        m.data = b22
    count += 1

# print(model1.forward(x_test))
# print(model.forward(x_test))

# cm1[i, k]: consecutive samples on which the two task gradients of entry
# (i, k) of the transposed second shared weight matrix disagreed in sign.
cm1 = torch.zeros(4,4)
threshold = 8
for epoch in range(50):
    for x_batch,y_batch in TensorDataset(x_train,y_train):
        y_hat = model1.forward(x_batch)
        # compute the individual loss terms
        loss_f = nn.MSELoss(reduction='none')
        loss = loss_f(y_batch,y_hat).sum()
        # print(y_batch,y_hat,loss)
        loss1 = loss_f(y_batch[0],y_hat[0])
        loss2 = loss_f(y_batch[1],y_hat[1])
        optimizer.zero_grad()
        # back-propagate the task-1 loss
        loss1.backward(retain_graph=True)
        w2_grad1 = param_read1()
        optimizer.zero_grad()
        # back-propagate the task-2 loss
        loss2.backward(retain_graph=True)
        w2_grad2 = param_read1()
        optimizer.zero_grad()
        # back-propagate the total loss and update the parameters
        loss.backward()
        optimizer.step()
        # print(w2_grad1,w2_grad2)
        # find the positions where the two gradients differ in sign
        dm = matr(w2_grad1,w2_grad2)
        cm2 = cm1.clone()
        for i in range(dm.shape[0]):
            for k in range(dm.shape[1]):
                # Skip column 0 (the prepended copy) and column index+1 (where
                # the copied node sits after the prepend).
                if k == 0 or k == index+1:
                    continue
                # A sign conflict extends the streak ...
                if dm[i,k]==False:
                    cm1[i,k] += 1
                # ... and anything else resets it to zero.
                if cm1[i,k]!=cm2[i,k]+1:
                    cm1[i,k]=0
        # print(dm)
        # With range(50), epoch+1 >= 50 only holds in the final epoch.
        if torch.max(cm1) >= threshold and epoch+1 >= 50:
            # Equivalent to argmax % 4: the column of the flat argmax.
            index1 = (torch.argmax(cm1)+1)%4
            if index1 == 0:
                index1 = 4-1
            else:
                index1 -= 1
            break

    # Every 5 epochs report the mean squared error on the test and training sets.
    if (epoch+1)%5==0:
        y_pred = model1.forward(x_test)
        loss = ((y_pred-y_test)**2).sum()/(len(y_pred)*2)
        y_pred1 = model1.forward(x_train)
        loss1 = ((y_pred1-y_train)**2).sum()/(len(y_pred1)*2)
        print("epoch:%d , test_loss:%f , train_loss:%f"%(epoch+1,loss,loss1))

    # Same condition as the inner break: leave the epoch loop too.
    if torch.max(cm1) >= threshold and epoch+1 >= 50:
        print("达到终止条件，迭代训练终止")
        print(f"复制的结点序号为：{index1}")
        break

epoch:5 , test_loss:0.014299 , train_loss:0.013645
epoch:10 , test_loss:0.014256 , train_loss:0.013587
epoch:15 , test_loss:0.014224 , train_loss:0.013539
epoch:20 , test_loss:0.014197 , train_loss:0.013498
epoch:25 , test_loss:0.014174 , train_loss:0.013460
epoch:30 , test_loss:0.014154 , train_loss:0.013423
epoch:35 , test_loss:0.014137 , train_loss:0.013389
epoch:40 , test_loss:0.014122 , train_loss:0.013359
epoch:45 , test_loss:0.014110 , train_loss:0.013333
epoch:50 , test_loss:0.013916 , train_loss:0.013189
达到终止条件，迭代训练终止
复制的结点序号为：3


In [12]:
#第二次复制结点
class MTLnet2(nn.Module):
    """MTLnet after the second node duplication: 5 shared nodes, 3 per task head.

    The trunk is 4 -> 4 -> 5 (sigmoid) and each head is 3 -> 1 (sigmoid).
    The first layer is declared with 4 inputs, matching MTLnet, MTLnet1,
    MTLnet3 and the 4x4 weights copied in from model1; it was declared as
    Linear(5, 4), which cannot process 4-feature input on its own.
    """

    def __init__(self):
        super(MTLnet2, self).__init__()

        self.sharedlayer = nn.Sequential(
            nn.Linear(4,4),
            nn.Sigmoid(),
            nn.Linear(4,5),
            nn.Sigmoid(),
        )
        self.y1 = nn.Sequential(
            nn.Linear(3,1),
            nn.Sigmoid(),
        )
        self.y2 = nn.Sequential(
            nn.Linear(3,1),
            nn.Sigmoid(),
        )

    def forward(self,x):
        """Return both task outputs joined on the last dimension.

        Reads the notebook-level variables `index` and `index1` to decide
        which shared nodes feed the task-1 head.

        Args:
            x: a single sample of shape (4,) or a batch of shape (n, 4).

        Returns:
            Tensor of shape (2,) for a single sample, (n, 2) for a batch.
        """
        h_shared = self.sharedlayer(x)
        # The one position of [0, 1, 2] that is neither index nor index1-1,
        # shifted by 2, is the third node read by the task-1 head.
        a = [0,1,2]
        a.remove(index)
        a.remove(index1-1)
        b = a[0]+2
        index_list1 = [1,0,b]
        index_list2 = [2,3,4]
        # Select nodes on the last dim so 1-D and 2-D inputs take one path.
        # Testing x.shape[0]==4 mistook a batch of four rows for one sample.
        y1 = self.y1(h_shared[..., index_list1])
        y2 = self.y2(h_shared[..., index_list2])
        # dim=-1 also keeps a batch of one row at shape (1, 2).
        return torch.cat([y1,y2], dim=-1)
# Build the 5-node network and initialise it from the trained `model1`, with
# hidden node `index1` duplicated in front of the existing nodes.
model2 = MTLnet2()
optimizer = torch.optim.Adam(model2.parameters(), lr=0.005)
# Weights are transposed to (in_features, out_features) for the edits below.
# NOTE(review): state_dict() tensors (and their .T views) appear to share
# storage with `model1`; the ones assigned below without torch.cat may keep
# aliasing it while model2 trains — confirm, or clone them.
w0 = model1.state_dict()["sharedlayer.0.weight"].T
b0 = model1.state_dict()["sharedlayer.0.bias"]
w1 = model1.state_dict()["sharedlayer.2.weight"].T
# Prepend a copy of column `index1`; existing nodes shift one position right.
w1 = torch.cat([w1[:,index1].reshape(4,1),w1],1)
b1 = model1.state_dict()["sharedlayer.2.bias"]
b1 = torch.cat([b1[index1].reshape(1),b1])
w21 = model1.state_dict()["y1.0.weight"].T
# Task-1 head weights reordered as [entry index1-1, remaining two entries].
# NOTE(review): index1-1 is used as the position inside model1's task-1 head
# weights — verify this matches the node order index_func gave that head.
index_list = [0,1,2]
index_list.pop(index1-1)
w21 = torch.cat([w21[index1-1].reshape(1,1),w21[index_list]])
w22 = model1.state_dict()["y2.0.weight"].T
b21 = model1.state_dict()["y1.0.bias"]
b22 = model1.state_dict()["y2.0.bias"]
# Overwrite model2's parameters by position, in parameters() order.
count = 0
for m in model2.parameters():
    if count == 0:
        m.data = w0.T
    if count == 1:
        m.data = b0
    if count == 2:
        m.data = w1.T
    if count == 3:
        m.data = b1
    if count == 4:
        m.data = w21.T
    if count == 5:
        m.data = b21
    if count == 6:
        m.data = w22.T
    if count == 7:
        m.data = b22
    count += 1

# print(model1.forward(x_test))
# print(model2.forward(x_test))
# print(model2.state_dict()["sharedlayer.2.weight"].T)
# print(model1.state_dict()["sharedlayer.2.weight"].T)
# print(w21)
# print(model1.state_dict()["y1.0.weight"].T)

# cm1[i, k]: consecutive samples on which the two task gradients of entry
# (i, k) of the transposed second shared weight matrix disagreed in sign.
cm1 = torch.zeros(4,5)
threshold = 8
for epoch in range(100):
    for x_batch,y_batch in TensorDataset(x_train,y_train):
        y_hat = model2.forward(x_batch)
        # compute the individual loss terms
        loss_f = nn.MSELoss(reduction='none')
        loss = loss_f(y_batch,y_hat).sum()
        # print(y_batch,y_hat,loss)
        loss1 = loss_f(y_batch[0],y_hat[0])
        loss2 = loss_f(y_batch[1],y_hat[1])
        optimizer.zero_grad()
        # back-propagate the task-1 loss
        loss1.backward(retain_graph=True)
        w2_grad1 = param_read2()
        optimizer.zero_grad()
        # back-propagate the task-2 loss
        loss2.backward(retain_graph=True)
        w2_grad2 = param_read2()
        optimizer.zero_grad()
        # back-propagate the total loss and update the parameters
        loss.backward()
        optimizer.step()
        # print(w2_grad1,w2_grad2)
        # find the positions where the two gradients differ in sign
        dm = matr(w2_grad1,w2_grad2)
        cm2 = cm1.clone()
        for i in range(dm.shape[0]):
            for k in range(dm.shape[1]):
                # Skip the two prepended copies (columns 0 and 1) and the
                # columns index+2 and index1+1.
                if k == 0 or k == 1 or k == index+2 or k == index1+1:
                    continue
                # A sign conflict extends the streak ...
                if dm[i,k]==False:
                    cm1[i,k] += 1
                # ... and anything else resets it to zero.
                if cm1[i,k]!=cm2[i,k]+1:
                    cm1[i,k]=0
        # print(dm)
        # NOTE(review): this tests epoch>=50 while the earlier cells test
        # epoch+1 against their bound — confirm the off-by-one is intended.
        if torch.max(cm1) >= threshold and epoch>=50:
            # Equivalent to argmax % 5: the column of the flat argmax.
            index2 = (torch.argmax(cm1)+1)%5
            if index2 == 0:
                index2 = 5-1
            else:
                index2 -= 1
            break

    # Every 5 epochs report the mean squared error on the test and training sets.
    if (epoch+1)%5==0:
        y_pred = model2.forward(x_test)
        loss = ((y_pred-y_test)**2).sum()/(len(y_pred)*2)
        y_pred1 = model2.forward(x_train)
        loss1 = ((y_pred1-y_train)**2).sum()/(len(y_pred1)*2)
        print("epoch:%d , test_loss:%f , train_loss:%f"%(epoch+1,loss,loss1))

    # Same condition as the inner break: leave the epoch loop too.
    if torch.max(cm1) >= threshold and epoch>=50:
        print("达到终止条件，迭代训练终止")
        print(f"复制的结点序号为：{index2}")
        break

epoch:5 , test_loss:0.014090 , train_loss:0.013307
epoch:10 , test_loss:0.014080 , train_loss:0.013278
epoch:15 , test_loss:0.014074 , train_loss:0.013258
epoch:20 , test_loss:0.014069 , train_loss:0.013241
epoch:25 , test_loss:0.014064 , train_loss:0.013227
epoch:30 , test_loss:0.014061 , train_loss:0.013215
epoch:35 , test_loss:0.014058 , train_loss:0.013204
epoch:40 , test_loss:0.014055 , train_loss:0.013195
epoch:45 , test_loss:0.014051 , train_loss:0.013186
epoch:50 , test_loss:0.014048 , train_loss:0.013178
达到终止条件，迭代训练终止
复制的结点序号为：2


In [13]:
#第三次复制节点
class MTLnet3(nn.Module):
    """MTLnet after the third node duplication: the heads no longer share nodes.

    The trunk is 4 -> 4 -> 6 (sigmoid). The task-1 head (3 -> 1, sigmoid)
    reads shared nodes 0-2 and the task-2 head reads nodes 3-5.
    """

    def __init__(self):
        super(MTLnet3, self).__init__()

        self.sharedlayer = nn.Sequential(
            nn.Linear(4, 4),
            nn.Sigmoid(),
            nn.Linear(4,6),
            nn.Sigmoid(),
        )
        self.y1 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )
        self.y2 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )

    def forward(self,x):
        """Return both task outputs joined on the last dimension.

        Args:
            x: a single sample of shape (4,) or a batch of shape (n, 4).

        Returns:
            Tensor of shape (2,) for a single sample, (n, 2) for a batch.
        """
        h_shared = self.sharedlayer(x)
        index_list1 = [0,1,2]
        index_list2 = [3,4,5]
        # Select nodes on the last dim so 1-D and 2-D inputs take one path.
        # Testing x.shape[0]==4 mistook a batch of four rows for one sample.
        y1 = self.y1(h_shared[..., index_list1])
        y2 = self.y2(h_shared[..., index_list2])
        # dim=-1 also keeps a batch of one row at shape (1, 2).
        return torch.cat([y1,y2], dim=-1)
# Build the 6-node network and initialise it from the trained `model2`, with
# hidden node `index2` duplicated in front of the existing nodes.
model3 = MTLnet3()
optimizer = torch.optim.Adam(model3.parameters(), lr=0.001)
# Weights are transposed to (in_features, out_features) for the edits below.
# NOTE(review): state_dict() tensors (and their .T views) appear to share
# storage with `model2`; the ones assigned below without torch.cat may keep
# aliasing it while model3 trains — confirm, or clone them.
w0 = model2.state_dict()["sharedlayer.0.weight"].T
b0 = model2.state_dict()["sharedlayer.0.bias"]
w1 = model2.state_dict()["sharedlayer.2.weight"].T
# Prepend a copy of column `index2`; existing nodes shift one position right.
w1 = torch.cat([w1[:,index2].reshape(4,1),w1],1)
b1 = model2.state_dict()["sharedlayer.2.bias"]
b1 = torch.cat([b1[index2].reshape(1),b1])
w21 = model2.state_dict()["y1.0.weight"].T
# Task-1 head weights reordered as [entry index2-2, remaining two entries].
# NOTE(review): index2-2 is used as the position inside model2's task-1 head
# weights — verify this matches the node order MTLnet2.forward gives that head.
index_list = [0,1,2]
index_list.pop(index2-2)
w21 = torch.cat([w21[index2-2].reshape(1,1),w21[index_list]])
w22 = model2.state_dict()["y2.0.weight"].T
b21 = model2.state_dict()["y1.0.bias"]
b22 = model2.state_dict()["y2.0.bias"]

# Overwrite model3's parameters by position, in parameters() order.
count = 0
for m in model3.parameters():
    if count == 0:
        m.data = w0.T
    if count == 1:
        m.data = b0
    if count == 2:
        m.data = w1.T
    if count == 3:
        m.data = b1
    if count == 4:
        m.data = w21.T
    if count == 5:
        m.data = b21
    if count == 6:
        m.data = w22.T
    if count == 7:
        m.data = b22
    count += 1
# print(model3.state_dict()["sharedlayer.2.weight"].T)
# print(model2.forward(x_test))
# print(model3.forward(x_test))

# Plain training: one Adam step per sample on the summed squared error; no
# gradient-conflict tracking in this stage.
for epoch in range(300):
    for x_batch,y_batch in TensorDataset(x_train,y_train):
        y_hat = model3.forward(x_batch)
        loss_f = nn.MSELoss(reduction='none')
        loss = loss_f(y_batch,y_hat).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Every 10 epochs report the mean squared error on the test and training sets.
    if (epoch+1)%10==0:
        y_pred = model3.forward(x_test)
        loss = ((y_pred-y_test)**2).sum()/(len(y_pred)*2)
        y_pred1 = model3.forward(x_train)
        loss1 = ((y_pred1-y_train)**2).sum()/(len(y_pred1)*2)
        print("epoch:%d , test_loss:%f , train_loss:%f"%(epoch+1,loss,loss1))

epoch:10 , test_loss:0.014157 , train_loss:0.013497
epoch:20 , test_loss:0.013859 , train_loss:0.013142
epoch:30 , test_loss:0.013812 , train_loss:0.013052
epoch:40 , test_loss:0.013796 , train_loss:0.013009
epoch:50 , test_loss:0.013791 , train_loss:0.012986
epoch:60 , test_loss:0.013789 , train_loss:0.012971
epoch:70 , test_loss:0.013789 , train_loss:0.012960
epoch:80 , test_loss:0.013789 , train_loss:0.012952
epoch:90 , test_loss:0.013789 , train_loss:0.012945
epoch:100 , test_loss:0.013789 , train_loss:0.012939
epoch:110 , test_loss:0.013789 , train_loss:0.012933
epoch:120 , test_loss:0.013790 , train_loss:0.012928
epoch:130 , test_loss:0.013790 , train_loss:0.012924
epoch:140 , test_loss:0.013790 , train_loss:0.012919
epoch:150 , test_loss:0.013790 , train_loss:0.012915
epoch:160 , test_loss:0.013790 , train_loss:0.012910
epoch:170 , test_loss:0.013790 , train_loss:0.012906
epoch:180 , test_loss:0.013791 , train_loss:0.012902
epoch:190 , test_loss:0.013791 , train_loss:0.012898
ep

In [7]:
#硬参数共享神经网络(不复制节点)
class MTLnet(nn.Module):
    """Hard-parameter-sharing baseline for two regression tasks (no node duplication).

    A shared trunk (4 -> 4 -> 3, sigmoid activations) feeds two task heads
    (3 -> 1, sigmoid). Parameter names and order are unchanged:
    sharedlayer.0, sharedlayer.2, y1.0, y2.0 (weight then bias each).
    """

    def __init__(self):
        super(MTLnet, self).__init__()

        self.sharedlayer = nn.Sequential(
            nn.Linear(4, 4),
            nn.Sigmoid(),
            nn.Linear(4,3),
            nn.Sigmoid(),
        )
        self.y1 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )
        self.y2 = nn.Sequential(
            nn.Linear(3, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return both task outputs joined on the last dimension.

        Args:
            x: a single sample of shape (4,) or a batch of shape (n, 4).

        Returns:
            Tensor of shape (2,) for a single sample, (n, 2) for a batch.
        """
        h_shared = self.sharedlayer(x)
        y1 = self.y1(h_shared)
        y2 = self.y2(h_shared)
        # Joining on the last dim handles 1-D and 2-D inputs alike; testing
        # len(y1)==1 mistook a batch of one row for a single sample and
        # returned shape (2, 1) instead of (1, 2).
        return torch.cat([y1, y2], dim=-1)
# Baseline run: train the fully shared network without any node duplication.
model = MTLnet()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
# The loss object is stateless, so it is built once instead of once per sample.
loss_f = nn.MSELoss(reduction='none')
for epoch in range(300):
    # One Adam step per training sample on the summed squared error.
    for x_batch,y_batch in TensorDataset(x_train,y_train):
        y_hat = model(x_batch)
        loss = loss_f(y_batch,y_hat).sum()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1)%10==0:
        # Evaluation needs no autograd graph.
        with torch.no_grad():
            y_pred = model(x_test)
            loss = ((y_pred-y_test)**2).sum()/(len(y_pred)*2)
            y_pred1 = model(x_train)
            loss1 = ((y_pred1-y_train)**2).sum()/(len(y_pred1)*2)
        print("epoch:%d , test_loss:%f , train_loss:%f"%(epoch+1,loss,loss1))

epoch:10 , test_loss:0.016356 , train_loss:0.014841
epoch:20 , test_loss:0.015518 , train_loss:0.014408
epoch:30 , test_loss:0.015243 , train_loss:0.014279
epoch:40 , test_loss:0.015013 , train_loss:0.014134
epoch:50 , test_loss:0.014841 , train_loss:0.013995
epoch:60 , test_loss:0.014748 , train_loss:0.013898
epoch:70 , test_loss:0.014689 , train_loss:0.013825
epoch:80 , test_loss:0.014646 , train_loss:0.013768
epoch:90 , test_loss:0.014614 , train_loss:0.013721
epoch:100 , test_loss:0.014588 , train_loss:0.013678
epoch:110 , test_loss:0.014565 , train_loss:0.013638
epoch:120 , test_loss:0.014543 , train_loss:0.013599
epoch:130 , test_loss:0.014524 , train_loss:0.013564
epoch:140 , test_loss:0.014508 , train_loss:0.013535
epoch:150 , test_loss:0.014494 , train_loss:0.013513
epoch:160 , test_loss:0.014483 , train_loss:0.013495
epoch:170 , test_loss:0.014473 , train_loss:0.013482
epoch:180 , test_loss:0.014464 , train_loss:0.013471
epoch:190 , test_loss:0.014456 , train_loss:0.013462
ep

In [3]:
class net:
    """Randomly initialised NumPy network with a linear hidden layer and sigmoid output.

    Args:
        hidden_layer: number of hidden units.
    """

    def __init__(self,hidden_layer=3):
        self.hidden_layer = hidden_layer

    def _sigmoid(self,x):
        """Apply the logistic function to x in place and return x.

        One vectorised expression replaces the element-by-element loop.
        """
        x[...] = 1/(1+np.exp(-x))
        return x

    def forward(self,x):
        """Draw fresh random weights and run one forward pass.

        Args:
            x: array of shape (samples, features).

        Returns:
            (y_hat, params): y_hat is sigmoid((x @ w1 + b1) @ w2 + b2) with
            shape (samples, 2); params maps "w1", "w2", "b1", "b2" to the
            arrays that were used. Biases start at zero with shapes
            (1, hidden_layer) and (1, 2).
        """
        n = x.shape[1]
        # Draw order (w1, then w2) is kept so a fixed seed gives the same weights.
        w1 = np.random.randn(n,self.hidden_layer)
        b1 = np.zeros((1,self.hidden_layer))
        w2 = np.random.randn(self.hidden_layer,2)
        b2 = np.zeros((1,2))
        y_hat = self._sigmoid((x@w1+b1)@w2+b2)
        params = {"w1":w1,
                    "w2":w2,
                    "b1":b1,
                    "b2":b2}
        return y_hat,params

def mse_loss(y,y_hat):
    """Mean squared error between two 2-D arrays of identical shape.

    The summed squared difference is divided by rows * columns of y.
    """
    rows, cols = y.shape[0], y.shape[1]
    residual = y-y_hat
    return np.sum(residual**2)/(rows*cols)

def matr(m1,m2):
    """Element-wise sign agreement: True where m1 and m2 are both >= 0 or both < 0."""
    first_non_negative = m1 >= 0
    second_non_negative = m2 >= 0
    return first_non_negative == second_non_negative

def sigmoid(x):
    """Apply the logistic function 1 / (1 + exp(-x)) to x in place and return x.

    One vectorised expression replaces the element-by-element double loop;
    the argument is still overwritten and returned.
    """
    x[...] = 1/(1+np.exp(-x))
    return x


In [54]:
# Hand-written gradient descent for the NumPy network, stopping as soon as
# the two task gradients of some w1 entry have disagreed in sign on
# `threshold` consecutive iterations.
model = net()
threshold = 3
n = x_train.shape[1]
# cm1[i, k]: consecutive iterations with a sign conflict on entry (i, k) of w1.
cm1 = np.zeros((n,3))
lr = 0.001
epoches = 1000
# net.forward draws the random initial weights; only `params` is used, since
# y_hat is recomputed at the top of every iteration.
y_hat,params = model.forward(x_train)
w1 = params["w1"]
w2 = params["w2"]
b1 = params["b1"]
b2 = params["b2"]
for epoch in range(epoches):
    y_hat = sigmoid((x_train@w1+b1)@w2+b2)
    m = x_train.shape[0]
    loss = mse_loss(y_hat,y_train)
    # NOTE(review): the four gradient expressions below chain matrix products
    # (for example (1,m)@(m,1)@(1,m)@(m,3)) where the derivative of a
    # sigmoid/MSE output would normally use element-wise products — verify
    # them against the intended formula.
    dw2 = (np.vstack((-(y_train[:,0].reshape(m,1)-y_hat[:,0].reshape(m,1)).T@y_hat[:,0].reshape(m,1)@(1-y_hat[:,0].reshape(m,1)).T@(x_train@w1+b1),-(y_train[:,1].reshape(m,1)-y_hat[:,1].reshape(m,1)).T@y_hat[:,1].reshape(m,1)@(1-y_hat[:,1].reshape(m,1)).T@(x_train@w1+b1)))).T#3*2
    dw1 = (-((y_train[:,0].reshape(m,1)-y_hat[:,0].reshape(m,1))@y_hat[:,0].reshape(m,1).T@(1-y_hat[:,0].reshape(m,1))@(w2[:,0].reshape(3,1)).T).T@x_train).T+(-((y_train[:,1].reshape(m,1)-y_hat[:,1].reshape(m,1))@y_hat[:,1].reshape(m,1).T@(1-y_hat[:,1].reshape(m,1))@(w2[:,1].reshape(3,1)).T).T@x_train).T
    db2 = (np.hstack((np.sum(-(y_train[:,0].reshape(m,1)-y_hat[:,0].reshape(m,1)).T@y_hat[:,0].reshape(m,1)@(1-y_hat[:,0].reshape(m,1)).T),np.sum(-(y_train[:,1].reshape(m,1)-y_hat[:,1].reshape(m,1)).T@y_hat[:,1].reshape(m,1)@(1-y_hat[:,1].reshape(m,1)).T)))).T.reshape(1,2)#1*2
    db1 = (-((y_train[:,0].reshape(m,1)-y_hat[:,0].reshape(m,1))@y_hat[:,0].reshape(m,1).T@(1-y_hat[:,0].reshape(m,1))@(w2[:,0].reshape(3,1)).T).T@np.ones((m,1))).T+(-((y_train[:,1].reshape(m,1)-y_hat[:,1].reshape(m,1))@y_hat[:,1].reshape(m,1).T@(1-y_hat[:,1].reshape(m,1))@(w2[:,1].reshape(3,1)).T).T@np.ones((m,1))).T
    # In-place updates: w1, w2, b1, b2 are the same arrays stored in `params`.
    w1 -= lr*dw1
    w2 -= lr*dw2
    b1 -= lr*db1
    b2 -= lr*db2
    if (epoch+1)%100==0:
        print(f"loss:{loss} , epoch:{epoch+1}")
    # Per-task w1 gradients; they repeat the two summands of dw1 but are
    # evaluated after w2 has been updated above.
    d1w1 = (-((y_train[:,0].reshape(m,1)-y_hat[:,0].reshape(m,1))@y_hat[:,0].reshape(m,1).T@(1-y_hat[:,0].reshape(m,1))@(w2[:,0].reshape(3,1)).T).T@x_train).T
    d2w1 = (-((y_train[:,1].reshape(m,1)-y_hat[:,1].reshape(m,1))@y_hat[:,1].reshape(m,1).T@(1-y_hat[:,1].reshape(m,1))@(w2[:,1].reshape(3,1)).T).T@x_train).T
    # dm is True where the two task gradients agree in sign.
    dm = matr(d1w1,d2w1)
    cm2 = cm1.copy()
    for i in range(dm.shape[0]):
        for k in range(dm.shape[1]):
            # A sign conflict extends the streak ...
            if dm[i,k]==False:
                cm1[i,k] += 1
            # ... and anything else resets it to zero.
            if cm1[i,k]!=cm2[i,k]+1:
                cm1[i,k]=0
    if np.max(cm1) >= threshold:
        print("达到满足条件，迭代循环终止")
        # Equivalent to argmax % 3: the column (hidden node) of the flat argmax.
        index = (np.argmax(cm1)+1)%3
        if index == 0:
            index = 3-1
        else:
            index -= 1
        break




达到满足条件，迭代循环终止
