In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

%matplotlib inline

In [None]:

#dataset = datasets.load_iris()
#dataset = datasets.load_boston()
dataset = datasets.load_breast_cancer()
X = dataset.data
# Integer class labels kept as an (N, 1) column; this is what gets split below.
y = dataset.target.reshape(-1, 1)

# The one-hot encoding is only used to count the classes, so it is never
# assigned back to `y`. Newer scikit-learn releases renamed `sparse` to
# `sparse_output` and later dropped the old keyword; try the new spelling
# first and fall back to the old one so the cell runs on either.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
classes = encoder.fit_transform(y).shape[1]

# NOTE(review): no random_state is passed, so the split differs on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
X_train.shape,X_test.shape,y_train.shape,y_test.shape,classes










((512, 30), (57, 30), (512, 1), (57, 1), 2)

In [None]:
# Convert the splits to tensors: float32 features, int64 class labels.
X_train, X_test = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.int64) for labels in (y_train, y_test)
)



In [None]:

class Model1(nn.Module):
    """Fully connected network with ReLU activations and no normalisation.

    Layout: Linear(input_dim, hidden_dim) -> ReLU ->
    (num_layers - 1) x [Linear(hidden_dim, hidden_dim) -> ReLU] ->
    Linear(hidden_dim, output_dim). The forward pass returns the output
    layer's values directly; no softmax is applied.

    Args:
        input_dim: number of input features.
        output_dim: number of output units.
        num_layers: number of hidden-width Linear layers, counting the input
            layer; ``num_layers - 1`` extra blocks are stacked in ``hidden``.
        hidden_dim: width of every hidden layer (default 5).
    """

    def __init__(self,input_dim,output_dim,num_layers,hidden_dim=5):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)

        # Sub-module names are part of the public surface: other cells look
        # up "hidden_layer<i>" by name to read its gradients.
        self.hidden = nn.Sequential()
        for i in range(num_layers-1):
            self.hidden.add_module("hidden_layer"+str(i), nn.Linear(hidden_dim, hidden_dim))
            self.hidden.add_module("ActiveF"+str(i), nn.ReLU())

        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.activefunction = nn.ReLU()

    def forward(self,x):
        """Return the output layer's values for input batch ``x``."""
        out = self.activefunction(self.input_layer(x))
        out = self.hidden(out)
        return self.output_layer(out)


class Model3(nn.Module):
    """Fully connected network with batch normalisation and ReLU activations.

    Layout: Linear(input_dim, hidden_dim) -> BatchNorm1d -> ReLU ->
    (num_layers - 1) x [Linear(hidden_dim, hidden_dim) -> BatchNorm1d -> ReLU]
    -> Linear(hidden_dim, output_dim). The forward pass returns the output
    layer's values directly; no softmax is applied.

    Args:
        input_dim: number of input features.
        output_dim: number of output units.
        num_layers: number of hidden-width Linear layers, counting the input
            layer; ``num_layers - 1`` extra blocks are stacked in ``hidden``.
        hidden_dim: width of every hidden layer (default 5).
    """

    def __init__(self,input_dim,output_dim,num_layers,hidden_dim=5):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.bn = nn.BatchNorm1d(hidden_dim)

        # Sub-module names are part of the public surface: other cells look
        # up "hidden_layer<i>" by name to read its gradients.
        self.hidden = nn.Sequential()
        for i in range(num_layers-1):
            self.hidden.add_module("hidden_layer"+str(i), nn.Linear(hidden_dim, hidden_dim))
            self.hidden.add_module("BN"+str(i), nn.BatchNorm1d(hidden_dim))
            self.hidden.add_module("ActiveF"+str(i), nn.ReLU())

        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.activefunction = nn.ReLU()

    def forward(self,x):
        """Return the output layer's values for input batch ``x``."""
        out = self.bn(self.input_layer(x))
        out = self.activefunction(out)
        out = self.hidden(out)
        return self.output_layer(out)

class Model4(nn.Module):
    """Fully connected network with batch normalisation and Sigmoid activations.

    Layout: Linear(input_dim, hidden_dim) -> BatchNorm1d -> Sigmoid ->
    (num_layers - 1) x [Linear(hidden_dim, hidden_dim) -> BatchNorm1d -> Sigmoid]
    -> Linear(hidden_dim, output_dim). The forward pass returns the output
    layer's values directly; no softmax is applied.

    Args:
        input_dim: number of input features.
        output_dim: number of output units.
        num_layers: number of hidden-width Linear layers, counting the input
            layer; ``num_layers - 1`` extra blocks are stacked in ``hidden``.
        hidden_dim: width of every hidden layer (default 5).
    """

    def __init__(self,input_dim,output_dim,num_layers,hidden_dim=5):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)
        self.bn = nn.BatchNorm1d(hidden_dim)

        # Sub-module names are part of the public surface: other cells look
        # up "hidden_layer<i>" by name to read its gradients.
        self.hidden = nn.Sequential()
        for i in range(num_layers-1):
            self.hidden.add_module("hidden_layer"+str(i), nn.Linear(hidden_dim, hidden_dim))
            self.hidden.add_module("BN"+str(i), nn.BatchNorm1d(hidden_dim))
            self.hidden.add_module("ActiveF"+str(i), nn.Sigmoid())

        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.activefunction = nn.Sigmoid()

    def forward(self,x):
        """Return the output layer's values for input batch ``x``."""
        out = self.bn(self.input_layer(x))
        out = self.activefunction(out)
        out = self.hidden(out)
        return self.output_layer(out)

class Model2(nn.Module):
    """Fully connected network with Sigmoid activations and no normalisation.

    Layout: Linear(input_dim, hidden_dim) -> Sigmoid ->
    (num_layers - 1) x [Linear(hidden_dim, hidden_dim) -> Sigmoid] ->
    Linear(hidden_dim, output_dim). The forward pass returns the output
    layer's values directly; no softmax is applied.

    Args:
        input_dim: number of input features.
        output_dim: number of output units.
        num_layers: number of hidden-width Linear layers, counting the input
            layer; ``num_layers - 1`` extra blocks are stacked in ``hidden``.
        hidden_dim: width of every hidden layer (default 5).
    """

    def __init__(self,input_dim,output_dim,num_layers,hidden_dim=5):
        super().__init__()
        self.input_layer = nn.Linear(input_dim, hidden_dim)

        # Sub-module names are part of the public surface: other cells look
        # up "hidden_layer<i>" by name to read its gradients.
        self.hidden = nn.Sequential()
        for i in range(num_layers-1):
            self.hidden.add_module("hidden_layer"+str(i), nn.Linear(hidden_dim, hidden_dim))
            self.hidden.add_module("ActiveF"+str(i), nn.Sigmoid())

        self.output_layer = nn.Linear(hidden_dim, output_dim)
        self.activefunction = nn.Sigmoid()

    def forward(self,x):
        """Return the output layer's values for input batch ``x``."""
        out = self.activefunction(self.input_layer(x))
        out = self.hidden(out)
        return self.output_layer(out)


In [None]:
from torchsummary import summary



# Print a layer-by-layer summary of the ReLU (Model1) and Sigmoid (Model2) networks.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for model_class in (Model1, Model2):
    model = model_class(3,1,num_layers=5).to(device)
    # Pass the device that was actually selected; a hard-coded "cuda" fails
    # on machines without a GPU even though the model was moved to the CPU.
    summary(model,(256,256,3),1 ,device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1           [1, 256, 256, 5]              20
              ReLU-2           [1, 256, 256, 5]               0
            Linear-3           [1, 256, 256, 5]              30
              ReLU-4           [1, 256, 256, 5]               0
            Linear-5           [1, 256, 256, 5]              30
              ReLU-6           [1, 256, 256, 5]               0
            Linear-7           [1, 256, 256, 5]              30
              ReLU-8           [1, 256, 256, 5]               0
            Linear-9           [1, 256, 256, 5]              30
             ReLU-10           [1, 256, 256, 5]               0
           Linear-11           [1, 256, 256, 1]               6
Total params: 146
Trainable params: 146
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.75
Forward/backward

In [None]:
from numpy import dtype
def train(num_layers,Loss,data,num_epochs=100,model=Model1,showepochs=1,classes=2):
    """Build a fresh network and fit it full-batch with Adam (lr=0.001).

    Args:
        num_layers: depth argument forwarded to the model constructor.
        Loss: callable ``Loss(outputs, targets)`` returning a scalar tensor.
        data: tuple ``(X_train, y_train, X_test, y_test)`` of tensors. Label
            tensors may be shaped ``(N,)`` or ``(N, 1)``.
        num_epochs: number of full-batch optimisation steps.
        model: model class, called as ``model(input_dim, output_dim, num_layers)``.
        showepochs: when equal to 1, print the metrics every 50th epoch.
        classes: number of output units of the network.

    Returns:
        ``(train_losses, test_losses, train_acc, test_acc, net)``: four lists
        holding one float per epoch, followed by the trained network. The
        network's parameters keep the gradients of the last training step,
        because the evaluation pass runs under ``torch.no_grad()``.
    """
    X_train, y_train, X_test, y_test = data

    # Keep the class (`model`) and the instance (`net`) under separate names.
    net = model(X_train.shape[-1], classes, num_layers)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.001)

    # argmax(dim=1) below yields predictions of shape (N,), so the labels must
    # be (N,) as well for the element-wise comparison. Drop the trailing axis
    # of (N, 1) label columns and leave already-flat labels untouched.
    if y_train.dim() > 1:
        y_train = y_train.squeeze(1)
    if y_test.dim() > 1:
        y_test = y_test.squeeze(1)

    train_losses, test_losses = [], []
    train_acc, test_acc = [], []

    for epoch in range(num_epochs):
        # Training step on the whole training set. The accuracy is measured
        # on the outputs computed before this epoch's weight update.
        net.train()
        output_train = net(X_train)
        loss_train = Loss(output_train, y_train)
        acc_train = (torch.argmax(output_train, dim=1) == y_train).float().mean()

        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Evaluation on the held-out set, without tracking gradients.
        with torch.no_grad():
            net.eval()
            output_test = net(X_test)
            loss_test = Loss(output_test, y_test)
            acc_test = (torch.argmax(output_test, dim=1) == y_test).float().mean()

        train_losses.append(loss_train.item())
        test_losses.append(loss_test.item())
        train_acc.append(acc_train.item())
        test_acc.append(acc_test.item())

        if showepochs == 1 and (epoch+1) % 50 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss_train.item():.4f}, Test Loss: {loss_test.item():.4f}, Train ACCuracy: {acc_train.item():.4f}, Test Accuracy: {acc_test.item():.4f}")

    return train_losses , test_losses , train_acc, test_acc , net


In [None]:

num_epochs=50
LOSS=nn.CrossEntropyLoss()

# Deepest network in the sweep; a dedicated name keeps the builtin max() usable.
MAX_LAYERS=20

# Gradient norms per layer for plain networks: ReLU (Model1) versus Sigmoid
# (Model2). The Sigmoid sweep must train Model2; Model3 is the batch-norm +
# ReLU network and would not exercise a sigmoid activation at all.
for Active_Function, model_class in ((nn.ReLU(), Model1), (nn.Sigmoid(), Model2)):
    print('with ',Active_Function,' active faunction:')

    for num_layers in range(1, MAX_LAYERS+1):
        train_losses , test_losses,  train_acc , test_acc  , model  = train(num_layers,LOSS,(X_train,y_train,X_test,y_test),num_epochs,model=model_class,showepochs=0,classes=classes)

        print('network number' ,num_layers, " : \n")

        # The .grad tensors read here are the ones left by the final training step.
        print(' layer 0', torch.norm(model.input_layer.weight.grad))

        for j in range(1, num_layers):
            # Look the layer up by its registered name instead of eval()-ing a string.
            hidden_linear = getattr(model.hidden, "hidden_layer"+str(j-1))
            print(' layer ',j, torch.norm(hidden_linear.weight.grad))

        print(' layer ',num_layers,torch.norm(model.output_layer.weight.grad))



with  ReLU()  active faunction:
network number 1  : 

 layer 0 tensor(78.4073)
 layer  1 tensor(47.0155)
network number 2  : 

 layer 0 tensor(1.1224)
 layer  1 tensor(3.3314)
 layer  2 tensor(2.0711)
network number 3  : 

 layer 0 tensor(9.3815)
 layer  1 tensor(3.8643)
 layer  2 tensor(1.3978)
 layer  3 tensor(1.2998)
network number 4  : 

 layer 0 tensor(0.4004)
 layer  1 tensor(0.2198)
 layer  2 tensor(0.2016)
 layer  3 tensor(0.2800)
 layer  4 tensor(0.4968)
network number 5  : 

 layer 0 tensor(0.1621)
 layer  1 tensor(0.0969)
 layer  2 tensor(0.0774)
 layer  3 tensor(0.0414)
 layer  4 tensor(0.0944)
 layer  5 tensor(0.1174)
network number 6  : 

 layer 0 tensor(0.0486)
 layer  1 tensor(0.0173)
 layer  2 tensor(0.0082)
 layer  3 tensor(0.0256)
 layer  4 tensor(0.0203)
 layer  5 tensor(0.0353)
 layer  6 tensor(0.0808)
network number 7  : 

 layer 0 tensor(0.2721)
 layer  1 tensor(0.0393)
 layer  2 tensor(0.0309)
 layer  3 tensor(0.0673)
 layer  4 tensor(0.0425)
 layer  5 tensor(0.

استفاده از سیگموید باعث می‌شود که با افزایش تعداد لایه‌ها، گرادیان ضرایب لایه‌های نخستین تضعیف شود و اهمیت آن‌ها در یادگیری کمتر شود؛ اما در ReLU این‌گونه نیست، اگرچه این اثر باز هم تا حدی دیده می‌شود.


In [None]:
# Small check of argmax along the last axis of a (3, 1, 2) tensor:
# three rows, each wrapped in a singleton middle axis.
l = torch.tensor([[1, 2], [5, 6], [2, 3]]).unsqueeze(1)
print(l.shape)
print(l.argmax(dim=2))


torch.Size([3, 1, 2])
tensor([[1],
        [1],
        [1]])


In [None]:



 #batchnorm
# Deepest network in the sweep; a dedicated name keeps the builtin max() usable.
MAX_LAYERS=20
##########################################
Active_Function=nn.ReLU()
print('with ',Active_Function,' active faunction:')
#################
for num_layers in range(1, MAX_LAYERS+1):
    # Train a fresh Model3 (batch norm + ReLU) of this depth.
    train_losses , test_losses,  train_acc , test_acc  , model  = train(num_layers,LOSS,(X_train,y_train,X_test,y_test),num_epochs,model=Model3,showepochs=0,classes=classes)

    print('network number' ,num_layers, " : \n")

    # Weight-gradient norm of every Linear layer, from input to output.
    print(' layer 0', torch.norm(model.input_layer.weight.grad))

    for j in range(1, num_layers):
        # Look the layer up by its registered name instead of eval()-ing a string.
        hidden_linear = getattr(model.hidden, "hidden_layer"+str(j-1))
        print(' layer ',j, torch.norm(hidden_linear.weight.grad))

    print(' layer ',num_layers,torch.norm(model.output_layer.weight.grad))



with  ReLU()  active faunction:
network number 1  : 

 layer 0 tensor(0.4292)
 layer  1 tensor(0.2625)
network number 2  : 

 layer 0 tensor(0.0612)
 layer  1 tensor(0.1070)
 layer  2 tensor(0.1893)
network number 3  : 

 layer 0 tensor(0.0297)
 layer  1 tensor(0.0161)
 layer  2 tensor(0.0413)
 layer  3 tensor(0.4938)
network number 4  : 

 layer 0 tensor(0.1119)
 layer  1 tensor(0.1237)
 layer  2 tensor(0.0738)
 layer  3 tensor(0.0666)
 layer  4 tensor(0.3027)
network number 5  : 

 layer 0 tensor(0.6773)
 layer  1 tensor(0.7945)
 layer  2 tensor(0.1000)
 layer  3 tensor(0.0600)
 layer  4 tensor(0.2562)
 layer  5 tensor(0.0764)
network number 6  : 

 layer 0 tensor(0.7436)
 layer  1 tensor(0.0751)
 layer  2 tensor(0.1817)
 layer  3 tensor(0.0707)
 layer  4 tensor(0.0424)
 layer  5 tensor(0.0370)
 layer  6 tensor(0.2013)
network number 7  : 

 layer 0 tensor(0.1079)
 layer  1 tensor(0.0402)
 layer  2 tensor(0.0353)
 layer  3 tensor(0.0341)
 layer  4 tensor(0.0339)
 layer  5 tensor(0.03

In [None]:



 #batchnorm    # sigmoid
# Deepest network in the sweep; a dedicated name keeps the builtin max() usable.
MAX_LAYERS=20
##########################################
Active_Function=nn.Sigmoid()
print('with ',Active_Function,' active faunction:')
#################
for num_layers in range(1, MAX_LAYERS+1):
    # Train a fresh Model4 (batch norm + Sigmoid) of this depth.
    train_losses , test_losses,  train_acc , test_acc  , model  = train(num_layers,LOSS,(X_train,y_train,X_test,y_test),num_epochs,model=Model4,showepochs=0,classes=classes)

    print('network number' ,num_layers, " : \n")

    # Weight-gradient norm of every Linear layer, from input to output.
    print(' layer 0', torch.norm(model.input_layer.weight.grad))

    for j in range(1, num_layers):
        # Look the layer up by its registered name instead of eval()-ing a string.
        hidden_linear = getattr(model.hidden, "hidden_layer"+str(j-1))
        print(' layer ',j, torch.norm(hidden_linear.weight.grad))

    print(' layer ',num_layers,torch.norm(model.output_layer.weight.grad))



with  Sigmoid()  active faunction:
network number 1  : 

 layer 0 tensor(2.1166)
 layer  1 tensor(0.4166)
network number 2  : 

 layer 0 tensor(0.3289)
 layer  1 tensor(0.0958)
 layer  2 tensor(0.1826)
network number 3  : 

 layer 0 tensor(0.1287)
 layer  1 tensor(0.0180)
 layer  2 tensor(0.0138)
 layer  3 tensor(0.4599)
network number 4  : 

 layer 0 tensor(0.0700)
 layer  1 tensor(0.0930)
 layer  2 tensor(0.0374)
 layer  3 tensor(0.0131)
 layer  4 tensor(0.3907)
network number 5  : 

 layer 0 tensor(0.1645)
 layer  1 tensor(0.1932)
 layer  2 tensor(0.0825)
 layer  3 tensor(0.0476)
 layer  4 tensor(0.0640)
 layer  5 tensor(0.4347)
network number 6  : 

 layer 0 tensor(0.1128)
 layer  1 tensor(0.0177)
 layer  2 tensor(0.0176)
 layer  3 tensor(0.0291)
 layer  4 tensor(0.0181)
 layer  5 tensor(0.0326)
 layer  6 tensor(0.2048)
network number 7  : 

 layer 0 tensor(0.0215)
 layer  1 tensor(0.0123)
 layer  2 tensor(0.0071)
 layer  3 tensor(0.0092)
 layer  4 tensor(0.0100)
 layer  5 tensor(0