# Dropout과 Dead ReLU는 어떤 차이점이 있을까?

In [24]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import copy

np.set_printoptions(threshold=np.inf, linewidth=np.inf)

In [25]:
# Base layers of a 4 -> 3 -> 3 -> 2 fully connected network. They are created
# in this fixed order so the random initialisation is drawn in the same
# sequence as before.
relu_linear1, relu_linear2, relu_linear3 = (
    nn.Linear(n_in, n_out, bias=True)
    for n_in, n_out in ((4, 3), (3, 3), (3, 2))
)
_base_layers = (relu_linear1, relu_linear2, relu_linear3)

# Independent deep copies for the ReLU + Dropout model: identical starting
# weights, but separate parameter tensors.
relu_drop_linear1, relu_drop_linear2, relu_drop_linear3 = map(
    copy.deepcopy, _base_layers
)

# Independent deep copies for the Dropout-only model.
drop_linear1, drop_linear2, drop_linear3 = map(copy.deepcopy, _base_layers)

class Relu_model(nn.Module):
    """Three-layer fully connected network with ReLU after the first two layers.

    Every intermediate activation and the final output are printed during the
    forward pass so they can be inspected step by step.

    Args:
        fc1: First linear layer. Defaults to the module-level ``relu_linear1``.
        fc2: Second linear layer. Defaults to the module-level ``relu_linear2``.
        fc3: Output linear layer. Defaults to the module-level ``relu_linear3``.

    Calling ``Relu_model()`` with no arguments behaves exactly as before and
    reuses the shared module-level layers; passing explicit layers lets an
    instance own its parameters instead of sharing them with other instances.
    """

    def __init__(self, fc1=None, fc2=None, fc3=None):
        super().__init__()
        # Fall back to the shared module-level layers when none are supplied.
        self.fc1 = relu_linear1 if fc1 is None else fc1
        self.fc2 = relu_linear2 if fc2 is None else fc2
        self.fc3 = relu_linear3 if fc3 is None else fc3
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through fc1 -> ReLU -> fc2 -> ReLU -> fc3.

        Prints the two post-ReLU activations and the final output, then
        returns the final output of ``fc3``.
        """
        out = self.relu(self.fc1(x))
        print(out)
        out = self.relu(self.fc2(out))
        print(out)
        out = self.fc3(out)
        print(out)

        return out
    
class Relu_Dropout_model(nn.Module):
    """Three-layer fully connected network with ReLU then Dropout(0.5).

    ReLU followed by dropout is applied after each of the first two linear
    layers. Every intermediate activation and the final output are printed
    during the forward pass.

    Args:
        fc1: First linear layer. Defaults to the module-level
            ``relu_drop_linear1``.
        fc2: Second linear layer. Defaults to the module-level
            ``relu_drop_linear2``.
        fc3: Output linear layer. Defaults to the module-level
            ``relu_drop_linear3``.

    Calling ``Relu_Dropout_model()`` with no arguments behaves exactly as
    before and reuses the shared module-level layers.
    """

    def __init__(self, fc1=None, fc2=None, fc3=None):
        super().__init__()
        # Fall back to the shared module-level layers when none are supplied.
        self.fc1 = relu_drop_linear1 if fc1 is None else fc1
        self.fc2 = relu_drop_linear2 if fc2 is None else fc2
        self.fc3 = relu_drop_linear3 if fc3 is None else fc3
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run ``x`` through (fc -> ReLU -> Dropout) twice, then fc3.

        Prints the two post-dropout activations and the final output, then
        returns the final output of ``fc3``.
        """
        out = self.dropout(self.relu(self.fc1(x)))
        print(out)
        out = self.dropout(self.relu(self.fc2(out)))
        print(out)
        out = self.fc3(out)
        print(out)

        return out
    
class Dropout_model(nn.Module):
    """Three-layer fully connected network with Dropout(0.5) and no ReLU.

    Dropout is applied directly to the output of each of the first two linear
    layers. Every intermediate activation and the final output are printed
    during the forward pass.

    Args:
        fc1: First linear layer. Defaults to the module-level ``drop_linear1``.
        fc2: Second linear layer. Defaults to the module-level ``drop_linear2``.
        fc3: Output linear layer. Defaults to the module-level ``drop_linear3``.

    Calling ``Dropout_model()`` with no arguments behaves exactly as before
    and reuses the shared module-level layers.
    """

    def __init__(self, fc1=None, fc2=None, fc3=None):
        super().__init__()
        # Fall back to the shared module-level layers when none are supplied.
        self.fc1 = drop_linear1 if fc1 is None else fc1
        self.fc2 = drop_linear2 if fc2 is None else fc2
        self.fc3 = drop_linear3 if fc3 is None else fc3
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run ``x`` through (fc -> Dropout) twice, then fc3.

        Prints the two post-dropout activations and the final output, then
        returns the final output of ``fc3``.
        """
        out = self.dropout(self.fc1(x))
        print(out)
        out = self.dropout(self.fc2(out))
        print(out)
        out = self.fc3(out)
        print(out)

        return out

In [26]:
# Build a tiny random toy dataset: 10 samples, each with 4 integer features
# drawn from [0, 10) and a binary class label drawn from {0, 1}.
samples = []
labels = []

for _ in range(10):
    # Features are drawn before the label for each sample, so the NumPy RNG
    # is consumed in the same order as before.
    samples.append(np.random.randint(0, 10, size=4))
    labels.append(np.random.randint(0, 2))

# Stack into a single ndarray first: building a tensor straight from a list of
# ndarrays takes the slow per-element path and triggers a PyTorch warning.
# NOTE(review): `input` shadows the Python builtin; the name is kept because
# other cells may refer to it.
input = torch.tensor(np.array(samples), dtype=torch.float32)
target = torch.tensor(labels, dtype=torch.long)

# One sample per batch, visited in a freshly shuffled order on every pass.
dataset = TensorDataset(input, target)
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

In [27]:
# Print every (features, label) mini-batch produced by one pass over the loader.
for batch in loader:
    features, label = batch
    print(features)
    print(label)

tensor([[4., 1., 4., 8.]])
tensor([1])
tensor([[2., 4., 8., 6.]])
tensor([1])
tensor([[7., 8., 0., 4.]])
tensor([0])
tensor([[5., 0., 6., 9.]])
tensor([1])
tensor([[8., 5., 7., 5.]])
tensor([1])
tensor([[7., 3., 2., 7.]])
tensor([1])
tensor([[1., 4., 3., 1.]])
tensor([1])
tensor([[4., 4., 0., 6.]])
tensor([1])
tensor([[0., 7., 2., 8.]])
tensor([0])
tensor([[6., 5., 7., 5.]])
tensor([0])


In [28]:
# Instantiate the three networks that are compared against each other.
relu_model = Relu_model()
relu_dropout_model = Relu_Dropout_model()
dropout_model = Dropout_model()

# Print the initial parameters of each model: a header line, then every
# state_dict entry as its name followed by its tensor.
for header, model in (
    ('\n ReLU 모델 parameters \n', relu_model),
    ('\n ReLU + Dropout 모델 parameters \n', relu_dropout_model),
    ('\n Dropout 모델 parameters \n', dropout_model),
):
    print(header)
    for param_name, param_value in model.state_dict().items():
        print(param_name)
        print(param_value)


 ReLU 모델 parameters 

fc1.weight
tensor([[-0.3128, -0.3936,  0.0372,  0.2312],
        [-0.0526, -0.3412, -0.1531,  0.4587],
        [ 0.2589, -0.4472,  0.0852, -0.0846]])
fc1.bias
tensor([0.4960, 0.2994, 0.3567])
fc2.weight
tensor([[-0.4752,  0.0661,  0.4530],
        [ 0.1787, -0.0713,  0.0952],
        [ 0.2817, -0.1652, -0.1513]])
fc2.bias
tensor([-0.4743,  0.2010,  0.3862])
fc3.weight
tensor([[-0.0580, -0.4884, -0.3422],
        [ 0.3710,  0.1598,  0.2137]])
fc3.bias
tensor([-0.5740,  0.4736])

 ReLU + Dropout 모델 parameters 

fc1.weight
tensor([[-0.3128, -0.3936,  0.0372,  0.2312],
        [-0.0526, -0.3412, -0.1531,  0.4587],
        [ 0.2589, -0.4472,  0.0852, -0.0846]])
fc1.bias
tensor([0.4960, 0.2994, 0.3567])
fc2.weight
tensor([[-0.4752,  0.0661,  0.4530],
        [ 0.1787, -0.0713,  0.0952],
        [ 0.2817, -0.1652, -0.1513]])
fc2.bias
tensor([-0.4743,  0.2010,  0.3862])
fc3.weight
tensor([[-0.0580, -0.4884, -0.3422],
        [ 0.3710,  0.1598,  0.2137]])
fc3.bias
tensor(

In [29]:
# One Adam optimizer per model, all with the same learning rate.
relu_optimizer = optim.Adam(relu_model.parameters(), lr=0.01)
relu_dropout_optimizer = optim.Adam(relu_dropout_model.parameters(), lr=0.01)
dropout_optimizer = optim.Adam(dropout_model.parameters(), lr=0.01)

criterion = torch.nn.CrossEntropyLoss()

# A single pass over the loader. Each batch is fed to all three models in
# turn (ReLU, ReLU + Dropout, Dropout), and each model takes one optimizer step.
for step, (x, y) in enumerate(loader, start=1):
    # Put every model in training mode so that dropout is active.
    for net in (relu_model, relu_dropout_model, dropout_model):
        net.train()

    print('\n', step)

    # Clear gradients left over from the previous step.
    for opt in (relu_optimizer, relu_dropout_optimizer, dropout_optimizer):
        opt.zero_grad()

    # ReLU-only model; this is the only model whose loss is printed.
    print('ReLU 모델 output \n')
    r_output = relu_model(x)
    r_loss = criterion(r_output, y)
    print(r_loss)
    r_loss.backward()
    relu_optimizer.step()

    # ReLU + Dropout model.
    print('\n ReLU + Dropout 모델 output \n')
    rd_output = relu_dropout_model(x)
    rd_loss = criterion(rd_output, y)
    rd_loss.backward()
    relu_dropout_optimizer.step()

    # Dropout-only model.
    print('\n Dropout 모델 output \n')
    d_output = dropout_model(x)
    d_loss = criterion(d_output, y)
    d_loss.backward()
    dropout_optimizer.step()

    # Print the parameters of every model after this update.
    for header, net in (
        ('\n ReLU 모델 parameters \n', relu_model),
        ('\n ReLU + Dropout 모델 parameters \n', relu_dropout_model),
        ('\n Dropout 모델 parameters \n', dropout_model),
    ):
        print(header)
        for param_name, param_value in net.state_dict().items():
            print(param_name)
            print(param_value)
     
     


 1
ReLU 모델 output 

tensor([[0.8495, 2.8050, 0.6089]], grad_fn=<ReluBackward0>)
tensor([[0.0000, 0.2109, 0.0699]], grad_fn=<ReluBackward0>)
tensor([[-0.7009,  0.5222]], grad_fn=<AddmmBackward>)
tensor(0.2580, grad_fn=<NllLossBackward>)

 ReLU + Dropout 모델 output 

tensor([[0.0000, 5.6100, 0.0000]], grad_fn=<MulBackward0>)
tensor([[0., 0., 0.]], grad_fn=<MulBackward0>)
tensor([[-0.5740,  0.4736]], grad_fn=<AddmmBackward>)

 Dropout 모델 output 

tensor([[1.6990, 0.0000, 0.0000]], grad_fn=<MulBackward0>)
tensor([[-0.0000, 0.0000, 1.7298]], grad_fn=<MulBackward0>)
tensor([[-1.1659,  0.8433]], grad_fn=<AddmmBackward>)

 ReLU 모델 parameters 

fc1.weight
tensor([[-0.3028, -0.3836,  0.0472,  0.2412],
        [-0.0626, -0.3512, -0.1631,  0.4487],
        [ 0.2489, -0.4572,  0.0752, -0.0946]])
fc1.bias
tensor([0.5060, 0.2894, 0.3467])
fc2.weight
tensor([[-0.4752,  0.0661,  0.4530],
        [ 0.1887, -0.0613,  0.1052],
        [ 0.2917, -0.1552, -0.1413]])
fc2.bias
tensor([-0.4743,  0.2110,  0.396