# Dropout과 Dead Relu는 어떤 차이점이 있을까?

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import copy

np.set_printoptions(threshold=np.inf, linewidth=np.inf)

In [2]:
relu_linear1 = nn.Linear(4, 3, bias=True)
relu_linear2 = nn.Linear(3, 3, bias=True)
relu_linear3 = nn.Linear(3, 2, bias=True)

relu_drop_linear1 = copy.deepcopy(relu_linear1)
relu_drop_linear2 = copy.deepcopy(relu_linear2)
relu_drop_linear3 = copy.deepcopy(relu_linear3)

drop_linear1 = copy.deepcopy(relu_linear1)
drop_linear2 = copy.deepcopy(relu_linear2)
drop_linear3 = copy.deepcopy(relu_linear3)

class Relu_model(nn.Module):
    def __init__(self):
        super(Relu_model, self).__init__()
        self.fc1 = relu_linear1
        self.fc2 = relu_linear2
        self.fc3 = relu_linear3
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        print(out)
        out = self.fc2(out)
        out = self.relu(out)
        print(out)
        out = self.fc3(out)
        print(out)
        
        return out
    
class Relu_Dropout_model(nn.Module):
    def __init__(self):
        super(Relu_Dropout_model, self).__init__()
        self.fc1 = relu_drop_linear1
        self.fc2 = relu_drop_linear2
        self.fc3 = relu_drop_linear3
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.dropout(out)
        print(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.dropout(out)
        print(out)
        out = self.fc3(out)
        print(out)
        
        return out
    
class Dropout_model(nn.Module):
    def __init__(self):
        super(Dropout_model, self).__init__()
        self.fc1 = drop_linear1
        self.fc2 = drop_linear2
        self.fc3 = drop_linear3
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.dropout(out)
        print(out)
        out = self.fc2(out)
        out = self.dropout(out)
        print(out)
        out = self.fc3(out)
        print(out)
        
        return out

In [3]:
input = []
target = []

for i in range(10):
    x = np.random.randint(0,10, size=4)
    y = np.random.randint(0,2)
    input.append(x)
    target.append(y)

input = torch.FloatTensor(input)
target = torch.LongTensor(target)

dataset = TensorDataset(input, target)
loader = DataLoader(dataset = dataset, batch_size=1, shuffle=True)

In [4]:
for x,y in loader:
    print(x)
    print(y)

tensor([[7., 2., 6., 9.]])
tensor([1])
tensor([[6., 0., 7., 2.]])
tensor([0])
tensor([[6., 2., 3., 6.]])
tensor([1])
tensor([[5., 5., 6., 1.]])
tensor([1])
tensor([[1., 2., 3., 7.]])
tensor([0])
tensor([[2., 3., 7., 3.]])
tensor([0])
tensor([[5., 1., 8., 2.]])
tensor([1])
tensor([[6., 6., 8., 0.]])
tensor([1])
tensor([[3., 1., 0., 0.]])
tensor([1])
tensor([[4., 0., 4., 5.]])
tensor([0])


In [5]:
relu_model = Relu_model()
relu_dropout_model = Relu_Dropout_model()
dropout_model = Dropout_model()

print('\n ReLU 모델 parameters \n')
for names in relu_model.state_dict():
    print(names)
    print(relu_model.state_dict()[names])
    
print('\n ReLU + Dropout 모델 parameters \n')
for names in relu_dropout_model.state_dict():
    print(names)
    print(relu_dropout_model.state_dict()[names])

print('\n Dropout 모델 parameters \n')
for names in dropout_model.state_dict():
    print(names)
    print(dropout_model.state_dict()[names])


 ReLU 모델 parameters 

fc1.weight
tensor([[ 0.1225,  0.4039, -0.1940,  0.0353],
        [-0.4475, -0.2872, -0.4309, -0.3942],
        [ 0.0140,  0.2350, -0.0237,  0.3182]])
fc1.bias
tensor([ 0.4662,  0.1929, -0.2529])
fc2.weight
tensor([[ 0.1620,  0.5245, -0.4796],
        [-0.4758,  0.1231, -0.4760],
        [-0.2737,  0.3361, -0.2157]])
fc2.bias
tensor([0.5407, 0.0119, 0.4214])
fc3.weight
tensor([[ 0.4272,  0.0323, -0.1558],
        [-0.1122, -0.0859,  0.0664]])
fc3.bias
tensor([-0.1454,  0.3115])

 ReLU + Dropout 모델 parameters 

fc1.weight
tensor([[ 0.1225,  0.4039, -0.1940,  0.0353],
        [-0.4475, -0.2872, -0.4309, -0.3942],
        [ 0.0140,  0.2350, -0.0237,  0.3182]])
fc1.bias
tensor([ 0.4662,  0.1929, -0.2529])
fc2.weight
tensor([[ 0.1620,  0.5245, -0.4796],
        [-0.4758,  0.1231, -0.4760],
        [-0.2737,  0.3361, -0.2157]])
fc2.bias
tensor([0.5407, 0.0119, 0.4214])
fc3.weight
tensor([[ 0.4272,  0.0323, -0.1558],
        [-0.1122, -0.0859,  0.0664]])
fc3.bias
tensor(

In [6]:
relu_optimizer = optim.SGD(relu_model.parameters(), lr = 0.01)
relu_dropout_optimizer = optim.SGD(relu_dropout_model.parameters(), lr=0.01)
dropout_optimizer = optim.SGD(dropout_model.parameters(), lr=0.01)

criterion = torch.nn.CrossEntropyLoss()

for i, data in enumerate(loader): 
    relu_model.train()
    relu_dropout_model.train()
    dropout_model.train()    
    
    print('\n', i+1)
    x,y = data
    
    relu_optimizer.zero_grad()
    relu_dropout_optimizer.zero_grad()
    dropout_optimizer.zero_grad()       
    
    print('ReLU 모델 output \n')
    r_output = relu_model(x)
    print(r_output)
    r_loss = criterion(r_output,y)
    print(r_loss)
    r_loss.backward()
    relu_optimizer.step()
    
    print('\n ReLU + Dropout 모델 output \n')
    rd_output = relu_dropout_model(x)
    rd_loss = criterion(rd_output, y)
    rd_loss.backward()
    relu_dropout_optimizer.step()
    
    print('\n Dropout 모델 output \n')
    d_output = dropout_model(x)
    d_loss = criterion(d_output, y)
    d_loss.backward()
    dropout_optimizer.step()
    
    print('\n ReLU 모델 parameters \n')
    for names in relu_model.state_dict():
        print(names)
        print(relu_model.state_dict()[names])
        
    print('\n ReLU + Dropout 모델 parameters \n')
    for names in relu_dropout_model.state_dict():
        print(names)
        print(relu_dropout_model.state_dict()[names])
    
    print('\n Dropout 모델 parameters \n')
    for names in dropout_model.state_dict():
        print(names)
        print(dropout_model.state_dict()[names])
     
     


 1
ReLU 모델 output 

tensor([[1.6387, 0.0000, 2.1394]], grad_fn=<ReluBackward0>)
tensor([[0., 0., 0.]], grad_fn=<ReluBackward0>)
tensor([[-0.1454,  0.3115]], grad_fn=<AddmmBackward>)
tensor([[-0.1454,  0.3115]], grad_fn=<AddmmBackward>)
tensor(0.4905, grad_fn=<NllLossBackward>)

 ReLU + Dropout 모델 output 

tensor([[3.2773, 0.0000, 0.0000]], grad_fn=<MulBackward0>)
tensor([[2.1430, 0.0000, 0.0000]], grad_fn=<MulBackward0>)
tensor([[0.7700, 0.0710]], grad_fn=<AddmmBackward>)

 Dropout 모델 output 

tensor([[0.0000, -0.0000, 4.2789]], grad_fn=<MulBackward0>)
tensor([[-0.0000, -0.0000, -1.0032]], grad_fn=<MulBackward0>)
tensor([[0.0109, 0.2449]], grad_fn=<AddmmBackward>)

 ReLU 모델 parameters 

fc1.weight
tensor([[ 0.1225,  0.4039, -0.1940,  0.0353],
        [-0.4475, -0.2872, -0.4309, -0.3942],
        [ 0.0140,  0.2350, -0.0237,  0.3182]])
fc1.bias
tensor([ 0.4662,  0.1929, -0.2529])
fc2.weight
tensor([[ 0.1620,  0.5245, -0.4796],
        [-0.4758,  0.1231, -0.4760],
        [-0.2737,  0.33

In [7]:
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
print(input)
target = torch.empty(3, dtype=torch.long).random_(5)
print(target)
output = loss(input, target)
print(output)
output.backward()

tensor([[-1.2739e+00,  4.7268e-01,  1.9138e-01, -2.0238e-01, -2.4849e-01],
        [ 5.9721e-01,  2.5011e-01, -7.2537e-01, -1.1257e+00,  4.3374e-01],
        [-2.3343e-04, -4.6638e-02,  2.1673e+00, -2.7834e-01,  9.6699e-01]],
       requires_grad=True)
tensor([1, 4, 0])
tensor(1.6601, grad_fn=<NllLossBackward>)


In [8]:
np.mean(np.log(0.9119) + np.log(1.8745))

0.5361170131513125