# Dropout과 Dead ReLU는 어떤 차이점이 있을까?

In [114]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import copy

# Make NumPy print arrays in full: no element-count summarisation (threshold)
# and no line wrapping (linewidth).
# NOTE(review): this only configures NumPy's own printing; the values printed
# later in this file are torch tensors — confirm whether
# torch.set_printoptions was what was intended.
np.set_printoptions(threshold=np.inf, linewidth=np.inf)

In [115]:
# Base fully-connected layers (16 -> 16 -> 16 -> 2). They are created once so
# that every model variant below can start from the very same initial weights.
relu_linear1, relu_linear2, relu_linear3 = (
    nn.Linear(n_in, n_out, bias=True)
    for n_in, n_out in ((16, 16), (16, 16), (16, 2))
)

# Independent deep copies for the ReLU + Dropout variant: identical initial
# values, but separate parameter tensors so training one model never touches
# another.
relu_drop_linear1, relu_drop_linear2, relu_drop_linear3 = map(
    copy.deepcopy, (relu_linear1, relu_linear2, relu_linear3)
)

# Independent deep copies for the Dropout-only variant.
drop_linear1, drop_linear2, drop_linear3 = map(
    copy.deepcopy, (relu_linear1, relu_linear2, relu_linear3)
)

class Relu_model(nn.Module):
    """Three-layer MLP with a ReLU after each of the first two linear layers.

    The linear layers are the module-level ``relu_linear1``, ``relu_linear2``
    and ``relu_linear3`` objects (not fresh copies), so every instance of this
    class shares those same parameters.
    """

    def __init__(self):
        super().__init__()
        # Assigned left to right, so sub-modules register as fc1, fc2, fc3.
        self.fc1, self.fc2, self.fc3 = relu_linear1, relu_linear2, relu_linear3
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the network on ``x``, printing every intermediate activation.

        Prints the two post-ReLU hidden activations and the final output so
        that zeroed units can be inspected, then returns the final output.
        """
        hidden1 = self.relu(self.fc1(x))
        print(hidden1)

        hidden2 = self.relu(self.fc2(hidden1))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits
    
class Relu_Dropout_model(nn.Module):
    """Three-layer MLP applying ReLU followed by Dropout(0.5) after fc1 and fc2.

    The linear layers are the module-level ``relu_drop_linear1``,
    ``relu_drop_linear2`` and ``relu_drop_linear3`` objects (not fresh copies),
    so every instance of this class shares those same parameters. One
    ``nn.Dropout`` module is reused at both hidden layers.
    """

    def __init__(self):
        super().__init__()
        # Assigned left to right, so sub-modules register as fc1, fc2, fc3.
        self.fc1, self.fc2, self.fc3 = (
            relu_drop_linear1,
            relu_drop_linear2,
            relu_drop_linear3,
        )
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run the network on ``x``, printing every intermediate activation.

        Prints the two hidden activations (after ReLU and dropout) and the
        final output, then returns the final output.
        """
        hidden1 = self.dropout(self.relu(self.fc1(x)))
        print(hidden1)

        hidden2 = self.dropout(self.relu(self.fc2(hidden1)))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits
    
class Dropout_model(nn.Module):
    """Three-layer linear network with Dropout(0.5) after fc1 and fc2.

    There is no non-linear activation: dropout is applied directly to the
    linear outputs. The linear layers are the module-level ``drop_linear1``,
    ``drop_linear2`` and ``drop_linear3`` objects (not fresh copies), so every
    instance of this class shares those same parameters. One ``nn.Dropout``
    module is reused at both hidden layers.
    """

    def __init__(self):
        super().__init__()
        # Assigned left to right, so sub-modules register as fc1, fc2, fc3.
        self.fc1, self.fc2, self.fc3 = drop_linear1, drop_linear2, drop_linear3
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run the network on ``x``, printing every intermediate activation.

        Prints the two hidden activations (after dropout) and the final
        output, then returns the final output.
        """
        hidden1 = self.dropout(self.fc1(x))
        print(hidden1)

        hidden2 = self.dropout(self.fc2(hidden1))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits

In [116]:
# Build a tiny random dataset: 10 samples, each a 4x4 matrix of uniform [0, 1)
# values paired with a random binary class label (0 or 1).
# Feature and label are drawn together per sample, in that order.
_samples = [(np.random.rand(4, 4), np.random.randint(0, 2)) for _ in range(10)]

# Stack the per-sample arrays into a single (10, 4, 4) ndarray before handing
# them to torch: converting a Python list of ndarrays directly is the slow
# element-by-element path and emits a UserWarning on recent PyTorch versions.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# other cells may refer to it.
input = torch.tensor(np.stack([features for features, _ in _samples]), dtype=torch.float32)
target = torch.tensor([int(label) for _, label in _samples], dtype=torch.long)

# batch_size=1 so that each training step shows the activations of exactly one
# sample; shuffle=True randomises the order in which samples are visited.
dataset = TensorDataset(input, target)
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

In [117]:
# One instance of each model variant under comparison.
relu_model = Relu_model()
relu_dropout_model = Relu_Dropout_model()
dropout_model = Dropout_model()

# A separate Adam optimizer (lr=0.01) per model so the updates stay independent.
relu_optimizer = optim.Adam(relu_model.parameters(), lr=0.01)
relu_dropout_optimizer = optim.Adam(relu_dropout_model.parameters(), lr=0.01)
dropout_optimizer = optim.Adam(dropout_model.parameters(), lr=0.01)

criterion = torch.nn.CrossEntropyLoss()

# Single pass over the data. Every step feeds the same sample to all three
# models, takes one optimizer step each, then dumps all parameters so the
# effect of ReLU / dropout on the updates can be compared side by side.
for step, batch in enumerate(loader, start=1):
    # Training mode keeps dropout active.
    for net in (relu_model, relu_dropout_model, dropout_model):
        net.train()

    print('\n', step)
    x, y = batch
    # Flatten each sample to a 16-element row vector for the first linear layer.
    X = x.view(-1, 16)

    # Clear gradients left over from the previous step.
    for opt in (relu_optimizer, relu_dropout_optimizer, dropout_optimizer):
        opt.zero_grad()

    # --- ReLU only ---
    print('ReLU 모델 output \n')
    r_output = relu_model(X)
    r_loss = criterion(r_output, y)
    r_loss.backward()
    relu_optimizer.step()

    # --- ReLU + Dropout ---
    print('\n ReLU + Dropout 모델 output \n')
    rd_output = relu_dropout_model(X)
    rd_loss = criterion(rd_output, y)
    rd_loss.backward()
    relu_dropout_optimizer.step()

    # --- Dropout only ---
    print('\n Dropout 모델 output \n')
    d_output = dropout_model(X)
    d_loss = criterion(d_output, y)
    d_loss.backward()
    dropout_optimizer.step()

    # Print every parameter tensor of every model after this step's update.
    for header, net in (
        ('\n ReLU 모델 parameters \n', relu_model),
        ('\n ReLU + Dropout 모델 parameters \n', relu_dropout_model),
        ('\n Dropout 모델 parameters \n', dropout_model),
    ):
        print(header)
        for param_name, param_value in net.state_dict().items():
            print(param_name)
            print(param_value)
     
     


 1
ReLU 모델 output 

tensor([[0.0000, 0.0000, 0.0000, 0.2484, 0.0000, 0.0000, 0.0000, 0.1573, 0.0000,
         0.4085, 0.0000, 0.0000, 0.0000, 0.0000, 0.5498, 0.3598]],
       grad_fn=<ReluBackward0>)
tensor([[0.1484, 0.0127, 0.0000, 0.0000, 0.0000, 0.1628, 0.3041, 0.0000, 0.0000,
         0.0000, 0.0000, 0.1973, 0.0000, 0.0045, 0.0090, 0.0000]],
       grad_fn=<ReluBackward0>)
tensor([[-0.1105,  0.1512]], grad_fn=<AddmmBackward>)

 ReLU + Dropout 모델 output 

tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0995, 0.0000]],
       grad_fn=<MulBackward0>)
tensor([[0.1685, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.7475, 0.0000, 0.0000,
         0.0000, 0.0898, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]],
       grad_fn=<MulBackward0>)
tensor([[-0.0201,  0.1789]], grad_fn=<AddmmBackward>)

 Dropout 모델 output 

tensor([[-0.2057, -0.0000, -0.0000,  0.4969, -0.0000, -0.8051, -0.7806,  0.0000,
         -0.0000,  0.