# Dropout과 Dead ReLU는 어떤 차이점이 있을까?

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader, TensorDataset

import numpy as np
import copy

np.set_printoptions(threshold=np.inf, linewidth=np.inf)

In [13]:
# Base layers for the ReLU-only network (4 -> 3 -> 3 -> 2, all with bias).
# The generator is consumed left to right, so the layers are constructed
# (and randomly initialised) in the order 1, 2, 3.
relu_linear1, relu_linear2, relu_linear3 = (
    nn.Linear(n_in, n_out, bias=True)
    for n_in, n_out in ((4, 3), (3, 3), (3, 2))
)

# Independent deep copies, so the other two networks start from exactly the
# same weights but are trained separately from the base layers.
relu_drop_linear1, relu_drop_linear2, relu_drop_linear3 = map(
    copy.deepcopy, (relu_linear1, relu_linear2, relu_linear3)
)

drop_linear1, drop_linear2, drop_linear3 = map(
    copy.deepcopy, (relu_linear1, relu_linear2, relu_linear3)
)

class Relu_model(nn.Module):
    """Three-layer fully connected network with ReLU after the first two layers.

    The layers are not created here: the module-level objects
    ``relu_linear1``, ``relu_linear2`` and ``relu_linear3`` are attached
    as-is, so this model shares their parameters.
    """

    def __init__(self):
        super().__init__()
        # Registration order (fc1, fc2, fc3) fixes the state_dict key order.
        self.fc1 = relu_linear1
        self.fc2 = relu_linear2
        self.fc3 = relu_linear3
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run ``x`` through the network, printing every layer's activation.

        Prints the two post-ReLU hidden activations and the final output,
        then returns the final output of ``fc3``.
        """
        hidden1 = self.relu(self.fc1(x))
        print(hidden1)

        hidden2 = self.relu(self.fc2(hidden1))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits
    
class Relu_Dropout_model(nn.Module):
    """Three-layer network applying ReLU and then Dropout(0.5) to both hidden layers.

    The linear layers are the module-level objects ``relu_drop_linear1``,
    ``relu_drop_linear2`` and ``relu_drop_linear3``; they are attached
    as-is rather than re-created.
    """

    def __init__(self):
        super().__init__()
        # Registration order (fc1, fc2, fc3) fixes the state_dict key order.
        self.fc1 = relu_drop_linear1
        self.fc2 = relu_drop_linear2
        self.fc3 = relu_drop_linear3
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run ``x`` through the network, printing every layer's activation.

        Each hidden activation is printed after dropout has been applied,
        followed by the final output, which is also returned.
        """
        hidden1 = self.dropout(self.relu(self.fc1(x)))
        print(hidden1)

        hidden2 = self.dropout(self.relu(self.fc2(hidden1)))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits
    
class Dropout_model(nn.Module):
    """Three-layer network with Dropout(0.5) on both hidden layers and no ReLU.

    The linear layers are the module-level objects ``drop_linear1``,
    ``drop_linear2`` and ``drop_linear3``; they are attached as-is rather
    than re-created.
    """

    def __init__(self):
        super().__init__()
        # Registration order (fc1, fc2, fc3) fixes the state_dict key order.
        self.fc1 = drop_linear1
        self.fc2 = drop_linear2
        self.fc3 = drop_linear3
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Run ``x`` through the network, printing every layer's activation.

        Each hidden activation is printed after dropout has been applied,
        followed by the final output, which is also returned.
        """
        hidden1 = self.dropout(self.fc1(x))
        print(hidden1)

        hidden2 = self.dropout(self.fc2(hidden1))
        print(hidden2)

        logits = self.fc3(hidden2)
        print(logits)

        return logits

In [14]:
# Build a tiny random classification dataset: 10 samples, each with 4 integer
# features drawn from [0, 10) and a binary label drawn from {0, 1}.
#
# NOTE: `input` shadows the Python builtin of the same name. The name is kept
# because it is a module-level variable that other cells may refer to.
input = []
target = []

for _ in range(10):
    # Features are drawn before the label so the order of random draws
    # matches a per-sample (x, y) generation.
    input.append(np.random.randint(0, 10, size=4))
    target.append(np.random.randint(0, 2))

# Stack the per-sample arrays into one (10, 4) ndarray first: handing PyTorch
# a Python list of ndarrays takes its slow element-by-element path and emits
# a UserWarning on current versions. Explicit dtypes replace the legacy
# FloatTensor / LongTensor constructors (float32 features, int64 labels).
input = torch.tensor(np.stack(input), dtype=torch.float32)
target = torch.tensor(target, dtype=torch.long)

dataset = TensorDataset(input, target)
# One sample per batch, in a freshly shuffled order on every pass.
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)

In [15]:
# Walk the loader once and show the features and label of every mini-batch.
for batch in loader:
    x, y = batch
    print(x)
    print(y)

tensor([[1., 2., 3., 2.]])
tensor([0])
tensor([[4., 9., 5., 3.]])
tensor([1])
tensor([[3., 9., 3., 0.]])
tensor([1])
tensor([[1., 9., 4., 8.]])
tensor([0])
tensor([[5., 8., 4., 1.]])
tensor([1])
tensor([[1., 1., 3., 4.]])
tensor([0])
tensor([[6., 1., 3., 8.]])
tensor([1])
tensor([[4., 4., 4., 8.]])
tensor([0])
tensor([[2., 6., 6., 1.]])
tensor([1])
tensor([[5., 6., 2., 2.]])
tensor([1])


In [16]:
relu_model = Relu_model()
relu_dropout_model = Relu_Dropout_model()
dropout_model = Dropout_model()

# Dump every parameter tensor of each network, one network after another,
# each preceded by its heading.
for title, net in (
    ('\n ReLU 모델 parameters \n', relu_model),
    ('\n ReLU + Dropout 모델 parameters \n', relu_dropout_model),
    ('\n Dropout 모델 parameters \n', dropout_model),
):
    print(title)
    for names, value in net.state_dict().items():
        print(names)
        print(value)


 ReLU 모델 parameters 

fc1.weight
tensor([[-0.1445, -0.1877,  0.1988, -0.2451],
        [-0.1419,  0.4014, -0.0861,  0.2474],
        [ 0.2384, -0.2071,  0.3202, -0.4563]])
fc1.bias
tensor([-0.2018,  0.4938,  0.3645])
fc2.weight
tensor([[-0.1054,  0.1650,  0.1490],
        [ 0.1499,  0.2248,  0.1168],
        [-0.4862,  0.1109,  0.3984]])
fc2.bias
tensor([ 0.4801,  0.2798, -0.5243])
fc3.weight
tensor([[ 0.3120,  0.2138,  0.3970],
        [ 0.5510, -0.3279,  0.5501]])
fc3.bias
tensor([-0.0059, -0.2946])

 ReLU + Dropout 모델 parameters 

fc1.weight
tensor([[-0.1445, -0.1877,  0.1988, -0.2451],
        [-0.1419,  0.4014, -0.0861,  0.2474],
        [ 0.2384, -0.2071,  0.3202, -0.4563]])
fc1.bias
tensor([-0.2018,  0.4938,  0.3645])
fc2.weight
tensor([[-0.1054,  0.1650,  0.1490],
        [ 0.1499,  0.2248,  0.1168],
        [-0.4862,  0.1109,  0.3984]])
fc2.bias
tensor([ 0.4801,  0.2798, -0.5243])
fc3.weight
tensor([[ 0.3120,  0.2138,  0.3970],
        [ 0.5510, -0.3279,  0.5501]])
fc3.bias
t

In [17]:
# One Adam optimizer per network, all with the same learning rate.
relu_optimizer = optim.Adam(relu_model.parameters(), lr=0.01)
relu_dropout_optimizer = optim.Adam(relu_dropout_model.parameters(), lr=0.01)
dropout_optimizer = optim.Adam(dropout_model.parameters(), lr=0.01)

criterion = torch.nn.CrossEntropyLoss()

# Single pass over the loader: every mini-batch is fed to all three networks,
# each network takes one optimizer step, and then all parameters are printed.
for i, data in enumerate(loader):
    # Training mode keeps the Dropout layers active.
    for net in (relu_model, relu_dropout_model, dropout_model):
        net.train()

    print('\n', i+1)
    x, y = data

    # Clear gradients left over from the previous step.
    for opt in (relu_optimizer, relu_dropout_optimizer, dropout_optimizer):
        opt.zero_grad()

    # The three forward passes stay in this fixed order so the dropout masks
    # are drawn from the random stream in the same sequence on every step.
    print('ReLU 모델 output \n')
    r_output = relu_model(x)
    r_loss = criterion(r_output, y)
    r_loss.backward()
    relu_optimizer.step()

    print('\n ReLU + Dropout 모델 output \n')
    rd_output = relu_dropout_model(x)
    rd_loss = criterion(rd_output, y)
    rd_loss.backward()
    relu_dropout_optimizer.step()

    print('\n Dropout 모델 output \n')
    d_output = dropout_model(x)
    d_loss = criterion(d_output, y)
    d_loss.backward()
    dropout_optimizer.step()

    # Show the parameters of every network after this update.
    for title, net in (
        ('\n ReLU 모델 parameters \n', relu_model),
        ('\n ReLU + Dropout 모델 parameters \n', relu_dropout_model),
        ('\n Dropout 모델 parameters \n', dropout_model),
    ):
        print(title)
        for names, value in net.state_dict().items():
            print(names)
            print(value)
     
     


 1
ReLU 모델 output 

tensor([[0.0000, 1.3911, 0.2366]], grad_fn=<ReluBackward0>)
tensor([[0.7449, 0.6202, 0.0000]], grad_fn=<ReluBackward0>)
tensor([[ 0.3592, -0.0875]], grad_fn=<AddmmBackward>)

 ReLU + Dropout 모델 output 

tensor([[0.0000, 2.7823, 0.0000]], grad_fn=<MulBackward0>)
tensor([[1.8783, 1.8106, 0.0000]], grad_fn=<MulBackward0>)
tensor([[0.9674, 0.1467]], grad_fn=<AddmmBackward>)

 Dropout 모델 output 

tensor([[-0.0000, 2.7823, 0.0000]], grad_fn=<MulBackward0>)
tensor([[1.8783, 0.0000, -0.0000]], grad_fn=<MulBackward0>)
tensor([[0.5802, 0.7404]], grad_fn=<AddmmBackward>)

 ReLU 모델 parameters 

fc1.weight
tensor([[-0.1445, -0.1877,  0.1988, -0.2451],
        [-0.1319,  0.4114, -0.0761,  0.2574],
        [ 0.2484, -0.1971,  0.3302, -0.4463]])
fc1.bias
tensor([-0.2018,  0.5038,  0.3745])
fc2.weight
tensor([[-0.1054,  0.1550,  0.1390],
        [ 0.1499,  0.2348,  0.1268],
        [-0.4862,  0.1109,  0.3984]])
fc2.bias
tensor([ 0.4701,  0.2898, -0.5243])
fc3.weight
tensor([[ 0.322