## Dropout과 배치 정규화 이해하기

In [1]:
import numpy as np
import pandas as pd

train = pd.read_csv('/content/drive/MyDrive/Euron 6기/train.csv')
test = pd.read_csv('/content/drive/MyDrive/Euron 6기/test.csv')
submission = pd.read_csv('/content/drive/MyDrive/Euron 6기/gender_submission.csv')

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.classifier = nn.Sequential(
            nn.Linear(5, 128),
            ## Batch Normalization between 'Layer' and 'Activation function'
            nn.BatchNorm1d(128),
            nn.ReLU(),
            ## Drop out after 'Activation function'
            nn.Dropout(0.1),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )


#순전파 함수
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [3]:
data_set = pd.concat((train.drop(['Survived'], axis = 1), test), axis = 0)

data_set = data_set.drop(['PassengerId', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis = 1)
data_set = data_set.fillna(data_set.mean())

n_train = train.shape[0]
train_x, test_x = data_set[:n_train], data_set[n_train:]
train_y = train['Survived']

train_x = train_x[train_x.keys()].values
test_x = test_x[test_x.keys()].values
train_y = train_y.values

import torch.optim as optim
from torch.autograd import Variable

simple_nn = SimpleNN()
optimizer = optim.Adam(simple_nn.parameters(), lr=0.01)
error = nn.BCELoss()

batch_size = 99
batch_count = int(len(train_x) / batch_size)

#에폭 당 loss 비교
for epoch in range(300):
    train_loss = 0
    num_right = 0
    for i in range(batch_count):
        start = i * batch_size
        end = start + batch_size
        tensor_x = torch.FloatTensor(train_x[start:end])
        tensor_y = torch.FloatTensor(train_y[start:end]).reshape(-1, 1)

        optimizer.zero_grad()
        output = simple_nn(tensor_x)
        loss = error(output, tensor_y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * batch_size
        result = [1 if out >= 0.5 else 0 for out in output]
        num_right += np.sum(np.array(result) == train_y[start:end])

    train_loss = train_loss / len(train_x)
    accuracy = num_right / len(train_x)

    if epoch % 25 == 0:
        print('Loss: {} Accuracy: {}% Epoch:{}'.format(train_loss, accuracy, epoch))

print('Training Ended')

Loss: 0.6692924367056953 Accuracy: 0.6419753086419753% Epoch:0
Loss: 0.5431633525424533 Accuracy: 0.7373737373737373% Epoch:25
Loss: 0.4743279880947537 Accuracy: 0.7620650953984287% Epoch:50
Loss: 0.4282568295796712 Accuracy: 0.8114478114478114% Epoch:75
Loss: 0.3764321539137099 Accuracy: 0.8260381593714927% Epoch:100
Loss: 0.3634873463047875 Accuracy: 0.8428731762065096% Epoch:125
Loss: 0.30443838569853043 Accuracy: 0.8731762065095399% Epoch:150
Loss: 0.30148064427905613 Accuracy: 0.8597081930415263% Epoch:175
Loss: 0.26195374296771157 Accuracy: 0.8900112233445566% Epoch:200
Loss: 0.2547840740945604 Accuracy: 0.8832772166105499% Epoch:225
Loss: 0.27318720685111153 Accuracy: 0.8787878787878788% Epoch:250
Loss: 0.2501436306370629 Accuracy: 0.8911335578002245% Epoch:275
Training Ended


In [4]:
tensor_test_x = torch.FloatTensor(test_x)
with torch.no_grad():
    test_output = simple_nn(tensor_test_x)
    result = np.array([1 if out >= 0.5 else 0 for out in test_output])
    # submission = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': result})
    # submission.to_csv('submission.csv', index=False)