In [1]:
import random
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import StratifiedKFold
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

### 데이터 엔지니어링

In [2]:
train  = pd.read_csv('data/train.csv')
test  = pd.read_csv('data/test.csv')

tipi_list = [f'TIPI{i + 1}' for i in range(10)]
drop_list = ['country','index']

for i in range(len(tipi_list) // 2):
    if i % 2 == 0:
        po_ti=tipi_list[i]
        ne_ti=tipi_list[i + 5]

    elif i % 2 == 1:
        po_ti=tipi_list[i + 5]
        ne_ti=tipi_list[i]

    train[ne_ti]=train[ne_ti].apply(lambda x : 6-x)
    train[f'gen_TIPI{i + 1}']=(train[po_ti]+train[ne_ti])/2
    test[ne_ti]=test[ne_ti].apply(lambda x : 6-x)
    test[f'gen_TIPI{i + 1}']=(test[po_ti]+test[ne_ti])/2

# VCL scoring
vcl_list = [f'VCL{i + 1}' for i in range(16)]

train_vcl_score_temp = train[vcl_list[0]]
test_vcl_score_temp = test[vcl_list[0]]
for i in vcl_list[1:]:
    if i == 'VCL6' or i == 'VCL9' or i == 'VCL12':
        train_vcl_score_temp -= train[i] * 4
        test_vcl_score_temp -= test[i] * 4
    else:
        train_vcl_score_temp += train[i]
        test_vcl_score_temp += test[i]

train['VCL_score'] = train_vcl_score_temp
test['VCL_score'] = test_vcl_score_temp

train = train.drop(train[train.familysize > 50].index)
train = train.drop(train[train.age > 120].index)
train_y = train['nerdiness']
train_x = train.drop(drop_list + tipi_list + ['nerdiness'], axis=1)
test = test.drop(drop_list + tipi_list, axis=1)
train_x.reset_index(drop=True,inplace=True)
train_x

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,...,voted,married,familysize,ASD,gen_TIPI1,gen_TIPI2,gen_TIPI3,gen_TIPI4,gen_TIPI5,VCL_score
0,1.0,5.0,5.0,5.0,1.0,4.0,5.0,5.0,1.0,3.0,...,2.0,1.0,4.0,2.0,2.5,4.0,4.0,5.0,3.0,8
1,4.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,3.0,3.0,...,1.0,2.0,4.0,2.0,4.0,4.5,4.0,1.5,3.5,11
2,4.0,5.0,5.0,4.0,3.0,5.0,5.0,5.0,4.0,4.0,...,2.0,3.0,4.0,2.0,1.0,3.5,2.5,5.0,4.5,11
3,4.0,4.0,4.0,2.0,4.0,3.0,3.0,5.0,3.0,4.0,...,2.0,1.0,2.0,2.0,3.0,3.5,2.5,2.5,4.0,9
4,4.0,4.0,4.0,4.0,3.0,3.0,4.0,2.0,3.0,4.0,...,2.0,1.0,1.0,2.0,2.5,3.0,3.0,2.5,4.0,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14988,2.0,5.0,4.0,3.0,3.0,4.0,4.0,4.0,3.0,4.0,...,2.0,1.0,3.0,2.0,1.5,3.5,3.5,3.0,3.0,6
14989,5.0,4.0,5.0,4.0,4.0,5.0,5.0,4.0,4.0,5.0,...,1.0,2.0,3.0,2.0,3.0,4.0,4.5,1.5,4.0,8
14990,4.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,5.0,...,1.0,1.0,3.0,1.0,1.0,3.0,2.5,1.0,3.0,9
14991,5.0,5.0,4.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,...,2.0,2.0,2.0,1.0,1.0,5.0,2.5,1.5,4.5,7


In [3]:
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
train_y_t  = train_y.to_numpy()
train_x_t  = train_x.to_numpy()
test_x_t  = test.to_numpy()
train_y_t = torch.tensor(train_y_t, dtype=torch.float32)
train_x_t = torch.tensor(train_x_t, dtype=torch.float32)
test_x_t = torch.tensor(test_x_t, dtype=torch.float32)
test_len = len(test_x_t)

In [130]:
N_REPEAT = 5
N_SKFOLD = 5
N_EPOCH = 48
BATCH_SIZE = 256
LOADER_PARAM = {
    'batch_size': BATCH_SIZE,
    'num_workers': 4,
    'pin_memory': True
}
prediction = np.zeros((test_len, 1), dtype=np.float32)

for repeat in range(N_REPEAT):

    skf, tot = StratifiedKFold(n_splits=N_SKFOLD, random_state=repeat, shuffle=True), 0.
    for skfold, (train_idx, valid_idx) in enumerate(skf.split(train_x, train_y)):
        train_idx, valid_idx = list(train_idx), list(valid_idx)
        train_loader = DataLoader(TensorDataset(train_x_t[train_idx, :], train_y_t[train_idx]),
                                  shuffle=True, drop_last=True, **LOADER_PARAM)
        valid_loader = DataLoader(TensorDataset(train_x_t[valid_idx, :], train_y_t[valid_idx]),
                                  shuffle=False, drop_last=False, **LOADER_PARAM)
        test_loader = DataLoader(TensorDataset(test_x_t, torch.zeros((test_len,), dtype=torch.float32)),
                                 shuffle=False, drop_last=False, **LOADER_PARAM)
        model = nn.Sequential(
            nn.Dropout(0.05),
            nn.Linear(63, 180, bias=False),
            nn.LeakyReLU(0.05, inplace=True),
            nn.Dropout(0.5),
            nn.Linear(180, 32, bias=False),
           nn.ReLU(inplace=True),
            nn.Linear(32, 1),
            nn.Sigmoid()
        ).to(DEVICE)
        criterion = torch.nn.BCELoss().to(DEVICE)
        optimizer = torch.optim.SGD(model.parameters(), lr=1)  # modified learning rate from 0.1 to 1
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, T_0=N_EPOCH // 6, eta_min=4e-4)
        prediction_t, loss_t = np.zeros((test_len, 1), dtype=np.float32), 1.

        # for epoch in range(N_EPOCH):
        for epoch in tqdm(range(N_EPOCH), desc='{:02d}/{:02d}'.format(skfold + 1, N_SKFOLD)):
            model.train()
            for idx, (xx, yy) in enumerate(train_loader):
                optimizer.zero_grad()
                xx, yy = xx.to(DEVICE), yy.to(DEVICE)
                pred = model(xx).squeeze()
                pred=torch.nan_to_num(pred, nan=0.0)
                loss = criterion(pred, yy)
                print(loss)
                loss.backward()
                optimizer.step()
                scheduler.step(epoch + idx / len(train_loader))

            with torch.no_grad():
                model.eval()
                running_acc, running_loss, running_count = 0, 0., 0
                for xx, yy in valid_loader:
                    xx, yy = xx.to(DEVICE), yy.to(DEVICE)
                    pred = model(xx).squeeze()
                    pred=torch.nan_to_num(pred, nan=0.0)
                    loss = criterion(pred, yy)
                    running_loss += loss.item() * len(yy)
                    running_count += len(yy)
                    running_acc += ((torch.sigmoid(pred) > 0.5).float() == yy).sum().item()
                # print('R{:02d} S{:02d} E{:02d} | {:6.4f}, {:5.2f}%'
                #       .format(repeat + 1, skfold + 1, epoch + 1, running_loss / running_count,
                #               running_acc / running_count * 100))

                if running_loss / running_count < loss_t:
                    loss_t = running_loss / running_count
                    for idx, (xx, _) in enumerate(test_loader):
                        xx = xx.to(DEVICE)
                        pred = (2. - torch.sigmoid(model(xx).detach().to('cpu'))).numpy()
                        prediction_t[BATCH_SIZE * idx:min(BATCH_SIZE * (idx + 1), len(prediction)), :] \
                            = pred[:, :].copy()
        prediction[:, :] += prediction_t[:, :].copy() / (N_REPEAT * N_SKFOLD)
        tot += loss_t
    print('R{} -> {:6.4f}'.format(repeat + 1, tot / N_SKFOLD))

df = pd.read_csv('./Data/sample_submission.csv')
df.iloc[:, 1:] = prediction
df.to_csv('./Result/{}.csv'.format(datetime.now().strftime('%m%d-%H%M')), index=False)

01/05:   0%|                                                                                    | 0/48 [00:00<?, ?it/s]

tensor(10.2850, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48.4375, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.7344, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.3750, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.8125, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.7344, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.

01/05:   2%|█▌                                                                          | 1/48 [00:05<04:32,  5.80s/it]

tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48.4375, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.5625, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.9531, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.1250, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.3750, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.5938, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(61.3281, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50.3906, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156

01/05:   4%|███▏                                                                        | 2/48 [00:13<05:14,  6.83s/it]

tensor(58.5938, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.9375, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(49.6094, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.7344, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(48.8281, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.

01/05:   6%|████▊                                                                       | 3/48 [00:24<06:26,  8.59s/it]

tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50.7812, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(62.1094, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.5625, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.9375, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(61.3281, gr

01/05:   8%|██████▎                                                                     | 4/48 [00:33<06:29,  8.84s/it]

tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.1250, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(61.3281, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.5625, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.8125, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50.7812, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.

01/05:  10%|███████▉                                                                    | 5/48 [00:42<06:20,  8.84s/it]

tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.1719, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.7656, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(62.8906, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.7656, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.1250, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.

01/05:  12%|█████████▌                                                                  | 6/48 [00:51<06:14,  8.93s/it]

tensor(57.0312, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50.3906, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50.3906, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(61.7188, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.0312, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.

01/05:  15%|███████████                                                                 | 7/48 [01:00<06:10,  9.03s/it]

tensor(51.1719, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.9531, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.5625, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(49.6094, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.3750, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.1562, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.

01/05:  17%|████████████▋                                                               | 8/48 [01:09<06:00,  9.02s/it]

tensor(52.7344, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.1562, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.8125, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.0312, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.

01/05:  19%|██████████████▎                                                             | 9/48 [01:18<05:47,  8.90s/it]

tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.9375, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.5938, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(49.2188, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.

01/05:  21%|███████████████▋                                                           | 10/48 [01:26<05:35,  8.83s/it]

tensor(51.9531, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.8594, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.9531, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.3750, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.1719, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.8125, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.8125, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.

01/05:  23%|█████████████████▏                                                         | 11/48 [01:35<05:25,  8.79s/it]

tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.0312, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.5156, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.5625, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.2969, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.0781, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(52.3438, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.1719, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(61.3281, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(50., grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(55.4688, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.1562, gr

01/05:  25%|██████████████████▊                                                        | 12/48 [01:44<05:17,  8.83s/it]

tensor(52.7344, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.0312, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.2500, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.9062, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.9844, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(53.1250, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(59.3750, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.1562, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(60.5469, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(56.6406, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.2031, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(49.2188, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(57.4219, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(51.9531, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(54.6875, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(58.

01/05:  25%|██████████████████▊                                                        | 12/48 [01:53<05:40,  9.46s/it]


KeyboardInterrupt: 

In [89]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [90]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [91]:
model = nn.Sequential(
          nn.Linear(2, 10, bias=True), # input_layer = 2, hidden_layer1 = 10
          nn.Sigmoid(),
          nn.Linear(10, 10, bias=True), # hidden_layer1 = 10, hidden_layer2 = 10
          nn.Sigmoid(),
          nn.Linear(10, 10, bias=True), # hidden_layer2 = 10, hidden_layer3 = 10
          nn.Sigmoid(),
          nn.Linear(10, 1, bias=True), # hidden_layer3 = 10, output_layer = 1
          nn.Sigmoid()
          ).to(device)

In [92]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [95]:
for epoch in range(10):
    optimizer.zero_grad()
    # forward 연산
    hypothesis = model(X)
    print(hypothesis)

    # 비용 함수
    cost = criterion(hypothesis, Y)
    print(cost)
    cost.backward()
    optimizer.step()


tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]], grad_fn=<SigmoidBackward0>)
tensor(0.6932, grad_fn=<BinaryCrossEntropyBackward0>)
tensor([[0.5004],
        [0.5000],
        [0.5000],
        [0.4996]