In [11]:
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [12]:
X = torch.FloatTensor([[0, 0], [0, 1], [1, 0], [1, 1]]).to(device)
Y = torch.FloatTensor([[0], [1], [1], [0]]).to(device)

In [13]:
linear1 = torch.nn.Linear(2, 2)
linear2 = torch.nn.Linear(2, 1)
sigmoid = torch.nn.Sigmoid()

In [14]:
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)

In [15]:
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)

In [16]:
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.7434073090553284
100 0.693165123462677
200 0.6931577920913696
300 0.6931517124176025
400 0.6931463479995728
500 0.6931411027908325
600 0.6931357383728027
700 0.6931294798851013
800 0.6931220889091492
900 0.6931126117706299
1000 0.6930999755859375
1100 0.693082332611084
1200 0.6930569410324097
1300 0.6930190324783325
1400 0.6929605603218079
1500 0.6928660273551941
1600 0.6927032470703125
1700 0.6923960447311401
1800 0.6917301416397095
1900 0.6899653673171997
2000 0.6838314533233643
2100 0.6561660766601562
2200 0.4310959577560425
2300 0.13489189743995667
2400 0.06630392372608185
2500 0.04216799512505531
2600 0.030453745275735855
2700 0.023665836080908775
2800 0.01927768811583519
2900 0.016223978251218796
3000 0.01398373395204544
3100 0.012273887172341347
3200 0.010928073897957802
3300 0.009842457249760628
3400 0.008949018083512783
3500 0.008201321586966515
3600 0.0075667379423975945
3700 0.007021641358733177
3800 0.006548580713570118
3900 0.006134224124252796
4000 0.00576837500557303

In [17]:
# with torch.no_grad(): 해줘야 쓸 데 없는 메모리 소비를 줄일 수 있음
with torch.no_grad():
    hypothesis = model(X)
    predicted = (hypothesis > 0.5).float()
    accuracy = (predicted == Y).float().mean()
    print('\nHypothesis: ', hypothesis.detach().cpu().numpy(), 
      '\nCorrect: ', predicted.detach().cpu().numpy(), 
      '\nAccuracy: ', accuracy.item())


Hypothesis:  [[0.00106364]
 [0.99889404]
 [0.99889404]
 [0.00165861]] 
Correct:  [[0.]
 [1.]
 [1.]
 [0.]] 
Accuracy:  1.0


In [18]:
# 10000 0.0005324085359461606
linear1 = torch.nn.Linear(2, 4)
linear2 = torch.nn.Linear(4, 1)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear2, sigmoid).to(device)
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    
    if step % 100 == 0:
        print(step, cost.item())

0 0.7119952440261841
100 0.6844919919967651
200 0.6300845742225647
300 0.457725465297699
400 0.20160670578479767
500 0.091575026512146
600 0.05363701656460762
700 0.0363164022564888
800 0.026747897267341614
900 0.020799733698368073
1000 0.016800204291939735
1100 0.013956522569060326
1200 0.0118485726416111
1300 0.010234227403998375
1400 0.008965292014181614
1500 0.007946285419166088
1600 0.007113165687769651
1700 0.006421503610908985
1800 0.005839738994836807
1900 0.005344723351299763
2000 0.004919325467199087
2100 0.004550454206764698
2200 0.004228024743497372
2300 0.00394423957914114
2400 0.0036927545443177223
2500 0.0034686452709138393
2600 0.003267864231020212
2700 0.0030870730988681316
2800 0.00292359571903944
2900 0.00277511915192008
3000 0.002639781218022108
3100 0.0025159758515655994
3200 0.002402352401986718
3300 0.002297756029292941
3400 0.0022011969704180956
3500 0.0021117909345775843
3600 0.002028863178566098
3700 0.0019516947213560343
3800 0.00187974632717669
3900 0.001812

In [44]:
model

Sequential(
  (0): Linear(in_features=2, out_features=2, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=2, out_features=2, bias=True)
  (3): Sigmoid()
  (4): Linear(in_features=2, out_features=1, bias=True)
  (5): Sigmoid()
)

In [45]:
model[0]

Linear(in_features=2, out_features=2, bias=True)

In [37]:
model[0].weight

Parameter containing:
tensor([[-9.2010, -8.9337],
        [ 4.4466,  4.4723]], requires_grad=True)

In [50]:
model[0].bias

Parameter containing:
tensor([ 5.5627, -5.1584], requires_grad=True)

In [21]:
# 10000 010000 0.00048230821266770363
linear1 = torch.nn.Linear(2, 2)
linear2 = torch.nn.Linear(2, 1)
sigmoid = torch.nn.Sigmoid()
model = torch.nn.Sequential(linear1, sigmoid, linear1, sigmoid, linear2, sigmoid)\
.to(device)
criterion = torch.nn.BCELoss().to(device)
optimizer = torch.optim.SGD(model.parameters(), lr = 1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis = model(X)
    cost = criterion(hypothesis, Y)
    cost.backward()
    optimizer.step()
    

    if step % 100 == 0:
        print(step, cost.item())

0 0.7000895738601685
100 0.6924954652786255
200 0.6917352080345154
300 0.689288318157196
400 0.6775556802749634
500 0.563583254814148
600 0.3970608115196228
700 0.13214941322803497
800 0.041225939989089966
900 0.023287013173103333
1000 0.01601085439324379
1100 0.012127500027418137
1200 0.009728856384754181
1300 0.008106118068099022
1400 0.006937999278306961
1500 0.00605832040309906
1600 0.005372734274715185
1700 0.004823856055736542
1800 0.004374843556433916
1900 0.004000831861048937
2000 0.0036846641451120377
2100 0.0034140190109610558
2200 0.00317970709875226
2300 0.0029749441891908646
2400 0.002794526517391205
2500 0.002634389791637659
2600 0.00249128183349967
2700 0.002362700179219246
2800 0.002246518386527896
2900 0.0021410139743238688
3000 0.0020448544528335333
3100 0.001956827472895384
3200 0.0018759157974272966
3300 0.0018013258231803775
3400 0.0017323847860097885
3500 0.0016684341244399548
3600 0.0016089361160993576
3700 0.0015534715494140983
3800 0.0015016667312011123
3900 0.