In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


## XOR

In [2]:
X= torch.FloatTensor([[0,0],[0,1],[1,0],[1,1]])
Y= torch.FloatTensor([[0],[1],[1],[0]])

In [3]:
linear1= torch.nn.Linear(2,2,bias=True)
linear2=torch.nn.Linear(2,1,bias=True)
sigmoid=torch.nn.Sigmoid()
model=torch.nn.Sequential(linear1,sigmoid,linear2,sigmoid)

In [4]:
criterion=torch.nn.BCELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=1)
for step in range(10001):
    optimizer.zero_grad()
    hypothesis=model(X)
    cost=criterion(hypothesis,Y)
    cost.backward()
    optimizer.step()
    if step %100==0:
        print(step,cost.item())

0 0.7663730382919312
100 0.6933186054229736
200 0.6932241320610046
300 0.6931861639022827
400 0.693166971206665
500 0.6931553483009338
600 0.6931467056274414
700 0.6931384801864624
800 0.6931287050247192
900 0.6931148767471313
1000 0.6930917501449585
1100 0.6930487155914307
1200 0.6929554343223572
1300 0.6927096843719482
1400 0.691815197467804
1500 0.6860268712043762
1600 0.6282649636268616
1700 0.413867712020874
1800 0.12728853523731232
1900 0.06094200909137726
2000 0.03874177485704422
2100 0.028097189962863922
2200 0.02193659171462059
2300 0.017945772036910057
2400 0.01516011729836464
2500 0.013109810650348663
2600 0.011540025472640991
2700 0.010300795547664165
2800 0.009298373945057392
2900 0.008471278473734856
3000 0.007777562830597162
3100 0.007187572307884693
3200 0.006679747719317675
3300 0.006238165777176619
3400 0.005850786343216896
3500 0.005508147180080414
3600 0.005203068722039461
3700 0.004929674323648214
3800 0.004683325532823801
3900 0.004460167605429888
4000 0.004257162

## MNIST USING DNN

In [28]:
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import random
device = 'cuda' if torch.cuda.is_available() else 'cpu'
training_epochs = 15
batch_size = 100
mnist_train= dsets.MNIST(root='MNIST_data/',train=True, transform=transforms.ToTensor(),download=True)

In [29]:
mnist_test= dsets.MNIST(root='MNIST_data/',train=False, transform=transforms.ToTensor(),download=True)

In [30]:
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)

In [31]:
linear = torch.nn.Linear(784, 10, bias=True).to(device)
criterion=torch.nn.CrossEntropyLoss().to(device)
optimizer=torch.optim.SGD(linear.parameters(),lr=0.1)

In [32]:
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)

    for X, Y in data_loader:
        
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = linear(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.534764349
Epoch: 0002 cost = 0.359182149
Epoch: 0003 cost = 0.331175208
Epoch: 0004 cost = 0.316532463
Epoch: 0005 cost = 0.307358682
Epoch: 0006 cost = 0.300414354
Epoch: 0007 cost = 0.295144439
Epoch: 0008 cost = 0.290909857
Epoch: 0009 cost = 0.287505209
Epoch: 0010 cost = 0.284474164
Epoch: 0011 cost = 0.281967610
Epoch: 0012 cost = 0.279934585
Epoch: 0013 cost = 0.277872592
Epoch: 0014 cost = 0.276308477
Epoch: 0015 cost = 0.274320900
Learning finished


In [33]:
with torch.no_grad(): # not computing gradient    test data이니
    X_test=mnist_test.test_data.view(-1,28*28).float().to(device)
    Y_test=mnist_test.test_labels.to(device)
    prediction=linear(X_test)
    correct_prediction=torch.argmax(prediction,1) ==Y_test
    accuracy=correct_prediction.float().mean()
    print(accuracy.item())

0.8799999952316284


## weight initialization( xavier, He)

In [54]:
def xavier_uniform_(tensor,gain=1):
    fan_in,fan_out=_calculate_fan_in_and_fan_out(tensor)
    std=gain*math.sqrt(2.0/(fan_in+fan_out))
    a=math.sqrt(3.0)*std
    with torch.no_grad():
        return tensor.uniform_(-a,a)

In [55]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [56]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [57]:
linear1=torch.nn.Linear(784,256,bias=True)
linear2=torch.nn.Linear(256,256,bias=True)
linear3=torch.nn.Linear(256,10,bias=True)
relu=torch.nn.ReLU()

In [58]:
torch.nn.init.xavier_uniform_(linear1.weight) #package
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

Parameter containing:
tensor([[-0.0215, -0.0894,  0.0598,  ...,  0.0200,  0.0203,  0.1212],
        [ 0.0078,  0.1378,  0.0920,  ...,  0.0975,  0.1458, -0.0302],
        [ 0.1270, -0.1296,  0.1049,  ...,  0.0124,  0.1173, -0.0901],
        ...,
        [ 0.0661, -0.1025,  0.1437,  ...,  0.0784,  0.0977, -0.0396],
        [ 0.0430, -0.1274, -0.0134,  ..., -0.0582,  0.1201,  0.1479],
        [-0.1433,  0.0200, -0.0568,  ...,  0.0787,  0.0428, -0.0036]],
       requires_grad=True)

In [48]:
model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3).to(device)

In [49]:
criterion = torch.nn.CrossEntropyLoss().to(device)    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [50]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.246783793
Epoch: 0002 cost = 0.093234085
Epoch: 0003 cost = 0.062067624
Epoch: 0004 cost = 0.043566491
Epoch: 0005 cost = 0.032832675
Epoch: 0006 cost = 0.026513761
Epoch: 0007 cost = 0.020483099
Epoch: 0008 cost = 0.017960707
Epoch: 0009 cost = 0.015639452
Epoch: 0010 cost = 0.014098583
Epoch: 0011 cost = 0.013838690
Epoch: 0012 cost = 0.011658964
Epoch: 0013 cost = 0.012876031
Epoch: 0014 cost = 0.009459957
Epoch: 0015 cost = 0.005646684
Learning finished


## dropout

In [59]:
linear1=torch.nn.Linear(784,256,bias=True)
linear2=torch.nn.Linear(256,256,bias=True)
linear3=torch.nn.Linear(256,10,bias=True)
relu=torch.nn.ReLU()
dropout=torch.nn.Dropout(p=0.2)

In [60]:
model = torch.nn.Sequential(linear1, relu, dropout,linear2, relu,dropout, linear3).to(device)

In [61]:
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0

    for X, Y in data_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 2.305563927
Epoch: 0002 cost = 2.305354595
Epoch: 0003 cost = 2.305301905
Epoch: 0004 cost = 2.305222988
Epoch: 0005 cost = 2.305372000
Epoch: 0006 cost = 2.305401564
Epoch: 0007 cost = 2.305252790
Epoch: 0008 cost = 2.305263281
Epoch: 0009 cost = 2.305448055
Epoch: 0010 cost = 2.305222750
Epoch: 0011 cost = 2.305374146
Epoch: 0012 cost = 2.305341005
Epoch: 0013 cost = 2.305399418
Epoch: 0014 cost = 2.305401564
Epoch: 0015 cost = 2.305389166
Learning finished
