In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import fetch_openml
import numpy as np



In [41]:
if torch.cuda.is_available():
    DEVICE = 'cuda'
    torch.backends.cuda.matmul.allow_tf32 = True
else:
    DEVICE = 'cpu'

In [494]:
torch.backends.cuda.matmul.allow_tf32 = True

### (1) 숫자 이미지 

In [25]:
from sklearn.datasets import load_digits

X, y = fetch_openml('mnist_784', return_X_y=True, as_frame=False)
# X, y = load_digits(n_class=10, return_X_y=True, as_frame=False)


  warn(


In [495]:
X.shape, y.shape

((70000, 784), (70000,))

In [496]:
Xreshape = X.reshape(-1, 28, 28).astype(np.int64)

In [497]:
yastype = y.astype(np.int64)

In [498]:
Xt = torch.from_numpy(Xreshape).float()
yt = torch.from_numpy(yastype).long()
Xt.shape, yt.shape

(torch.Size([70000, 28, 28]), torch.Size([70000]))

In [499]:
if torch.cuda.is_available():
    Xt = Xt.cuda()
    yt = yt.cuda()

In [500]:
Xt = Xt.view(-1, 784)
Xt.is_contiguous()
yt.device

device(type='cuda', index=0)

In [518]:
ytunsq = yt.unsqueeze(1)
ytunsq.shape


ytunsq_hot = torch.zeros((yt.shape[0], 10), device=DEVICE).scatter(1, ytunsq, 1)
ytunsq_hot[10]

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.], device='cuda:0')

In [502]:
yt_onehot = F.one_hot(yt, 10).float()
yt_onehot.device, yt_onehot.shape

(device(type='cuda', index=0), torch.Size([70000, 10]))

In [513]:
model = nn.Sequential(
    nn.Linear(784, 10, device=DEVICE),
    nn.Softmax(dim=1)
)

optimizer = optim.SGD(model.parameters(), lr=1e-3, foreach=True)



In [520]:
epoches = 500000

for epoch in range(epoches+1):
    y_pred = model(Xt)
    
    # print(yt.dtype)
    cost = F.cross_entropy(y_pred, yt_onehot)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    

    print(f"Epoch {epoch:4d} / {epoches:4d} Cost : {cost.item():6f} ") if epoch % 1000 == 0 else None

Epoch    0 / 500000 Cost : 1.616123 
Epoch 1000 / 500000 Cost : 1.616033 
Epoch 2000 / 500000 Cost : 1.615881 
Epoch 3000 / 500000 Cost : 1.615778 
Epoch 4000 / 500000 Cost : 1.615676 
Epoch 5000 / 500000 Cost : 1.615621 
Epoch 6000 / 500000 Cost : 1.615535 
Epoch 7000 / 500000 Cost : 1.615429 
Epoch 8000 / 500000 Cost : 1.615318 
Epoch 9000 / 500000 Cost : 1.615202 
Epoch 10000 / 500000 Cost : 1.615062 
Epoch 11000 / 500000 Cost : 1.614969 
Epoch 12000 / 500000 Cost : 1.614891 
Epoch 13000 / 500000 Cost : 1.614738 
Epoch 14000 / 500000 Cost : 1.614612 
Epoch 15000 / 500000 Cost : 1.614535 
Epoch 16000 / 500000 Cost : 1.614456 
Epoch 17000 / 500000 Cost : 1.614399 
Epoch 18000 / 500000 Cost : 1.614343 
Epoch 19000 / 500000 Cost : 1.614251 
Epoch 20000 / 500000 Cost : 1.614187 
Epoch 21000 / 500000 Cost : 1.614140 
Epoch 22000 / 500000 Cost : 1.614093 
Epoch 23000 / 500000 Cost : 1.614058 
Epoch 24000 / 500000 Cost : 1.614002 
Epoch 25000 / 500000 Cost : 1.613958 
Epoch 26000 / 500000 C

In [521]:
result = y_pred.max(dim=1)[1] == yt_onehot.max(dim=1)[1]

result.sum()/ result.shape[0]





tensor(0.8523, device='cuda:0')

### (2) white wine

In [431]:
X_wine, y_wine = fetch_openml('white_wine', as_frame=False, return_X_y=True)


  warn(


In [522]:
X_wine.shape, y_wine.shape, X_wine.dtype, y_wine.dtype

((4898, 11), (4898,), dtype('float64'), dtype('float64'))

In [523]:
Xt_wine = torch.from_numpy(X_wine.astype(np.float32))
yt_wine = torch.from_numpy(y_wine.astype(np.float32))

if torch.cuda.is_available():
    Xt_wine = Xt_wine.cuda()
    yt_wine = yt_wine.cuda()



In [524]:
# 가중치와 편향
W = torch.zeros(Xt_wine.shape[1],1, device=DEVICE, requires_grad=True)
b = torch.zeros((1), device=DEVICE, requires_grad=True)


In [525]:
print(f"W == >\n {W} \n\nb ==> {b}")

W == >
 tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0', requires_grad=True) 

b ==> tensor([0.], device='cuda:0', requires_grad=True)


In [526]:
optimizer = optim.SGD([W, b], lr=1e-6)

In [527]:
epochs = 100000
for epoch in range(epochs+1):

    hypo = Xt_wine @ W + b

    cost = torch.mean((hypo - yt_wine).pow(2))
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print(f"Epoch {epoch:4d} / {epochs:4d} Cost : {cost.item():6f}") if epoch % 1000 == 0 else None
    

Epoch    0 / 100000 Cost : 35.334015
Epoch 1000 / 100000 Cost : 3.517596
Epoch 2000 / 100000 Cost : 3.329509
Epoch 3000 / 100000 Cost : 3.155844
Epoch 4000 / 100000 Cost : 2.995155
Epoch 5000 / 100000 Cost : 2.846294
Epoch 6000 / 100000 Cost : 2.708298
Epoch 7000 / 100000 Cost : 2.580323
Epoch 8000 / 100000 Cost : 2.461611
Epoch 9000 / 100000 Cost : 2.351475
Epoch 10000 / 100000 Cost : 2.249284
Epoch 11000 / 100000 Cost : 2.154457
Epoch 12000 / 100000 Cost : 2.066458
Epoch 13000 / 100000 Cost : 1.984790
Epoch 14000 / 100000 Cost : 1.908993
Epoch 15000 / 100000 Cost : 1.838642
Epoch 16000 / 100000 Cost : 1.773341
Epoch 17000 / 100000 Cost : 1.712725
Epoch 18000 / 100000 Cost : 1.656455
Epoch 19000 / 100000 Cost : 1.604217
Epoch 20000 / 100000 Cost : 1.555720
Epoch 21000 / 100000 Cost : 1.510694
Epoch 22000 / 100000 Cost : 1.468887
Epoch 23000 / 100000 Cost : 1.430068
Epoch 24000 / 100000 Cost : 1.394022
Epoch 25000 / 100000 Cost : 1.360548
Epoch 26000 / 100000 Cost : 1.329462
Epoch 2700

In [549]:
row = 1230

Xt_wine[row] @ W + b, y_wine[row]

(tensor([6.0184], device='cuda:0', grad_fn=<AddBackward0>), 6.0)