In [1]:
import torch
import torch.nn as nn
from sklearn.datasets import fetch_openml

In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
import os
import pickle

# Path of the on-disk copy of the downloaded dataset.
DATA_CACHE_PATH = "data.pickle"

# Downloading MNIST from OpenML is slow, so the fetched object is cached on
# disk and reused on later runs instead of being downloaded every time.
if os.path.exists(DATA_CACHE_PATH):
  # NOTE: pickle.load can execute arbitrary code. Only load a cache file that
  # this notebook wrote itself, never one obtained from an untrusted source.
  with open(DATA_CACHE_PATH, "rb") as fr:
    dataset = pickle.load(fr)
else:
  # as_frame=True is explicit because later cells call `.to_numpy()` on
  # `dataset.data` and `dataset.target`.
  dataset = fetch_openml("mnist_784", as_frame=True)
  with open(DATA_CACHE_PATH, "wb") as fw:
    pickle.dump(dataset, fw)

In [16]:
import torch.nn.functional as F

# Pull the feature table and the labels out of the fetched dataset as NumPy
# arrays; the labels are parsed as floats here and cast to integers below.
pixels_np = dataset.data.to_numpy()
labels_np = dataset.target.to_numpy(dtype="float")

# Features: float32 tensor moved to `device`, then divided by 255.
data = torch.FloatTensor(pixels_np).to(device)
data = data / 255.

# Labels: float32 tensor cast to int64, then moved to `device`.
target = torch.FloatTensor(labels_np).long()
target = target.to(device)

In [17]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train / validation / test partition is the same on every
# run (without it each execution of this cell produces a different split).
SPLIT_SEED = 42

# First cut: 90% for training, 10% held out.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    data, target, test_size=0.1, shuffle=True, random_state=SPLIT_SEED)

# Second cut: the held-out part is halved into validation and test sets.
# Separate hold-out names keep this cell re-runnable with the same result.
X_val, X_test, y_val, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, shuffle=True, random_state=SPLIT_SEED)

In [18]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class CustomerDataset(Dataset):
  """Map-style dataset that pairs each sample with its label.

  Args:
    data: Indexable, sized collection of samples.
    target: Indexable, sized collection of labels, aligned with `data`.

  Raises:
    ValueError: If `data` and `target` do not have the same length.
  """

  def __init__(self, data, target):
    # Misaligned inputs would otherwise only surface later as an IndexError
    # (or silently ignore trailing labels), so fail early and clearly.
    if len(data) != len(target):
      raise ValueError(
          "data and target must have the same length, "
          f"got {len(data)} and {len(target)}")
    self.data = data
    self.target = target

  def __len__(self):
    """Return the number of samples."""
    return len(self.data)

  def __getitem__(self, idx):
    """Return the `(sample, label)` pair stored at position `idx`."""
    return self.data[idx], self.target[idx]

In [19]:
# Mini-batch size shared by all three loaders.
BATCH_SIZE = 64

train_dataset = CustomerDataset(X_train, y_train)
val_dataset = CustomerDataset(X_val, y_val)
test_dataset = CustomerDataset(X_test, y_test)

# Training batches are reshuffled every epoch so SGD does not see the samples
# in the same order each time; evaluation loaders keep a fixed order.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [20]:
import matplotlib.pyplot as plt

def test_model(model, dataloader):
  count = 0
  dataloader_size = len(dataloader) if len(dataloader) <= 20 else 20
  
  plt.figure(figsize=(8, 8 * dataloader_size))
  for i, (X, y) in enumerate(dataloader):
    pred = model(X)
    pred_ = torch.argmax(pred, dim=1)
    y_ = torch.argmax(y, dim=1)
    count += (pred_ == y_).sum()

    if i < 20:
      plt.subplot(dataloader_size, 1, i+1)
      plt.title(f"Answer : {y_[0]} / Prediction : {pred_[0]}")
      plt.imshow(X[0].to("cpu").view(28, 28))
  return count

# LOW LEVEL CODE

In [None]:
def cross_entropy_loss(pred, target):
  """Mean over rows of `-sum(target * log(pred + 1e-10))` along dim 1.

  `pred` and `target` are multiplied element-wise, so they must have
  broadcast-compatible shapes with at least two dimensions.
  """
  eps = 1e-10  # keeps log() finite when an entry of `pred` is exactly 0
  log_pred = torch.log(pred + eps)
  weighted_sum = (target * log_pred).sum(dim=1)
  return -weighted_sum.mean()

In [None]:
def forward(X, W1, b1, W2, b2):
  """Two-layer perceptron: affine map, ReLU, affine map.

  Returns the output of the second affine map with no further activation.
  """
  hidden = F.relu(X @ W1 + b1)
  return hidden @ W2 + b2

In [None]:
# Layer sizes of the hand-written two-layer network.
INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM = 784, 300, 10

# He-style initialisation: each weight matrix is drawn from N(0, 2 / fan_in).
# Unit-variance weights over 784 inputs produce very large pre-activations,
# which saturate the softmax and make the loss hard to optimise.
# The scaling happens before requires_grad_() so W1 / W2 stay leaf tensors
# that the optimizer can update.
W1 = (torch.randn((INPUT_DIM, HIDDEN_DIM), device=device)
      * (2.0 / INPUT_DIM) ** 0.5).requires_grad_()
b1 = torch.zeros(HIDDEN_DIM, requires_grad=True, device=device)
W2 = (torch.randn((HIDDEN_DIM, OUTPUT_DIM), device=device)
      * (2.0 / HIDDEN_DIM) ** 0.5).requires_grad_()
b2 = torch.zeros(OUTPUT_DIM, requires_grad=True, device=device)

In [None]:
# Plain SGD over the four hand-made parameter tensors of the low-level network.
learning_rate = 0.01
optimizer = torch.optim.SGD([W1, W2, b1, b2], lr=learning_rate)

In [None]:
# Cross-entropy criterion; the reduction is spelled out ("mean" is the default).
loss_fn = nn.CrossEntropyLoss(reduction="mean")

In [None]:
epochs = 100
train_losses = []
val_losses = []
for epoch in range(epochs):
  # 1-based epoch counter, so the final banner reads "100 / 100".
  print(f"Epoch: {epoch+1} / {epochs} =======")

  # ---- training pass ----
  temp_cost = 0.
  for X, y in train_dataloader:
    # forward: keep the raw scores. nn.CrossEntropyLoss applies log-softmax
    # internally, so calling F.softmax first would normalise twice and
    # distort both the loss and its gradients. (The hand-written
    # cross_entropy_loss is the variant that takes softmax probabilities.)
    pred = forward(X, W1, b1, W2, b2)
    cost = loss_fn(pred, y)
    # backpropagation
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    temp_cost += cost.item()
  # mean loss per batch for this epoch
  temp_cost /= len(train_dataloader)
  train_losses.append(temp_cost)

  # ---- validation pass ----
  temp_cost = 0.
  # No parameter update happens here, so autograd bookkeeping is switched off.
  with torch.no_grad():
    for X, y in val_dataloader:
      pred = forward(X, W1, b1, W2, b2)
      cost = loss_fn(pred, y)
      temp_cost += cost.item()
  temp_cost /= len(val_dataloader)
  val_losses.append(temp_cost)

In [None]:
import matplotlib.pyplot as plt

# Label both curves and the axes so the figure can be read on its own.
plt.plot(train_losses, c="red", label="train")
plt.plot(val_losses, c="blue", label="validation")
plt.xlabel("epoch")
plt.ylabel("mean loss per batch")
plt.legend()
plt.show()

# HIGH LEVEL CODE

In [26]:
import torch.nn as nn

class Model1(nn.Module):
  """Fully connected network: 784 -> 512 -> 512 -> 10 with ReLU in between."""

  def __init__(self):
    super().__init__()
    # Sub-modules are registered in the order they are applied in forward().
    self.layer1 = nn.Linear(in_features=784, out_features=512)
    self.activate1 = nn.ReLU()
    self.layer2 = nn.Linear(in_features=512, out_features=512)
    self.activate2 = nn.ReLU()
    self.layer3 = nn.Linear(in_features=512, out_features=10)

  def forward(self, X):
    """Return the output of the last linear layer for the batch `X`."""
    hidden = self.activate1(self.layer1(X))
    hidden = self.activate2(self.layer2(hidden))
    return self.layer3(hidden)
# Place the network on the same device as the data instead of hard-coding
# CUDA, so the cell also works when `device` is not a GPU.
model = Model1().to(device)
# Last expression of the cell: displays the module summary.
model

Model1(
  (layer1): Linear(in_features=784, out_features=512, bias=True)
  (activate1): ReLU()
  (layer2): Linear(in_features=512, out_features=512, bias=True)
  (activate2): ReLU()
  (layer3): Linear(in_features=512, out_features=10, bias=True)
)

In [22]:
# Stochastic gradient descent over every parameter returned by model.parameters().
base_lr = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=base_lr)

In [23]:
# Exponential decay: LambdaLR multiplies the optimizer's base learning rate by
# the returned factor, here 0.99 ** epoch. A factor of 99 ** epoch would
# instead multiply the rate by 99 after every epoch and make training diverge.
scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 0.99 ** epoch)

In [24]:
# Build the cross-entropy criterion, then move it to `device`
# (Module.to returns the module itself).
criterion = nn.CrossEntropyLoss()
loss_fn = criterion.to(device)

In [25]:
epochs = 25
train_losses = []
val_losses = []
for epoch in range(epochs):
  print(f"Epoch: {epoch+1} / {epochs} =======")

  # ---- training pass ----
  temp_cost = 0.
  model.train()
  for X, y in train_dataloader:
    # Same device handling as the validation loop below; a no-op when the
    # batch already lives on `device`.
    X = X.to(device)
    y = y.to(device)
    # forward: call the module itself (not .forward) so registered hooks run
    pred = model(X)
    # Cost cal
    cost = loss_fn(pred, y)
    # backpropagation
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    temp_cost += cost.item()
  # mean loss per batch for this epoch
  temp_cost /= len(train_dataloader)
  train_losses.append(temp_cost)

  # ---- validation pass ----
  temp_cost = 0.
  model.eval()
  # No parameter update happens here, so autograd bookkeeping is switched off.
  with torch.no_grad():
    for X, y in val_dataloader:
      X = X.to(device)
      y = y.to(device)
      pred = model(X)
      cost = loss_fn(pred, y)
      temp_cost += cost.item()
  temp_cost /= len(val_dataloader)
  val_losses.append(temp_cost)

  # Advance the learning-rate schedule once per epoch, after the optimizer steps.
  scheduler.step()



RuntimeError: ignored

In [None]:
import matplotlib.pyplot as plt

# Label both curves and the axes so the figure can be read on its own.
plt.plot(train_losses, c="red", label="train")
plt.plot(val_losses, c="blue", label="validation")
plt.xlabel("epoch")
plt.ylabel("mean loss per batch")
plt.legend()
plt.show()

In [None]:
# Run the evaluation helper on the test batches; its return value is the
# cell's output.
test_model(model=model, dataloader=test_dataloader)

In [None]:
# Number of batches to preview; must match the row count of the subplot grid.
N_PREVIEW = 10

model.eval()  # set once, not on every iteration
plt.figure(figsize=(10, 100))
with torch.no_grad():
  for i, (X, y) in enumerate(test_dataloader):
    # The grid only has N_PREVIEW rows; plt.subplot raises for a larger index,
    # so stop once the grid is full.
    if i >= N_PREVIEW:
      break
    sample = X[1]  # second sample of the batch
    pred = model(sample)
    plt.subplot(N_PREVIEW, 1, i + 1)
    # Show the predicted class instead of discarding the model output.
    plt.title(f"Prediction : {torch.argmax(pred).item()}")
    plt.imshow(sample.reshape(28, 28).to("cpu"))

In [None]:
model.eval()
count = 0
# Evaluation only: no autograd graph is needed.
with torch.no_grad():
  for X, y in test_dataloader:
    pred = model(X)
    pred2 = torch.argmax(pred, dim=1)
    # 1-D labels are already class indices; argmax over dim=1 is only valid
    # (and only needed) for 2-D labels.
    answer = y if y.dim() == 1 else torch.argmax(y, dim=1)
    count += (pred2 == answer).sum().item()
print(f"{count} / {len(test_dataloader.dataset)}")