In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision.transforms as transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Directory that torchvision uses to store / look up the MNIST files.
path = './datasets/'

# ToTensor hands back each image as a 1x28x28 tensor; flattening to 784
# features is done later, right before the data is fed to the model.
transform = transforms.Compose([transforms.ToTensor()])
train_data = MNIST(root=path, train=True, transform=transform, download=True)
test_data = MNIST(root=path, train=False, transform=transform, download=True)

# Binary task: keep only the samples labelled 0 or 1, in both splits.
for split in (train_data, test_data):
    idx = (split.targets == 0) | (split.targets == 1)
    split.targets = split.targets[idx]
    split.data = split.data[idx]

batch_size = 85

# Training data is shuffled and served in mini-batches; the filtered test
# split is served as one single batch.
train_loader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_data,
    batch_size=len(test_data),
    shuffle=False,
)

In [3]:
#### HERE ####
# Parameters of a one-layer logistic regression on flattened 28x28 images.
# A dedicated, seeded generator makes the initial weights identical on every
# "Restart & Run All" without touching NumPy's global random state.
SEED = 0
rng = np.random.RandomState(SEED)

w = rng.randn(784, 1)  # weight column vector, one entry per input pixel
b = rng.randn(1, 1)    # bias, kept 2-D so it broadcasts over a (batch, 1) output
eta = 1e-4     # learning rate
delta = 1e-10  # added inside log() so that log(0) is never evaluated


In [4]:
# define sigmoid function
def sigmoid(val):
    """Logistic function 1 / (1 + exp(-val)), evaluated element-wise.

    The naive formula overflows in exp() for large negative inputs. Here
    exp() is only ever applied to a non-positive number, and the two
    algebraically equivalent forms are selected by the sign of the input:

        val >= 0:  1 / (1 + exp(-val))
        val <  0:  exp(val) / (1 + exp(val))

    Args:
        val: scalar or array-like of real numbers.

    Returns:
        A scalar for scalar input, otherwise an ndarray with the shape of
        ``val``; every value lies in [0, 1].
    """
    val = np.asarray(val)
    decay = np.exp(-np.abs(val))  # in (0, 1], so it can never overflow
    result = np.where(val >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays of higher rank untouched.
    return result[()]

#### HERE ####
# define derivative of sigmoid function w.r.t. its value
def grad_sigmoid(val):
    """Derivative of the sigmoid with respect to its input: s * (1 - s).

    Args:
        val: the pre-activation value(s) at which to differentiate.

    Returns:
        sigmoid(val) * (1 - sigmoid(val)), in whatever form sigmoid returns.
    """
    activation = sigmoid(val)
    complement = 1 - activation
    return activation * complement

# given data instances in batch form,
# compute loss and gradients of w and b
# also, count the number of correct prediction
def compute_loss_and_grad(data_instance):
    """Forward pass, per-sample loss, per-sample gradients and correctness.

    Reads the notebook-level parameters ``w`` and ``b`` and the constant
    ``delta``; nothing is modified.

    Args:
        data_instance: pair ``(x, y)``. The training loop passes ``x`` as a
            (batch, 784) array of flattened images and ``y`` as a (batch, 1)
            array of 0/1 labels.

    Returns:
        loss: binary cross-entropy of every sample, same shape as ``y``.
        (grad_w, grad_b): per-sample gradients of the loss with respect to
            ``w`` (same shape as ``x``) and ``b`` (same shape as ``y``).
            They are NOT averaged here; the caller reduces over the batch.
        hit: boolean array, True where the rounded prediction equals ``y``.
    """
    x, y = data_instance

    ## forward pass
    linear = np.matmul(x, w) + b
    y_est = sigmoid(linear)
    # delta keeps both log() arguments strictly positive
    loss = -y * np.log(y_est + delta) - (1 - y) * np.log(1 - y_est + delta)

    ## grad computation
    # d(loss)/d(linear) for sigmoid + cross-entropy; algebraically y_est - y
    grad = -y * (1 - y_est) + (1 - y) * y_est
    # Chain rule: d(linear)/dw is the input x, not the weights themselves.
    # (batch, 1) * (batch, 784) broadcasts to one gradient row per sample.
    grad_w = grad * x
    grad_b = grad  # d(linear)/db is 1
    hit = (y == np.round(y_est))

    return loss, (grad_w, grad_b), hit  ## each has one row per sample (== batch_size)


# update NN parameters w and b with SGD
def update_parameters(params, grads, lr=None):
    """Apply one SGD step to ``w`` and ``b`` using batch-averaged gradients.

    Args:
        params: pair ``(w, b)`` of float arrays; the training loop uses
            shapes (784, 1) and (1, 1).
        grads: pair ``(grad_w, grad_b)`` of per-sample gradients with the
            batch on axis 0, as returned by ``compute_loss_and_grad``.
        lr: step size. When omitted, the notebook-level learning rate
            ``eta`` is used.

    Returns:
        The updated ``(w, b)``. The update is done in place, so the arrays
        passed in ``params`` are modified and are the same objects that are
        returned.
    """
    w, b = params
    grad_w, grad_b = grads
    if lr is None:
        lr = eta

    # Average over the batch axis, then reshape to a column so the step has
    # the same shape as the parameter it is subtracted from.
    w -= lr * np.mean(grad_w, axis=0).reshape(-1, 1)
    b -= lr * np.mean(grad_b, axis=0).reshape(-1, 1)

    return w, b
    

In [None]:
num_epoch = 500

for i in range(num_epoch):

    # ---- train the logistic regression model for one epoch ----
    total_loss_train = 0
    count = 0
    for batch_idx, (x, y) in enumerate(train_loader):
        # (batch, 1, 28, 28) images -> (batch, 784); labels -> (batch, 1)
        x, y = x.numpy().reshape(-1, 784), y.numpy().reshape(-1, 1)
        params = (w, b)
        # Per-sample loss / gradients / correctness for this batch, followed
        # by one SGD step on the parameters.
        loss, grads, hit = compute_loss_and_grad((x, y))
        w, b = update_parameters(params, grads)
        total_loss_train += loss.sum()
        count += hit.sum()

    #### HERE ####
    # average loss and accuracy over the whole training set
    loss_train = total_loss_train / len(train_data)
    acc_train = count / len(train_data)

    # ---- evaluate on the test set ----
    # test_loader serves the entire test split as one batch, so a single
    # next() call retrieves everything.
    dataiter = iter(test_loader)
    te_images, te_labels = next(dataiter)

    te_images, te_labels = te_images.numpy().reshape(-1, 784), te_labels.numpy().reshape(-1, 1)

    # Gradients are not needed for evaluation and are discarded.
    loss, _unused_grads, hit = compute_loss_and_grad((te_images, te_labels))
    total_loss_test = loss.sum()
    loss_test = total_loss_test / len(test_data)
    acc_test = hit.sum() / len(test_data)

    # report every 10th epoch
    if i % 10 == 0:
        print("Epoch %d Train: %.3f / %.2f %%"%(i,loss_train,acc_train*100))
        print("Epoch %d Test: %.3f / %.2f %%"%(i,loss_test,acc_test*100))
        print()
