In [151]:
import torch
from torch import nn
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor,Lambda
from torch.optim import Adam, SGD
import matplotlib.pyplot as plt


# Both FashionMNIST splits use the same root directory, download flag and
# image transform; only the `train` flag differs between them.
# To get one-hot encoded labels instead of class indices (sometimes needed
# for computing the loss), additionally pass:
# target_transform=Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1))
_fashion_mnist_kwargs = dict(root="data", download=True, transform=ToTensor())

training_data = datasets.FashionMNIST(train=True, **_fashion_mnist_kwargs)
test_data = datasets.FashionMNIST(train=False, **_fashion_mnist_kwargs)

In [152]:
# Hyperparameters used by the cells below.
lr = 1e-3  # learning rate handed to the optimizer
batch_size = 64  # number of samples per DataLoader batch
train_epoch = 10  # number of train/test passes run by the training driver loop

In [153]:
# Wrap the datasets in DataLoaders so we can iterate over them in mini-batches.
# Training batches are reshuffled every epoch. The evaluation loader is NOT
# shuffled: accuracy does not depend on sample order, and a fixed order keeps
# evaluation reproducible and avoids drawing a needless random permutation.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [154]:
print(len(train_dataloader.dataset)) # number of samples in the training dataset
print(len(test_dataloader.dataset)) # number of samples in the test dataset
# len(dataloader) is the number of batches, i.e. len(dataset) / batch_size
# rounded UP (the last batch may be smaller than batch_size).
# batch_size is fixed when the DataLoader is constructed.
# e.g. ceil(60000 / 64) = 938 and ceil(10000 / 64) = 157
print(len(train_dataloader))
print(len(test_dataloader))


60000
10000
938
157


In [155]:
# # the shape of the first 2 batches of the training dataset/ test dataset
# for X,y in train_dataloader:
#     # print(X.shape)
#     # print(y.shape)
#     Xt,yt = next(iter(test_dataloader))
#     # print(Xt[0])
#     print(yt[0])

# Construct an MLP model

In [156]:
# Define the model: a plain multi-layer perceptron (MLP)

class MyNN(nn.Module):
    """Fully connected (MLP) classifier that returns raw class logits.

    Each input sample is flattened and passed through two hidden
    ``Linear`` + ``ReLU`` layers followed by a final ``Linear`` projection.
    No softmax is applied: the raw logits are returned so they can be fed
    directly to a loss such as ``nn.CrossEntropyLoss``.

    Args:
        img_shape: Sequence with the per-sample dimension sizes, e.g.
            ``(28, 28)`` for height x width. The product of all entries must
            equal the number of values in one flattened sample.
        unit_dim: Width of both hidden layers.
        num_classes: Number of output logits (one per class). Defaults to 10,
            the value that was previously hard-coded.
    """

    def __init__(self, img_shape, unit_dim=512, num_classes=10):
        super().__init__()
        self.img_shape = img_shape
        self.unit_dim = unit_dim
        self.num_classes = num_classes

        # Size of one flattened sample: product of every entry of img_shape.
        in_features = 1
        for dim in img_shape:
            in_features *= dim

        # NOTE: the attribute name and the layer positions inside the
        # Sequential determine the state_dict keys ("linear.1.weight", ...).
        # Keep them stable so previously saved weights remain loadable.
        self.linear = nn.Sequential(
            nn.Flatten(),  # (batch, ...) -> (batch, in_features)
            nn.Linear(in_features, unit_dim),
            nn.ReLU(),
            nn.Linear(unit_dim, unit_dim),
            nn.ReLU(),
            nn.Linear(unit_dim, num_classes),
        )

    def forward(self, img):
        """Return the unnormalised class scores for a batch of images.

        Args:
            img: Batch tensor whose non-batch dimensions flatten to the input
                size derived from ``img_shape``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits.
        """
        logits = self.linear(img)
        return logits

In [157]:
# Pull a single training batch just to read off the spatial size of the images;
# dimensions 2 and 3 of the batch tensor are used as the model's input shape.
sample_img, sample_label = next(iter(train_dataloader))
img_shape = tuple(sample_img.shape[axis] for axis in (2, 3))

# Cross-entropy over the raw logits produced by the model.
loss_fn = nn.CrossEntropyLoss()

model = MyNN(img_shape)

# Plain SGD is used here; Adam is a drop-in alternative:
# optimizer = Adam(model.parameters(),lr=lr)
optimizer = SGD(model.parameters(), lr=lr)

In [158]:
def train_loop(train_dataloader, optimizer, loss_fn, model):
    """Train ``model`` for one full pass over ``train_dataloader``.

    Args:
        train_dataloader: Iterable yielding ``(X, y)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor that
            supports ``backward()``.
        model: Module to train. It is switched to training mode.

    Returns:
        float: Mean of the per-batch loss values, or ``nan`` when the
        dataloader yielded no batches.
    """
    # Ensure mode-dependent layers (e.g. dropout, batch norm) behave as in
    # training even if an earlier evaluation left the model in eval mode.
    model.train()

    running_loss = 0.0
    num_batches = 0
    for X, y in train_dataloader:
        # Forward pass and loss for the current batch.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Gradients accumulate across backward() calls, so clear the previous
        # ones before back-propagating and updating the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    return running_loss / num_batches if num_batches else float("nan")

def test_loop(test_dataloader, model):
    correct = 0
    size = len(test_dataloader.dataset)
    with torch.no_grad():
        for X,y in test_dataloader:
            pred = model(X) # pred is in shape of batch_size x num_of_classes, y is in shape of batch_size x 1, need argmax

            # pred.argmax(1) == y will return boolean values of each paring entry, .type will change false -> 0, true -> 1, and we sum them up
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    correct /= size
    print(f"Test acc: {correct}")


In [159]:
# Run `train_epoch` rounds: one pass over the training data followed by an
# accuracy report on the test data, so progress is visible after every epoch.
for i in range(train_epoch):
    train_loop(train_dataloader,optimizer,loss_fn,model)
    test_loop(test_dataloader,model)

Test acc: 0.4206
Test acc: 0.5804
Test acc: 0.6133
Test acc: 0.6346
Test acc: 0.6457
Test acc: 0.6521
Test acc: 0.6662
Test acc: 0.6769
Test acc: 0.6942
Test acc: 0.7112


# Save the Trained Model

In [161]:
# NOTE(review): `models` is not used in the cells visible here; if it is needed
# later, consider moving this import to the import cell at the top.
import torchvision.models as models
# Persist only the learned parameters (the state_dict), not the model object itself.
torch.save(model.state_dict(),"model_weights.pth")

In [164]:
# Restore the saved weights into a freshly constructed model of the same architecture.
model2 = MyNN((28,28))
# Baseline: accuracy of the fresh model before the saved weights are loaded.
test_loop(test_dataloader,model2)

# After loading the saved state_dict the accuracy matches the trained model's.
# NOTE(review): torch.load unpickles the file, so only load checkpoints you
# trust; if the installed PyTorch supports it, consider weights_only=True here.
model2.load_state_dict(torch.load('model_weights.pth'))
test_loop(test_dataloader,model2)

Test acc: 0.0587
Test acc: 0.7112


In [166]:
# Save and reload the entire model object (structure and weights together),
# instead of only its state_dict.
# NOTE(review): this pickles the module, so the MyNN class must be defined when
# loading; newer PyTorch releases may require torch.load(..., weights_only=False)
# for whole-model files - confirm against the installed version.
torch.save(model,'model.pth')
model3 = torch.load('model.pth')
test_loop(test_dataloader,model3)

Test acc: 0.7112
