This file is aim to illustrate the basic syntax of pytorch. 

In [10]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import os
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from torchvision import transforms
import torchvision
from torch.utils.data import DataLoader
import cv2
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Conv2d
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Linear
from torch.nn import Flatten
from torch.nn import Sequential
from torch.nn import L1Loss

Two useful function: dir() and help(). The first function can list all the functions in the library. The second one can introduce the functionality of a specific function. 

In [2]:
# The relationship is torch -> cuda -> cuda.is_available
torch.cuda.is_available()
dir(torch)
dir(torch.cuda)
dir(torch.cuda.is_available)
help(torch.cuda.is_available)

Help on function is_available in module torch.cuda:

is_available() -> bool
    Return a bool indicating if CUDA is currently available.



How pytorch loads data? There are two libraries:
1. Dataset: provide access to data and its label
2. Dataload: package the data and provide them for the following netural networks

For example, we want to divide pictures into two parts: ants and bees, ants and bees are their label.

In [3]:
# Define our own class of dataset
class MyData(Dataset):
    # iniliatize the dataset
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(root_dir, label_dir)
        self.img_path = os.listdir(self.path)

    # get image and its label
    def __getitem__(self, index):
        img_name = self.img_path[index]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir

        return img, label
    
    # return the size of dataset
    def __len__(self):
        return len(self.img_path)
    
root_dir = "dataset/train"
ants_label_dir = "ants"
bees_label_dir = "bees"
ants_dataset = MyData(root_dir, ants_label_dir)
bees_dataset = MyData(root_dir, bees_label_dir)

ants_dataset.__len__()
ants_img, ants_label = ants_dataset.__getitem__(0)
#ants_img.show()
#print(ants_label)

bees_dataset.__len__()
bees_img, bees_label = bees_dataset.__getitem__(0)
#bees_img.show()
#print(bees_label)

# Union two dataset
train_dataset = ants_dataset + bees_dataset
print(len(train_dataset) == len(ants_dataset) + len(bees_dataset))
# Use index to call __get_item__
img, label = train_dataset[150] 


True


In [4]:
# Additionally I want to declare a method to write label of image to a .txt file

root_dir = "dataset/train"
target_dir = "ants"
# The output of img_path is a list with string like "12345.jpg"
img_path = os.listdir(os.path.join(root_dir, target_dir))
# split function is a function used to seperate string
# In this case target_dir.split("_") = ["ants"]
label = target_dir.split("_")[0]
out_dir = "ants_label"
for i in img_path:
    # use split to seperate different .jpg file
    file_name = i.split('.jpg')[0]
# Open file and write or read, two parameters are file_path and mode(read or write)
    with open(os.path.join(root_dir, out_dir, "{}.txt".format(file_name)), "w") as f:
        f.write(label)

target_dir = "bees"
img_path = os.listdir(os.path.join(root_dir, target_dir))
label = target_dir.split("_")[0]
out_dir = "bees_label"
for i in img_path:
    file_name = i.split(".jpg")[0]
    with open(os.path.join(root_dir, out_dir, "{}.txt".format(file_name)), "w") as f:
        f.write(label)

Tensorboard is a function used to demonstrate the interaction of one variable with another variable. For example, when we train AI, we want to show the realtionship between loss rate and training rounds.

In [None]:
# first we initialize a writer, which is preserved in file folder "logs"
writer = SummaryWriter("logs")
image_path = "data/train/ants_image/0013035.jpg"
img = Image.open(image_path)
img = np.array(img)
print(type(img))

# need three parameters: tag, img_tensor(torch.Tensor, numpy or string), global_step
# since the shape of numpy array is HWC(Height, Width, Channel), so you need to add a parameter: dataformats
writer.add_image("test", img, 2, dataformats='HWC')

# need three parameters: tag(title of the chart), scalar_value(y axis), global_step(x axis)
# The running result is saved in the "logs" folder under the current folder
# If you want to show the chart, you can run "tensorboard --logdir=logs --port==" in terminal, the default port is 6006
for i in range(100):
    writer.add_scalar("y = 2x", 2 * i, i)

writer.close()

<class 'numpy.ndarray'>


Transform is a python file used to process image. Input an image, use tools in transformer to resize and process and get a result.
Another important concept is tensor, which is in the form of high dimensional array, containing parameters needed for neutral network.

In [10]:
img_path = "data/train/ants_image/0013035.jpg"
# return a variable whose type is PIL image
img = Image.open(img_path)

# first you should initialize a transformer tool by using .ToTensor()
# second pass image parameter
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)
print(tensor_img)

# return a variable whose type is numpy.ndarray
cv_img = cv2.imread(img_path)

writer = SummaryWriter("logs")
writer.add_image("transformer", tensor_img, 1)
writer.close()

tensor([[[0.3137, 0.3137, 0.3137,  ..., 0.3176, 0.3098, 0.2980],
         [0.3176, 0.3176, 0.3176,  ..., 0.3176, 0.3098, 0.2980],
         [0.3216, 0.3216, 0.3216,  ..., 0.3137, 0.3098, 0.3020],
         ...,
         [0.3412, 0.3412, 0.3373,  ..., 0.1725, 0.3725, 0.3529],
         [0.3412, 0.3412, 0.3373,  ..., 0.3294, 0.3529, 0.3294],
         [0.3412, 0.3412, 0.3373,  ..., 0.3098, 0.3059, 0.3294]],

        [[0.5922, 0.5922, 0.5922,  ..., 0.5961, 0.5882, 0.5765],
         [0.5961, 0.5961, 0.5961,  ..., 0.5961, 0.5882, 0.5765],
         [0.6000, 0.6000, 0.6000,  ..., 0.5922, 0.5882, 0.5804],
         ...,
         [0.6275, 0.6275, 0.6235,  ..., 0.3608, 0.6196, 0.6157],
         [0.6275, 0.6275, 0.6235,  ..., 0.5765, 0.6275, 0.5961],
         [0.6275, 0.6275, 0.6235,  ..., 0.6275, 0.6235, 0.6314]],

        [[0.9137, 0.9137, 0.9137,  ..., 0.9176, 0.9098, 0.8980],
         [0.9176, 0.9176, 0.9176,  ..., 0.9176, 0.9098, 0.8980],
         [0.9216, 0.9216, 0.9216,  ..., 0.9137, 0.9098, 0.

Here are some classes of Transformer:
1. ToTensor is a class which can turn an image into a tensor
2. Normalize is a class which can normalize a tensor by giving mean and std of n channels. For a picture, we often have RGB channels.
3. Resize is a class which can crop an PIL image, which return a PIL image
4. RandomCrop is a class which can crop the given image at a random location
5. Compose is a class which can put a lot of transformer classes together. For example, the following code put "RandomCrop" and "ToTensor" together, so the given image is first cropped at a random location, then turn the cropped image into a tensor

In [16]:
writer = SummaryWriter("logs")
img = Image.open("images/desktop.png")
print(img)

# ToTensor
trans_totensor = transforms.ToTensor()
# img_tensor is an object of class "ToTensor", which can turn an image into a tensor
img_tensor = trans_totensor(img)
writer.add_image("ToTensor", img_tensor)

# Normalize
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)
writer.add_image("Normalize", img_norm)

# Resize
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img)
img_resize_tensor = trans_totensor(img_resize)
writer.add_image("Resize", img_resize_tensor)

# RandomCrop
trans_random = transforms.RandomCrop(512)
trans_compose = transforms.Compose([trans_random, trans_totensor])
for i in range(10):
    img_crop = trans_compose(img)
    writer.add_image("Random Crop", img_crop, i)


writer.close()


<PIL.PngImagePlugin.PngImageFile image mode=RGB size=2559x1439 at 0x175E1B39050>


We can use torchvision to download dataset

In [26]:
dataset_transform = transforms.Compose([
    transforms.ToTensor()
])

train_set = torchvision.datasets.CIFAR10(root="./datasets", train=True, download=True, transform=dataset_transform)
test_set = torchvision.datasets.CIFAR10(root="./datasets", train=False, download=True, transform=dataset_transform)

writer = SummaryWriter(log_dir="logs")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)

writer.close()



Files already downloaded and verified
Files already downloaded and verified


DataLoader is a function used to load data. There are many parameters:
1. "dataset" is the directory of the dataset
2. "batch_size" is the number of data per batch to load
3. "shuffle" makes data reshuffled at every epoch when it is set "True"
4. "num_workers" is the number of subprocesses used to load data
5. "drop_last" can drop the last incomplete batch when it is set "True" if the size of dataset is not divisible by batch_size

In [33]:
test_dataset = torchvision.datasets.CIFAR10(root="./datasets", download=True, train=False, transform=transforms.ToTensor())

test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
# for every element in test loader, it is a tuple containing a list of images and a list of targets
# the shape of images is [batch_size, 3(3 channels), size, size]
# targets are a list of labels of these images
writer = SummaryWriter("logs")
step = 0
epoch = 0
for data in test_loader:
    imgs, targets = data
    # add images in one step
    writer.add_images(f"Epoch: {epoch}", imgs, step)
    step = step + 1

writer.close()

Files already downloaded and verified


Neutral Network
1. Define a class which inharit nn.Module class
2. forward is a function which can perform convolution operation

In [35]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        output = input + 1
        return output
    

model = Model()
input = torch.tensor(1.0)
output = model(input)
print(output)

tensor(2.)


1. Convolution operation often has an input image in the form of tensor and a convolution kernel, which is also in the form of tensor. Every time we do a multiplication, take a subtensor of input image which has the same size as kernel, multiply corresponding positions and get output. 
2. Stride is a variable which decide the step. The submatrix is extracted from left to right and from top to bottom, stride can decide the number of elements each time passed.
3. If we set "padding = 1", for a two dimensional tensor, fill in one row and one column each at the top, bottom, left, and right.
4. If "in_channel = 2", there are two layers of input tensor, if "out_channel = 2", there are two convolutional kernels and we get two layers of output tensor

In [None]:
input = torch.tensor([[1, 2, 0, 3, 1], 
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]])
kernel = torch.tensor([[1, 2, 1],
                       [0, 1, 0],
                       [2, 1, 0]])
# the input of convolution function should have four parameters: batch, channel, height, width
# reshape function can reshape a tensor, the original input is a two dimensional tensor, the new input is a four dimensional tensor
input = torch.reshape(input, (1, 1, 5, 5))
kernel = torch.reshape(kernel, (1, 1, 3, 3))

output = F.conv2d(input, kernel, stride=1)
print(output)

# use torchvision download a test dataset and define a neutral network
dataset = torchvision.datasets.CIFAR10("./datasets", download=True, train=False, transform=transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, input):
        output = self.conv1(input)
        return output
    

model = Model()
writer = SummaryWriter("logs")
step = 0
for data in dataloader:
    # img.shape = torch.Size([64, 3, 32, 32])
    imgs, targets = data
    # output.shape = torch.Size([64, 6, 30, 30]), has 6 channels which cannot be displayed in tensorboard directly
    # perform forward operation autonomously, call __call__ function
    output = model(imgs)
    # if the value is set as -1, then the value can be calculated autonomously
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("input", imgs, step)
    step = step + 1
    writer.add_images("output", output, step)

writer.close()
    

tensor([[[[10, 12, 12],
          [18, 16, 16],
          [13,  9,  3]]]])
Files already downloaded and verified


Pooling Layer:
1. Pooling layer also has a kernal, each time take the maximal element of the corresponding submatrix of input as output
2. If the size of the remaining submatrix is smaller than the size of the kernel, take the maximum element in the submatrix instead of discarding the submatrix when "ceil mode" is True
3. "stride" is the same as convolution layer but the default value is kernal size
4. MaxPool is used to preserve key features in order to reduce network load

In [14]:
input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (-1, 1, 5, 5))

dataset = torchvision.datasets.CIFAR10(root="./datasets", download=True, transform=transforms.ToTensor(), train=False)
dataloader = DataLoader(dataset=dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output
    

model = Model()
writer = SummaryWriter("logs")
step = 0

for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    # the number of channels remain after MaxPool, so we don't need to reshape output
    output = model(imgs)
    writer.add_images("output", output, step)
    step = step + 1

writer.close()


Files already downloaded and verified


Non-linear Activations are some nonlinear functions used to process data
1. ReLU(x) = max(0, x) ReLU has a parameter "inplace", whose default value is false. If it is true, the input will be inplaced by output. If it is false, the input will not be inplaced and we can get an output

In [None]:
input = torch.tensor([[1, -0.5],
                      [-1, 3]])
input = torch.reshape(input, (-1, 1, 2, 2))

dataset = torchvision.datasets.CIFAR10(root="./datasets", download=True, transform=transforms.ToTensor(), train=False)
dataloader = DataLoader(dataset, batch_size=64)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.relu = ReLU()

    def forward(self, input):
        output = self.relu(input)
        return output
    
writer = SummaryWriter(log_dir="logs")
model = Model()
step = 0

for data in dataloader:
    imgs, targets = data
    writer.add_images("input", imgs, step)
    output = model(imgs)
    writer.add_images("output", output)
    step = step + 1

writer.close()

Files already downloaded and verified


Linear Layer: y = xA^T + b

In [5]:
dataset = torchvision.datasets.CIFAR10(download=True, root="./datasets", transform=transforms.ToTensor(), train=False)
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # the first parameter is input feature, the second parameter is output feature
        # which can be interpreted as dimension
        self.linear = Linear(196608, 10)
    
    def forward(self, input):
        output = self.linear(input)
        return output
    

model = Model()

for data in dataloader:
    # the shape of images is [64, 3, 32, 32]
    imgs, targets = data
    # reshape the imgs to [[64*3*32*32]]
    output = torch.flatten(imgs)
    # the shape of output is [[10]]
    output = model(output)


Files already downloaded and verified


Sequential can put several network operations together such as convolution and linear

In [9]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model(input)
        return output
    
model = Model()
input = torch.ones((64, 3, 32, 32))
output = model(input)
print(output.shape)


torch.Size([64, 10])


Loss function
1. gradient is used to optimize weight

In [None]:
input = torch.tensor([1, 2, 3], dtype=torch.float32)
output = torch.tensor([1, 2, 5], dtype=torch.float32)

input = torch.reshape(input, (1, 1, 1, 3))
output = torch.reshape(output, (1, 1, 1, 3))

loss = L1Loss(reduction="sum")
result = loss(input, output)

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model(input)
        return output
    

dataset = torchvision.datasets.CIFAR10(download=True, train=False, root="./datasets", transform=transforms.ToTensor())
dataloader = DataLoader(dataset, batch_size=1)

loss = nn.CrossEntropyLoss()
model = Model()
optim = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(20):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        outputs = model(imgs)
        result_loss = loss(outputs, targets)
        # set zero to remain independent
        optim.zero_grad()
        result_loss.backward()
        optim.step()
        running_loss = running_loss + result_loss
    print(running_loss)

In [None]:
vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)

train_data = torchvision.datasets.CIFAR10(root='./datasets', download=True, train=True, transform=transforms.ToTensor())

# add to classifier
vgg16_true.classifier.add_module("Add_linear", Linear(1000, 10))
print(vgg16_true)



Files already downloaded and verified
VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_s

In [24]:
vgg16 = torchvision.models.vgg16(pretrained=False)
torch.save(vgg16, "vgg16_method1.pth")

torch.save(vgg16.state_dict(), "vgg16_method2.pth")



In [25]:
model = torch.load("vgg16_method1.pth")
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
print(model)
print(vgg16)

  model = torch.load("vgg16_method1.pth")


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

  vgg16.load_state_dict(torch.load("vgg16_method2.pth"))


In [33]:
# Prepare dataset
train_data = torchvision.datasets.CIFAR10(root='./datasets', train=True, download=True, transform=transforms.ToTensor())
test_data = torchvision.datasets.CIFAR10(root='./datasets', train=False, download=True, transform=transforms.ToTensor())

train_data_size = len(train_data)
test_data_size = len(test_data)

print(f"The size of train dataset is {train_data_size}")
print(f"The size of test dataset is {test_data_size}")

train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Construct neutral network
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model(input)
        return output
    

model = Model()
# use GPU
if torch.cuda.is_available():
    model = model.cuda()

# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
# use GPU
loss_fn = loss_fn.cuda()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
writer = SummaryWriter("logs")

# Set parameters
total_train_epoch = 0
total_test_epoch = 0
epoch = 10

for i in range(epoch):
    print(f"Epoch: {i+1}")
    # train
    for data in train_dataloader:
        imgs, targets = data
        # use GPU
        imgs = imgs.cuda()
        targets = targets.cuda()
        output = model(imgs)
        loss = loss_fn(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_epoch = total_train_epoch + 1
        if (total_train_epoch % 100 == 0):
            print(f"train step: {total_train_epoch}, loss: {loss}")

    # guarantee no optimizing in order to test
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            # use GPU
            imgs = imgs.cuda()
            targets = targets.cuda()
            output = model(imgs)
            loss = loss_fn(output, targets)

            total_test_loss = total_test_loss + loss.item()
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    
    total_test_loss = total_test_loss * 64 / test_data_size
    total_accuracy = total_accuracy / test_data_size
    total_test_epoch = total_test_epoch + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_epoch)

    print(f"The total loss is: {total_test_loss}, the total accuracy is: {total_accuracy}")

    torch.save(model, f"model_{i+1}.pth")


writer.close()


Files already downloaded and verified
Files already downloaded and verified
The size of train dataset is 50000
The size of test dataset is 10000
Epoch: 1
train step: 100, loss: 2.291564464569092
train step: 200, loss: 2.284550189971924
train step: 300, loss: 2.2580981254577637
train step: 400, loss: 2.1759283542633057
train step: 500, loss: 2.1407179832458496
train step: 600, loss: 2.0684690475463867
train step: 700, loss: 2.0098421573638916
The total loss is: 1.9777332412719726, the total accuracy is: 0.3060999810695648
Epoch: 2
train step: 800, loss: 1.8626993894577026
train step: 900, loss: 1.818153738975525
train step: 1000, loss: 1.9473388195037842
train step: 1100, loss: 1.9447749853134155
train step: 1200, loss: 1.6946173906326294
train step: 1300, loss: 1.6531866788864136
train step: 1400, loss: 1.7546414136886597
train step: 1500, loss: 1.8130407333374023
The total loss is: 1.805677434539795, the total accuracy is: 0.3520999848842621
Epoch: 3
train step: 1600, loss: 1.75621330

Here is the second method of using GPU

In [37]:
# define training device
device = torch.device("cuda")

# Prepare dataset
train_data = torchvision.datasets.CIFAR10(root='./datasets', train=True, download=True, transform=transforms.ToTensor())
test_data = torchvision.datasets.CIFAR10(root='./datasets', train=False, download=True, transform=transforms.ToTensor())

train_data_size = len(train_data)
test_data_size = len(test_data)

print(f"The size of train dataset is {train_data_size}")
print(f"The size of test dataset is {test_data_size}")

train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Construct neutral network
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.model = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, input):
        output = self.model(input)
        return output
    

model = Model()
model = model.to(device)

# Define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
loss_fn = loss_fn.to(device)
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
writer = SummaryWriter("logs")

# Set parameters
total_train_epoch = 0
total_test_epoch = 0
epoch = 10

for i in range(epoch):
    print(f"Epoch: {i+1}")
    # train
    for data in train_dataloader:
        imgs, targets = data
        imgs = imgs.to(device)
        targets = targets.to(device)
        output = model(imgs)
        loss = loss_fn(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_epoch = total_train_epoch + 1
        if (total_train_epoch % 100 == 0):
            print(f"train step: {total_train_epoch}, loss: {loss}")

    # guarantee no optimizing in order to test
    total_test_loss = 0
    total_accuracy = 0
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            imgs = imgs.to(device)
            targets = targets.to(device)
            output = model(imgs)
            loss = loss_fn(output, targets)

            total_test_loss = total_test_loss + loss.item()
            accuracy = (output.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy
    
    total_test_loss = total_test_loss * 64 / test_data_size
    total_accuracy = total_accuracy / test_data_size
    total_test_epoch = total_test_epoch + 1
    writer.add_scalar("test_loss", total_test_loss, total_test_epoch)

    print(f"The total loss is: {total_test_loss}, the total accuracy is: {total_accuracy}")

    torch.save(model, f"model_{i+1}.pth")


writer.close()


Files already downloaded and verified
Files already downloaded and verified
The size of train dataset is 50000
The size of test dataset is 10000
Epoch: 1
train step: 100, loss: 2.2944676876068115
train step: 200, loss: 2.2946643829345703
train step: 300, loss: 2.2825851440429688
train step: 400, loss: 2.247307062149048
train step: 500, loss: 2.2149949073791504
train step: 600, loss: 2.129399061203003
train step: 700, loss: 2.0366742610931396
The total loss is: 2.0131363075256345, the total accuracy is: 0.2777000069618225
Epoch: 2
train step: 800, loss: 1.9144331216812134
train step: 900, loss: 1.867685079574585
train step: 1000, loss: 2.0080533027648926
train step: 1100, loss: 1.9736117124557495
train step: 1200, loss: 1.7371293306350708
train step: 1300, loss: 1.654584527015686
train step: 1400, loss: 1.7704800367355347
train step: 1500, loss: 1.8128730058670044
The total loss is: 1.932736734008789, the total accuracy is: 0.31459999084472656
Epoch: 3
train step: 1600, loss: 1.74237442