In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from torchvision.transforms import ToTensor
torch.__version__

'1.12.1+cu113'

In [None]:
# Shared data-pipeline settings, defined once so the train and test loaders
# cannot silently drift apart.
DATA_ROOT = "./"
BATCH_SIZE = 30

# Training split: reshuffled on every pass so each epoch sees a different
# batch composition.
train_FMNIST = FashionMNIST(root = DATA_ROOT,
                            train = True,
                            transform = ToTensor(),
                            download = True)

train_dataloader = DataLoader(train_FMNIST, batch_size = BATCH_SIZE, shuffle = True, drop_last = False)

# Test split: iterated in dataset order. Shuffling adds nothing to an
# evaluation pass and makes per-batch results non-reproducible.
test_FMNIST = FashionMNIST(root = DATA_ROOT,
                           train = False,
                           transform = ToTensor(),
                           download = True)

test_dataloader = DataLoader(test_FMNIST, batch_size = BATCH_SIZE, shuffle = False, drop_last = False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/train-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting ./FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./FashionMNIST/raw



In [None]:
class MyBlock(nn.Module):
    """Two-branch bottleneck block with a residual shortcut.

    Both branches open with a 1x1 convolution (which carries ``stride``) and
    close with a 1x1 convolution producing ``inter_channels * 2`` channels.
    The first branch has one 3x3 convolution in between, the second has two.
    The branch outputs are concatenated along the channel axis, summed with
    the shortcut and passed through ReLU, so the block emits
    ``inter_channels * expansion`` channels.

    Args:
        in_channels: Channel count of the incoming feature map.
        inter_channels: Width of the inner convolutions of each branch.
        stride: Stride of the first 1x1 convolution in each branch and of the
            projection shortcut.
    """

    # Output channels of the block = inter_channels * expansion.
    expansion = 4

    def __init__(self, in_channels, inter_channels, stride):
        super().__init__()
        branch_width = inter_channels * 2
        out_width = inter_channels * self.expansion

        # The position of each layer inside a Sequential fixes its state_dict
        # key, so the layer order below must not be rearranged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, inter_channels, kernel_size=1, stride=stride, padding=0),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(),
            nn.Conv2d(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(),
            nn.Conv2d(inter_channels, branch_width, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(branch_width),
        )

        # Same layout as the first branch, with one extra 3x3 conv stage.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels, inter_channels, kernel_size=1, stride=stride, padding=0),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(),
            nn.Conv2d(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(),
            nn.Conv2d(inter_channels, inter_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(inter_channels),
            nn.ReLU(),
            nn.Conv2d(inter_channels, branch_width, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(branch_width),
        )

        # Identity shortcut when the input already matches the output in both
        # resolution and width; otherwise project with a strided 1x1 conv.
        if stride == 1 and in_channels == out_width:
            self.short = nn.Sequential()
        else:
            self.short = nn.Sequential(
                nn.Conv2d(in_channels, out_width, kernel_size=1, stride=stride, padding=0),
                nn.BatchNorm2d(out_width),
            )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(cat(branch1(x), branch2(x)) + shortcut(x))``."""
        branches = torch.cat((self.conv1(x), self.conv2(x)), dim=1)
        residual = self.short(x)
        return self.relu(branches + residual)
        

In [None]:
class ResNet(nn.Module):
    """ResNet-style classifier built from four stages of a pluggable block.

    The network is a 3x3 conv stem (64 channels) followed by a max-pool, four
    stages with inner widths 64/128/256/512, global average pooling, dropout
    and a linear classifier. Only the last stage downsamples (stride 2).

    Each stage's input width and the classifier's input width are derived from
    ``block.expansion``, so any block class that outputs
    ``inter_channels * expansion`` channels can be plugged in.

    Args:
        block: Block class. Must expose a class attribute ``expansion`` and be
            constructible as ``block(in_channels, inter_channels, stride)``.
        block_list: Sequence of four ints, the number of blocks in each stage.
        num_classes: Size of the output layer (default 10).
        in_channels: Channel count of the input images (default 1).
    """

    def __init__(self, block, block_list, num_classes=10, in_channels=1):
        super(ResNet, self).__init__()
        stem_width = 64
        expansion = block.expansion

        self.conv1 = nn.Sequential(nn.Conv2d(in_channels, stem_width, 3, 1, 1),
                                   nn.BatchNorm2d(stem_width),
                                   nn.ReLU(),
                                   nn.MaxPool2d(3, 2, 1))

        # Each stage consumes the previous stage's output, which is that
        # stage's inner width times the block expansion factor.
        self.block1 = self.make_layers(block, stem_width, 64, block_list[0], 1)
        self.block2 = self.make_layers(block, 64 * expansion, 128, block_list[1], 1)
        self.block3 = self.make_layers(block, 128 * expansion, 256, block_list[2], 1)
        self.block4 = self.make_layers(block, 256 * expansion, 512, block_list[3], 2)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()
        self.drop = nn.Dropout(0.6)
        self.fc = nn.Linear(512 * expansion, num_classes)

    def make_layers(self, block , in_channels, inter_channels, blocks_num, stride):
        """Build one stage as an ``nn.Sequential`` of ``block`` instances.

        The first block receives ``in_channels`` and ``stride``; the remaining
        ``blocks_num - 1`` blocks take the expanded width as input and use
        stride 1. One block is always created, even if ``blocks_num`` is
        less than 1.
        """
        layers = []
        layers.append(block(in_channels, inter_channels, stride))

        in_channels = inter_channels * block.expansion
        for _ in range(blocks_num - 1):
            layers.append(block(in_channels,inter_channels , 1))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Map a batch of images to class logits of shape ``(batch, num_classes)``."""
        x = self.conv1(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.avgpool(x)
        x = self.flatten(x)
        x = self.drop(x)
        x = self.fc(x)
        return x

In [None]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    The model is switched to training mode first, so Dropout and BatchNorm
    behave correctly even if an earlier evaluation left it in eval mode. It is
    left in training mode on return. Batches are moved to the device the model
    lives on, so the same code works on CPU and GPU.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` batches. Must expose
            ``.dataset`` with a length (used only for the progress print).
        model: ``nn.Module`` to optimise.
        loss_fn: Callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: Optimizer bound to ``model``'s parameters.

    Prints the current loss every 100 batches.
    """
    size = len(dataloader.dataset)

    # Follow the model's device rather than assuming CUDA. A parameterless
    # model falls back to CUDA when available, as before.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss:{loss_value:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn, Train = False):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.cuda(), y.cuda()
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    if Train:
        print(f"Train Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    else:
        print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
def train_loop(model, epochs):
    """Train ``model`` for ``epochs`` epochs and report metrics after each one.

    Uses cross-entropy loss and Adam (learning rate 0.001). Each epoch runs
    one training pass over the module-level ``train_dataloader``, then
    evaluates on ``train_dataloader`` and on ``test_dataloader``.
    """
    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch_number in range(1, epochs + 1):
        print(f"Epoch {epoch_number}\n-------------------------------")
        train(train_dataloader, model, criterion, adam)
        # Report on the data just trained on, then on the held-out split.
        test(train_dataloader, model, criterion, Train=True)
        test(test_dataloader, model, criterion)

    print("Done!")

In [None]:
# Build the network with 3, 4, 6 and 3 blocks in its four stages.
myres50 = ResNet(MyBlock, [3, 4, 6, 3])
# Module.cuda() moves the parameters in place and returns the module itself,
# so the model can be moved first and then passed by name.
myres50.cuda()
train_loop(model=myres50, epochs=20)

Epoch 1
-------------------------------
loss:2.540559 [    0/60000]


KeyboardInterrupt: ignored