In [2]:
import torchvision.transforms as transforms
import torch.optim as optim
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from tqdm.notebook import tqdm
from torch.nn import functional as F

# Use the GPU when one is visible to PyTorch; otherwise fall back to the CPU so
# that every later `.to(device)` call still works on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
class Inception(nn.Module):
    """Inception block: four parallel branches concatenated along the channel axis.

    Every convolution and the pooling layer use stride 1 with padding chosen so
    that the spatial size of the input is preserved, which is what makes the
    channel-wise concatenation in ``forward`` possible.

    Args:
        c1: output channels of the 1x1 convolution in branch 1.
        c2: pair ``(reduce, out)``; channels of the 1x1 convolution and of the
            3x3 convolution that follows it in branch 2.
        c3: pair ``(reduce, out)``; channels of the 1x1 convolution and of the
            5x5 convolution that follows it in branch 3.
        c4: output channels of the 1x1 convolution applied after the 3x3
            max-pool in branch 4.
        **kwargs: forwarded unchanged to ``nn.Module.__init__``.
    """

    def __init__(self, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)
        reduce_3x3, out_3x3 = c2[0], c2[1]
        reduce_5x5, out_5x5 = c3[0], c3[1]

        # Branch 1: 1x1 convolution.
        self.b1_1 = nn.LazyConv2d(c1, kernel_size=1)

        # Branch 2: 1x1 channel reduction, then 3x3 convolution.
        self.b2_1 = nn.LazyConv2d(reduce_3x3, kernel_size=1)
        self.b2_2 = nn.LazyConv2d(out_3x3, kernel_size=3, stride=1, padding=1)

        # Branch 3: 1x1 channel reduction, then 5x5 convolution.
        self.b3_1 = nn.LazyConv2d(reduce_5x5, kernel_size=1)
        self.b3_2 = nn.LazyConv2d(out_5x5, kernel_size=5, stride=1, padding=2)

        # Branch 4: 3x3 max-pool, then 1x1 convolution.
        self.b4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.b4_2 = nn.LazyConv2d(c4, kernel_size=1)

    def forward(self, x):
        """Run the four branches on ``x`` and stack their outputs on dim 1."""
        branch1 = F.relu(self.b1_1(x))

        reduced_3x3 = F.relu(self.b2_1(x))
        branch2 = F.relu(self.b2_2(reduced_3x3))

        reduced_5x5 = F.relu(self.b3_1(x))
        branch3 = F.relu(self.b3_2(reduced_5x5))

        pooled = self.b4_1(x)
        branch4 = F.relu(self.b4_2(pooled))

        return torch.cat([branch1, branch2, branch3, branch4], dim=1)


In [4]:
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier assembled from five stages and a linear head.

    All convolutional and linear layers are created as ``Lazy*`` modules, so
    they have no weights until the first forward pass has shown them an input.
    Consequently a custom weight initialiser cannot be applied in the
    constructor; call :meth:`apply_init` with a sample batch instead.

    Args:
        num_classes: number of output logits produced by the final linear layer.
    """

    def b1(self):
        """Stage 1: 7x7 stride-2 convolution (64 channels), ReLU, 3x3 stride-2 max-pool."""
        return nn.Sequential(
            nn.LazyConv2d(64, 7, 2, 3), nn.ReLU(), nn.MaxPool2d(3, 2, 1)
        )

    def b2(self):
        """Stage 2: 1x1 conv (64), ReLU, 3x3 conv (192), ReLU, 3x3 stride-2 max-pool."""
        return nn.Sequential(
            nn.LazyConv2d(64, 1), nn.ReLU(),
            nn.LazyConv2d(192, 3, 1, 1), nn.ReLU(),
            nn.MaxPool2d(3, 2, 1)
        )

    def b3(self):
        """Stage 3: two Inception blocks followed by a 3x3 stride-2 max-pool."""
        return nn.Sequential(
            Inception(64, (96, 128), (16, 32), 32),
            Inception(128, (128, 192), (32, 96), 64),
            nn.MaxPool2d(3, 2, 1)
        )

    def b4(self):
        """Stage 4: five Inception blocks followed by a 3x3 stride-2 max-pool."""
        return nn.Sequential(
            Inception(192, (96, 208), (16, 48), 64),
            Inception(160, (112, 224), (24, 64), 64),
            Inception(128, (128, 256), (24, 64), 64),
            Inception(112, (144, 288), (32, 64), 64),
            Inception(256, (160, 320), (32, 128), 128),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

    def b5(self):
        """Stage 5: two Inception blocks, global average pooling, flatten."""
        return nn.Sequential(
            Inception(256, (160, 320), (32, 128), 128),
            Inception(384, (192, 384), (48, 128), 128),
            nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten()
        )

    def __init__(self, num_classes = 10):
        super().__init__()
        self.net = nn.Sequential(
            self.b1(), self.b2(), self.b3(), self.b4(),
            self.b5(), nn.LazyLinear(num_classes))
        # No initialiser is applied here: every weighted layer is still lazy
        # at this point, so an initialiser keyed on nn.Conv2d / nn.Linear would
        # not match any module. Use apply_init() once an input is available.

    def apply_init(self, inputs, init=None):
        """Materialise the lazy layers with a dry run, then optionally initialise.

        Args:
            inputs: sequence of positional arguments for ``forward``
                (e.g. ``[batch]``); used only to let the lazy layers infer
                their input sizes.
            init: optional callable applied to every sub-module of ``self.net``
                via ``nn.Module.apply`` after the dry run.
        """
        # The dry run exists only for shape inference, so skip autograd tracking.
        with torch.no_grad():
            self.forward(*inputs)
        if init is not None:
            self.net.apply(init)

    def forward(self, x):
        """Return the logits produced by the stacked stages for input ``x``."""
        return self.net(x)



def init_cnn(module):
    """Xavier-uniform initialise the weight of a plain Linear or Conv2d layer.

    The check is on the exact type, not ``isinstance``, so subclasses (including
    lazy layers that have not been materialised yet) are left untouched, as is
    any other kind of module. Intended to be passed to ``nn.Module.apply``.
    """
    if type(module) not in (nn.Linear, nn.Conv2d):
        return
    nn.init.xavier_uniform_(module.weight)


In [5]:
import algos
# `algos` is a project module that is not part of this notebook; its
# load_mnist() is unpacked as a pair here and only the first element is used.
# The second element is bound to a private throwaway name rather than `_`,
# because IPython also uses `_` for the most recent cell output.
train_loader, _unused_loader = algos.load_mnist()

# NOTE(review): presumably fit() trains the network and returns it; confirm in algos.
model = algos.fit(GoogLeNet(), train_loader)



  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [1/10], Average Loss: 0.7983


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [2/10], Average Loss: 0.3494


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [3/10], Average Loss: 0.2831


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [4/10], Average Loss: 0.2463


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [5/10], Average Loss: 0.2225


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [6/10], Average Loss: 0.2049


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [7/10], Average Loss: 0.1905


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [8/10], Average Loss: 0.1754


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [9/10], Average Loss: 0.1614


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [10/10], Average Loss: 0.1534


In [1]:
def load_mnist(batch_size = 128, resize = (96,96)):
    """Build training and test DataLoaders for FashionMNIST.

    Despite the name, the dataset loaded is ``FashionMNIST``. Each image is
    converted to a tensor, normalised with mean 0.5 and std 0.5, and then
    resized. The data is stored under ``./data`` and downloaded if missing.

    This function relies on ``transforms``, ``FashionMNIST`` and ``DataLoader``
    from the notebook's import cell, so that cell must be executed first.

    Args:
        batch_size: number of samples per batch for both loaders.
        resize: target size passed to ``transforms.Resize``; pass ``None`` to
            keep the images at their stored size.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    steps = [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
    if resize is not None:
        steps.append(transforms.Resize(resize))
    transform = transforms.Compose(steps)

    # Load the FashionMNIST dataset
    train_dataset = FashionMNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = FashionMNIST(root='./data', train=False, transform=transform)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader


train_loader, test_loader = load_mnist()

NameError: name 'transforms' is not defined

In [25]:
# Training run for GoogLeNet on the batches produced by `train_loader`.
NUM_EPOCHS = 10        # single source of truth for the loop and the log line
LEARNING_RATE = 0.001

model = GoogLeNet().to(device)

# The network is built from lazy layers, so one real batch is pushed through it
# to materialise the weights before init_cnn is applied to them.
input_data = next(iter(train_loader))[0].to(device)
model.apply_init([input_data], init_cnn)

criterion = nn.CrossEntropyLoss()
# Created after the dry run above, i.e. once the parameters exist.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

total_step = len(train_loader)
for epoch in range(NUM_EPOCHS):
    epoch_loss = 0.0
    for images, labels in tqdm(train_loader, total=total_step):
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() copies the scalar loss to a Python float for accumulation.
        epoch_loss += loss.item()

    # Print average epoch loss (mean of the per-batch losses)
    average_loss = epoch_loss / total_step
    print(f"Epoch [{epoch+1}/{NUM_EPOCHS}], Average Loss: {average_loss:.4f}")



  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [1/10], Average Loss: 0.8351


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [2/10], Average Loss: 0.3778


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [3/10], Average Loss: 0.3034


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [4/10], Average Loss: 0.2672


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [5/10], Average Loss: 0.2421


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [6/10], Average Loss: 0.2243


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [7/10], Average Loss: 0.2057


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [8/10], Average Loss: 0.1882


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [9/10], Average Loss: 0.1786


  0%|          | 0/469 [00:00<?, ?it/s]

Epoch [10/10], Average Loss: 0.1704
