In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torchvision

from torch.utils.tensorboard import SummaryWriter

# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Each stage applies a 3x3 convolution (stride 1, padding 1, so the spatial
    size is preserved), a ReLU, and a shared 2x2 max-pool with stride 2 (which
    halves the spatial size). The pooled maps are flattened per sample and fed
    to a single linear layer that returns ``num_classes`` raw scores.

    The linear layer is sized for ``16 * 7 * 7`` features, which is what two
    halvings of a 28x28 input produce (28 -> 14 -> 7).

    Args:
        in_channels: number of channels in the input images.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, in_channels = 3, num_classes = 10):
        super().__init__()
        # Both convolutions share the same "same-size" geometry.
        conv_geometry = dict(kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv1 = nn.Conv2d(in_channels, 8, **conv_geometry)
        self.pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(8, 16, **conv_geometry)
        self.fc1 = nn.Linear(16 * 7 * 7, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch of images ``x``."""
        # conv -> ReLU -> pool, once per convolution stage
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # flatten everything except the batch dimension before the classifier
        flat = x.reshape(len(x), -1)
        return self.fc1(flat)

In [6]:
# Hyperparameters
in_channels = 1          # input channels handed to CNN
num_classes = 10         # scores per sample produced by CNN
learning_rate = 1e-3     # step size handed to the Adam optimizer
batch_size = 64          # samples per DataLoader batch
num_epochs = 1           # full passes over the training loader

In [7]:
# DataLoader
# MNIST is downloaded into dataset/ on first use; ToTensor converts each image to a tensor.
batch_size = 64

train_dataset = datasets.MNIST(
    "dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(
    "dataset/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:01<00:00, 6269821.23it/s]


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 8924754.57it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 2675635.39it/s]


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 1962555.76it/s]


Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw



In [48]:
# TensorBoard event files for this notebook are written under this run directory.
writer = SummaryWriter("runs/MNIST/tryingTensorBoard")

In [12]:
# unsqueeze(1) inserts a size-1 axis at position 1: (64, 28, 28) -> (64, 1, 28, 28)
x = torch.rand(64, 28, 28)
print(x.shape, x.unsqueeze(1).shape, sep="\n")

torch.Size([64, 28, 28])
torch.Size([64, 1, 28, 28])


Parameter containing:
tensor([[[[ 0.2746,  0.0790, -0.0251],
          [ 0.0658,  0.0482,  0.1482],
          [-0.1356, -0.0758, -0.2823]]],


        [[[ 0.0337, -0.0841, -0.1851],
          [-0.3254, -0.0625,  0.1877],
          [ 0.1947, -0.1102, -0.2207]]],


        [[[ 0.2357,  0.0770, -0.2787],
          [-0.2529, -0.2276, -0.1744],
          [-0.3317,  0.1455, -0.0373]]],


        [[[ 0.0834, -0.0969, -0.0423],
          [-0.1841,  0.2436, -0.2430],
          [ 0.2959, -0.0371, -0.1348]]],


        [[[ 0.0729,  0.2497,  0.0038],
          [ 0.0031,  0.3104,  0.2222],
          [ 0.0179,  0.2325,  0.3041]]],


        [[[-0.3180,  0.3244, -0.2659],
          [-0.2809,  0.1037,  0.2163],
          [-0.1241, -0.0637,  0.3039]]],


        [[[-0.2209, -0.2882, -0.0856],
          [ 0.2616, -0.1542, -0.1353],
          [ 0.1673,  0.1327,  0.2075]]],


        [[[ 0.1201, -0.2517,  0.1041],
          [ 0.3119,  0.2950,  0.2512],
          [ 0.0006, -0.0916,  0.1785]]]], device='cud

In [88]:
# Display the tensor stored under the "conv1.weight" key of the model's state dict.
# NOTE(review): no earlier cell visible in this notebook defines `model`; this cell
# appears to rely on a model created by a later or since-removed cell — confirm it
# still works after Restart & Run All.
model.state_dict()["conv1.weight"]

tensor([[[[ 0.2746,  0.0790, -0.0251],
          [ 0.0658,  0.0482,  0.1482],
          [-0.1356, -0.0758, -0.2823]]],


        [[[ 0.0337, -0.0841, -0.1851],
          [-0.3254, -0.0625,  0.1877],
          [ 0.1947, -0.1102, -0.2207]]],


        [[[ 0.2357,  0.0770, -0.2787],
          [-0.2529, -0.2276, -0.1744],
          [-0.3317,  0.1455, -0.0373]]],


        [[[ 0.0834, -0.0969, -0.0423],
          [-0.1841,  0.2436, -0.2430],
          [ 0.2959, -0.0371, -0.1348]]],


        [[[ 0.0729,  0.2497,  0.0038],
          [ 0.0031,  0.3104,  0.2222],
          [ 0.0179,  0.2325,  0.3041]]],


        [[[-0.3180,  0.3244, -0.2659],
          [-0.2809,  0.1037,  0.2163],
          [-0.1241, -0.0637,  0.3039]]],


        [[[-0.2209, -0.2882, -0.0856],
          [ 0.2616, -0.1542, -0.1353],
          [ 0.1673,  0.1327,  0.2075]]],


        [[[ 0.1201, -0.2517,  0.1041],
          [ 0.3119,  0.2950,  0.2512],
          [ 0.0006, -0.0916,  0.1785]]]], device='cuda:0')

In [13]:
def train(model, train_loader, loss_fn, optimizer, device, num_epochs):
    """Train ``model`` and log the run to the notebook-level TensorBoard ``writer``.

    For every batch this logs the loss, the number of correct predictions and
    the accuracy as scalars, an image grid of the batch, histograms of the
    ``conv1`` and ``fc1`` weights, and an embedding of the flattened inputs.
    After the last epoch it logs one hyperparameter summary with the mean
    per-batch accuracy and loss.

    This function reads the global name ``writer`` (a ``SummaryWriter``) at
    call time, so the caller must bind it before calling ``train``.

    Args:
        model: module exposing ``conv1`` and ``fc1`` sub-modules with a
            ``weight`` attribute (their weights are logged as histograms).
        train_loader: iterable of ``(data, targets)`` batches; its
            ``batch_size`` attribute is recorded as a hyperparameter.
        loss_fn: callable ``loss_fn(scores, targets)`` returning a scalar tensor.
        optimizer: optimizer over ``model``'s parameters; the ``lr`` of its
            first parameter group is recorded as a hyperparameter.
        device: device the batches are moved to before the forward pass.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        None. All results are written through ``writer``.
    """
    classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
    accuracies = []
    losses = []
    # Counts batches across all epochs so that later epochs do not overwrite
    # the TensorBoard points logged by earlier ones.
    global_step = 0

    model.train()
    for _ in range(num_epochs):
        for data, targets in train_loader:
            data = data.to(device = device)
            targets = targets.to(device = device)

            # forward
            scores = model(data)
            loss = loss_fn(scores, targets)

            # backward
            optimizer.zero_grad()
            loss.backward()

            # gradient descent or adam step
            optimizer.step()

            # Convert to plain Python numbers: keeping the tensors would hold on
            # to every batch's autograd graph for the whole run.
            loss_value = loss.item()
            num_correct = (scores.argmax(dim = 1) == targets).sum().item()
            accuracy = num_correct / data.shape[0]
            accuracies.append(accuracy)
            losses.append(loss_value)

            # Scalars
            writer.add_scalar("Training loss", loss_value, global_step = global_step)
            writer.add_scalar("Number of correct", num_correct, global_step = global_step)
            writer.add_scalar("Accuracy", accuracy, global_step = global_step)

            # Visualizing images and weights
            img_grid = torchvision.utils.make_grid(data)
            writer.add_image("MNIST images", img_grid, global_step)
            writer.add_histogram("conv1", model.conv1.weight, global_step = global_step)
            writer.add_histogram("fc1", model.fc1.weight, global_step = global_step)

            # Visualize embeddings: one flattened vector per sample, labelled by class
            features = data.reshape(data.shape[0], -1)
            class_labels = [classes[label] for label in targets.tolist()]
            writer.add_embedding(features, metadata = class_labels, label_img = data, global_step = global_step)

            global_step += 1

    # Hyperparams: logged once per run, and only if at least one batch was seen
    # (otherwise the means below would divide by zero).
    if losses:
        writer.add_hparams(
            # Taken from the objects actually used for training rather than
            # from notebook globals that may be stale.
            {"lr": optimizer.param_groups[0]["lr"], "bsize": train_loader.batch_size},
            {"accuracy" : sum(accuracies) / len(accuracies), "loss" : sum(losses) / len(losses)},
        )
    # Push buffered events to disk so they show up even if the writer is never closed.
    writer.flush()


# Hyperparameter Tuning

In [14]:
# Hyperparameter tuning
# Train one fresh model per (batch size, learning rate) pair and log each run
# to its own TensorBoard directory.
num_epochs = 1
batch_sizes = [32, 64, 128]
learning_rates = [0.001, 0.0001]

# `train` needs a loss function and no earlier cell defines one.
# NOTE(review): cross-entropy is assumed because CNN returns raw class scores and
# the targets are class indices — confirm this matches the loss originally used.
loss_fn = nn.CrossEntropyLoss()

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        model = CNN(in_channels = in_channels, num_classes = num_classes).to(device)
        optimizer = optim.Adam(model.parameters(), lr = learning_rate)
        train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)
        # `train` logs through the global `writer`, so rebind it for every run.
        writer = SummaryWriter(f"runs/MNIST/tryingTensorBoard/batch_size={batch_size}, lr={learning_rate}")
        try:
            train(model, train_loader, loss_fn, optimizer, device, num_epochs)
        finally:
            # Flush and release the event file even if the run is interrupted.
            writer.close()



KeyboardInterrupt: 

# Visualizing Images, Weights and Embeddings

In [15]:
# Single training run whose images, weight histograms and embeddings are logged
# to a dedicated TensorBoard directory.
num_epochs = 1
batch_size = 256
learning_rate = 0.001


model = CNN(in_channels = in_channels, num_classes = num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)
# `train` needs a loss function and no earlier cell defines one.
# NOTE(review): cross-entropy is assumed because CNN returns raw class scores and
# the targets are class indices — confirm this matches the loss originally used.
loss_fn = nn.CrossEntropyLoss()
train_loader = DataLoader(dataset = train_dataset, batch_size = batch_size, shuffle = True)

# `train` logs through the global `writer`, so bind it before the call.
writer = SummaryWriter("runs/MNIST/tryingTensorBoard/images_weights")
try:
    train(model, train_loader, loss_fn, optimizer, device, num_epochs)
finally:
    # Flush and release the event file even if the run is interrupted.
    writer.close()