In [1]:
import torch
from torch import nn, optim
from torch.utils.data import random_split,DataLoader,Subset
from torchvision.datasets import MNIST
import torchvision.transforms as T

In [2]:
# The global RNG is reset to the same seed before every random draw, so the
# input and both layers are each created from an identical RNG state.  The
# saved output of the next cell shows `conv` and `tconv` with the same weights.
torch.manual_seed(0)
x = torch.randn(1, 5, 4)

torch.manual_seed(0)
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, bias=False)

torch.manual_seed(0)
tconv = nn.ConvTranspose2d(in_channels=1, out_channels=1, kernel_size=3, bias=False)

In [3]:
# Walk the parameters of both layers in lockstep and print each pair.
# NOTE(review): the negation below builds a brand-new tensor that is only
# printed; it is never written back into `tconv`, so this cell leaves tconv's
# weights untouched.  The saved output shows the second tensor un-negated, so
# it appears to predate this line -- re-run the cell to refresh it, and use an
# in-place copy under torch.no_grad() if tconv was meant to be modified.
paired_params = zip(conv.named_parameters(), tconv.named_parameters())
for (conv_name, conv_weight), (tconv_name, tconv_weight) in paired_params:
    negated_weight = -conv_weight
    print(conv_name, tconv_name, conv_weight, negated_weight)

weight weight Parameter containing:
tensor([[[[-0.0025,  0.1788, -0.2743],
          [-0.2453, -0.1284,  0.0894],
          [-0.0066,  0.2643, -0.0296]]]], requires_grad=True) Parameter containing:
tensor([[[[-0.0025,  0.1788, -0.2743],
          [-0.2453, -0.1284,  0.0894],
          [-0.0066,  0.2643, -0.0296]]]], requires_grad=True)


In [4]:
# Push the input through the convolution, then through the transposed
# convolution.  The saved output shows that `x_` has the same 5x4 layout as
# `x` but different values.
y = conv(x)
x_ = tconv(y)
(x, x_)

(tensor([[[-1.1258, -1.1524, -0.2506, -0.4339],
          [ 0.5988, -1.5551, -0.3414,  1.8530],
          [ 0.4681, -0.1577,  1.4437,  0.2660],
          [ 1.3894,  1.5863,  0.9463, -0.8437],
          [ 0.9318,  1.2590,  2.0050,  0.0537]]]),
 tensor([[[ 4.9849e-04, -3.8320e-02,  2.4126e-01, -2.8608e-01],
          [ 4.8425e-02, -1.8786e-01, -2.9079e-01,  2.0948e-01],
          [-5.3717e-02, -9.6137e-02,  5.4747e-01, -1.0152e-01],
          [ 1.5009e-01,  1.1379e-01, -1.8942e-01,  2.3219e-02],
          [ 4.0819e-03, -1.6414e-01,  4.9867e-02, -3.5351e-03]]],
        grad_fn=<SqueezeBackward1>))

In [None]:
ROOT = "./data_for_test"
batch_size = 1000

# Fetch the MNIST training set (downloaded into ROOT when missing) and convert
# every image to a tensor on access.
mnist_train = MNIST(ROOT, train=True, download=True, transform=T.ToTensor())

# Random 50,000 / 10,000 partition.  No generator is passed, so the split
# is drawn from torch's global RNG state at the time this cell runs.
traindata, valdata = random_split(mnist_train, [50000, 10000])


In [None]:
# Shift only the validation samples: roll them by 2 positions along dims 1 and
# 2 (presumably image rows and columns -- confirm against the dataset layout).
# The roll is circular, so values pushed past one edge re-enter on the other.
# NOTE(review): this overwrites the underlying dataset storage in place, so
# running the cell a second time shifts the same samples by a further 2.
val_indices = valdata.indices
underlying = valdata.dataset
underlying.data[val_indices] = underlying.data[val_indices].roll(
    shifts=(2, 2),
    dims=(1, 2),
)

In [None]:
# Both loaders share the same settings: `batch_size` as currently defined,
# pinned host memory, and a fresh shuffle on every pass.
loader_options = dict(batch_size=batch_size, pin_memory=True, shuffle=True)
trainloader = DataLoader(traindata, **loader_options)
evalloader = DataLoader(valdata, **loader_options)

In [None]:
# Number of batches per pass for the training and evaluation loaders.
tuple(len(batches) for batches in (trainloader, evalloader))

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook still runs on machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# setup training hyperparameters for the MLP
num_epochs = 10
learning_rate = 0.05
momentum = 0.9
linear_units = 30

# One-hidden-layer perceptron: 784 flattened inputs -> `linear_units` -> 10 logits.
mlp_model = nn.Sequential(
    nn.Linear(784, linear_units),
    nn.ReLU(),
    nn.Linear(linear_units, 10),
).to(DEVICE)
# create optimizer for the model
optimizer_mlp = optim.SGD(mlp_model.parameters(), lr=learning_rate, momentum=momentum)

# setup training hyperparameters
# NOTE(review): the data loaders are created in an earlier cell with the
# batch size defined there; this re-assignment only takes effect if that
# loader cell is re-run afterwards.
batch_size = 50
learning_rate = 0.01
# setup model hyperparameters
kernel_size = (4, 4)
stride = (2, 2)
padding = (1, 1)
n_filters_conv1 = 5
n_filters_conv2 = 10

# With kernel 4, stride 2 and padding 1 each convolution halves the spatial
# size, so a 28x28 input becomes 14x14 and then 7x7 before flattening.
conv_output_side = 7
conv_model = nn.Sequential(
    nn.Conv2d(1, n_filters_conv1, kernel_size, stride, padding),
    nn.ReLU(),
    nn.Conv2d(n_filters_conv1, n_filters_conv2, kernel_size, stride, padding),
    nn.ReLU(),
    nn.Flatten(),
    # 10 filters * 7 * 7 = 490 features with the settings above.
    nn.Linear(n_filters_conv2 * conv_output_side * conv_output_side, 10),
).to(DEVICE)
optimizer_cnn = optim.SGD(conv_model.parameters(), lr=learning_rate, momentum=momentum)

# Shared classification loss for both models.
lossfun = nn.CrossEntropyLoss()

def accuracy_score(y_hat, y):
    """Return the fraction of samples whose highest score matches the label.

    Args:
        y_hat: per-sample scores; the arg-max is taken along axis 1.
        y: expected class index for every sample.

    Returns:
        Number of matching predictions divided by ``len(y_hat)``.

    Raises:
        ValueError: if ``y_hat`` and ``y`` differ in length.
    """
    n_samples = len(y_hat)
    if n_samples == len(y):
        predicted = y_hat.argmax(axis=1)
        n_correct = (predicted == y).sum()
        return n_correct / n_samples
    raise ValueError("Lengths dont match")

In [None]:
def train(
    model: nn.Module,
    loader: DataLoader,
    optimizer,
    num_epochs:int,
    lossfun,
    filename,
    flatten = False,
):
    """Train ``model`` for ``num_epochs`` passes over ``loader`` and log the results.

    For every epoch the mean batch accuracy and mean batch loss are appended
    to ``filename``.  Batches are moved to the module-level ``DEVICE`` first.

    Args:
        model: network to optimise; it is switched to training mode.
        loader: iterable of ``(inputs, labels)`` batches.
        optimizer: optimizer stepped once per batch.
        num_epochs: number of passes over ``loader``.
        lossfun: callable mapping ``(predictions, labels)`` to a scalar loss.
        filename: path of the log file, opened in append mode.
        flatten: when true, every batch is reshaped to ``(-1, 784)`` before
            being passed to the model.
    """
    with open(filename,"+a") as log:
        model.train()
        for epoch_idx in range(num_epochs):
            print("Epoch {} / {}:".format(epoch_idx + 1, num_epochs),file=log)
            loss_total, acc_total = 0, 0
            for inputs, labels in loader:
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)
                if flatten:
                    inputs = inputs.reshape(-1,784)

                # Forward pass and per-batch metrics.
                predictions = model(inputs)
                batch_loss = lossfun(predictions, labels)
                batch_acc = accuracy_score(predictions, labels)
                loss_total += batch_loss.item()
                acc_total += batch_acc

                # Backward pass and parameter update.
                optimizer.zero_grad(set_to_none=True)
                batch_loss.backward()
                optimizer.step()

            # Metrics are averaged over the number of batches, not samples.
            n_batches = len(loader)
            print("  Training Accuracy: {:.4f}".format(acc_total/n_batches),file=log)
            print("  Training Cost: {:.4f}".format(loss_total/n_batches),file=log)
def eval(
    model: nn.Module,
    loader: DataLoader,
    lossfun,
    filename,
    flatten = False,
):
    """Evaluate ``model`` on ``loader`` and append the averaged metrics to a file.

    The model is switched to evaluation mode and all batches are processed
    under ``torch.inference_mode()``.  Batches are moved to the module-level
    ``DEVICE`` first.  Mean batch accuracy and mean batch loss are appended
    to ``filename``.

    Note: this function shadows Python's built-in ``eval``; the name is kept
    so that existing calls keep working.

    Args:
        model: network to evaluate; it is left in evaluation mode.
        loader: iterable of ``(inputs, labels)`` batches.
        lossfun: callable mapping ``(predictions, labels)`` to a scalar loss.
        filename: path of the log file, opened in append mode.
        flatten: when true, every batch is reshaped to ``(-1, 784)`` before
            being passed to the model.
    """
    with open(filename,"+a") as f:
        model.eval()
        with torch.inference_mode():
            avgloss = 0
            avgacc = 0
            for X, y in loader:
                X, y = X.to(DEVICE), y.to(DEVICE)
                if flatten:
                    X = X.reshape(-1,784)
                y_hat = model(X)
                loss = lossfun(y_hat, y)
                acc = accuracy_score(y_hat, y)
                avgloss += loss.item()
                avgacc += acc

            # These are evaluation metrics; labelling them "Training" made the
            # log indistinguishable from the per-epoch lines written by train().
            print("  Evaluation Accuracy: {:.4f}".format(avgacc/len(loader)),file=f)
            print("  Evaluation Cost: {:.4f}".format(avgloss/len(loader)),file=f)

In [None]:
FILENAME = "result_shifted.txt"

# (log heading, model, optimizer, flatten inputs?) for each experiment.
# The MLP consumes flattened inputs, the CNN receives batches unchanged.
experiments = (
    ("MLP", mlp_model, optimizer_mlp, True),
    ("CNN", conv_model, optimizer_cnn, False),
)

for heading, model, optimizer, flatten_inputs in experiments:
    # Write the heading first so the metrics appended below are attributed
    # to the right model.
    with open(FILENAME,"+a") as f:
        print(heading,file=f)
    train(
        model,
        trainloader,
        optimizer,
        num_epochs,
        lossfun,
        FILENAME,
        flatten=flatten_inputs,
    )
    eval(
        model,
        evalloader,
        lossfun,
        FILENAME,
        flatten=flatten_inputs,
    )

In [None]:
import scipy as sp
import numpy as np

# Toy 1-D example for a hand-worked correlation layer.
# Note: `x` and `y` re-use names that earlier cells bound to torch tensors.
x = np.array([1, 2, 3, 4])   # input signal
w = np.array([2, 1, 3])      # kernel weights
y = np.array([2, 4])         # targets
b = 1                        # bias
(x, w)

In [None]:
# Forward pass: bias plus the "valid" cross-correlation of the input with the kernel.
y_hat = b + np.correlate(x, w, mode="valid")

In [None]:
# Half the sum of squared differences between targets and predictions.
residual = y - y_hat
0.5 * residual @ residual

In [None]:
# Gradient of the loss with respect to the kernel: correlate the prediction
# error with the input.  The error vector is the shorter operand here, so the
# result is flipped to line it up with the order of `w`.
prediction_error = y_hat - y
lw = np.correlate(prediction_error, x, mode="valid")[::-1]
lw

In [None]:
# Gradient of the loss with respect to the bias: the summed prediction error.
lb = np.sum(y_hat - y)
lb

In [None]:
# One gradient-descent step with step size 0.01 on both the kernel and the bias.
eta = 0.01
w_new = w - eta * lw
b_new = b - eta * lb
(w_new, b_new)

In [None]:
# Repeat the forward pass with the updated kernel and bias.
y_hat_new = b_new + np.correlate(x, w_new, mode="valid")
y_hat_new

In [None]:
# Same loss as before, evaluated on the updated predictions.
residual_new = y - y_hat_new
0.5 * residual_new @ residual_new