In [1]:
import torch
from torchvision import datasets
from torchvision.models.optical_flow.raft import ResidualBlock
from torchvision.transforms import ToTensor
from torch import nn
from torch import Tensor
import time
import copy
from torch.utils.data import DataLoader
import pandas as pd

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

### Network definition

In [3]:
class NetA1(nn.Module):
    def __init__(self, num_classes: int):
        super(NetA1, self).__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=2)
        self.flatten = nn.Flatten(start_dim=-3)
        self.linear1 = nn.Linear(576, num_classes)
        self.softmax = nn.Softmax(dim=0)
            
    def freeze(self, layer: str):
        for param in getattr(self, layer).parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.flatten(x)
        x = self.relu(self.linear1(x))
        x = self.softmax(x)
        return x

In [4]:
class NetA2(nn.Module):
    def __init__(self, num_classes: int):
        super(NetA2, self).__init__()
        self.relu = nn.ReLU()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=5, stride=2)
        self.conv2 = nn.Conv2d(in_channels=4, out_channels=12, kernel_size=3, stride=2)
        self.flatten = nn.Flatten(start_dim=-3)
        self.linear1 = nn.Linear(300, num_classes)
        self.softmax = nn.Softmax(dim=0)
        
    def freeze(self, layer: str):
        for param in getattr(self, layer).parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = self.flatten(x)
        x = self.relu(self.linear1(x))
        return x

### Network initialization

In [5]:
initialization_weights = torch.tensor([
    [[
        [1, 0, 0, 0, 1],
        [0, 1, 0, 1, 0], 
        [0, 0, 1, 0, 0], 
        [0, 1, 0, 1, 0],
        [1, 0, 0, 0, 1]
    ]],
    [[
        [0, 0, 1, 0, 0],
        [1, 1, 0, 1, 1], 
        [0, 0, 1, 0, 0], 
        [1, 1, 0, 1, 1],
        [0, 0, 1, 0, 0]
    ]],
    [[
        [0, 1, 1, 1, 0],
        [1, 1, 0, 1, 1], 
        [1, 0, 0, 0, 1], 
        [1, 1, 0, 1, 1],
        [0, 1, 1, 1, 0]
    ]],
    [[
        [1, 1, 0, 1, 1], 
        [0, 1, 0, 1, 0], 
        [0, 0, 1, 0, 0], 
        [1, 1, 0, 1, 1],
        [1, 1, 0, 1, 1]
    ]]], dtype=torch.float32)

initialization_biases = torch.tensor([0,0,0,0], dtype=torch.float32)
initialization_weights.shape

torch.Size([4, 1, 5, 5])

In [6]:
net_a1_hf = NetA1(10)
net_a1_ht = NetA1(10)
net_a1_dt = NetA1(10)

#set conv1 initialization of net_a1_hf
net_a1_hf.conv1.weight = nn.Parameter(copy.deepcopy(initialization_weights))
net_a1_hf.conv1.bias = nn.Parameter(copy.deepcopy(initialization_biases))

# set same weights and bias to each layer of each network
net_a1_ht.load_state_dict(net_a1_hf.state_dict())
for name, param in net_a1_hf.state_dict().items():
    if "conv1" not in name:
        net_a1_dt.state_dict()[name].copy_(param)

#set conv1 initialization
#net_a1_dt.conv1.load_state_dict(net_a2_dt.conv1.state_dict())

#freeze conv1 layer of net_a2_hf
net_a1_hf.freeze("conv1")

#save weights and bias of nat_a1_h* and net_a1_dt
torch.save({'initialization': net_a1_hf.state_dict()}, 'NetA1HF_init.pt')
torch.save({'initialization': net_a1_ht.state_dict()}, 'NetA1HT_init.pt')
torch.save({'initialization': net_a1_dt.state_dict()}, 'NetA1DT_init.pt')


# print weights and bias
print("Net_A1_HF: \n \t", net_a1_hf.state_dict())
print("Net_A1_HT: \n \t", net_a1_ht.state_dict())
print("Net_A1_DT: \n \t", net_a1_dt.state_dict())

Net_A1_HF: 
 	 OrderedDict([('conv1.weight', tensor([[[[1., 0., 0., 0., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1.]]],


        [[[0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.]]],


        [[[0., 1., 1., 1., 0.],
          [1., 1., 0., 1., 1.],
          [1., 0., 0., 0., 1.],
          [1., 1., 0., 1., 1.],
          [0., 1., 1., 1., 0.]]],


        [[[1., 1., 0., 1., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [1., 1., 0., 1., 1.]]]])), ('conv1.bias', tensor([0., 0., 0., 0.])), ('linear1.weight', tensor([[-0.0109,  0.0062,  0.0268,  ..., -0.0219, -0.0045, -0.0344],
        [-0.0116,  0.0373,  0.0330,  ...,  0.0226,  0.0077,  0.0011],
        [-0.0132, -0.0024, -0.0067,  ...,  0.0403, -0.0240,  0.0265],
        ...,
        [-0.02

In [7]:
net_a2_hf = NetA2(10)
net_a2_ht = NetA2(10)
net_a2_dt = NetA2(10)

#set conv1 initialization of net_a2_hf
net_a2_hf.conv1.weight = nn.Parameter(copy.deepcopy(initialization_weights))
net_a2_hf.conv1.bias = nn.Parameter(copy.deepcopy(initialization_biases))

# set same weights and bias to each layer of each network
net_a2_ht.load_state_dict(net_a2_hf.state_dict())
for name, param in net_a2_hf.state_dict().items():
    if "conv1" not in name:
        net_a2_dt.state_dict()[name].copy_(param)

#set conv1 initialization
net_a2_dt.conv1.load_state_dict(net_a1_dt.conv1.state_dict())

#freeze conv1 layer of net_a2_hf
net_a2_hf.freeze("conv1")

#save weights and bias of nat_a1_h* and net_a1_dt
torch.save({'initialization': net_a2_hf.state_dict()}, 'NetA2HF_init.pt')
torch.save({'initialization': net_a2_ht.state_dict()}, 'NetA2HT_init.pt')
torch.save({'initialization': net_a2_dt.state_dict()}, 'NetA2DT_init.pt')


# print weights and bias
print("Net_A1_HF: \n \t", net_a2_hf.state_dict())
print("Net_A1_HT: \n \t", net_a2_ht.state_dict())
print("Net_A1_DT: \n \t", net_a2_dt.state_dict())

Net_A1_HF: 
 	 OrderedDict([('conv1.weight', tensor([[[[1., 0., 0., 0., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1.]]],


        [[[0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.]]],


        [[[0., 1., 1., 1., 0.],
          [1., 1., 0., 1., 1.],
          [1., 0., 0., 0., 1.],
          [1., 1., 0., 1., 1.],
          [0., 1., 1., 1., 0.]]],


        [[[1., 1., 0., 1., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [1., 1., 0., 1., 1.]]]])), ('conv1.bias', tensor([0., 0., 0., 0.])), ('conv2.weight', tensor([[[[ 0.1518, -0.0377,  0.1249],
          [-0.0100, -0.1487,  0.0391],
          [-0.1228,  0.1132, -0.0737]],

         [[-0.0040, -0.0954, -0.1017],
          [ 0.1362,  0.0255, -0.1470],
          [-0.0381, -0.0117,  0.0466]],

 

### Preliminary Analysys

In [8]:
print( "Net_A1: \n",
       "\t|W_{conv_a1_hf} - W_{conv_a1_ht}| =", torch.norm(net_a1_hf.conv1.weight - net_a1_ht.conv1.weight),"\n",
      "\t|W_{linear_a1_hf} - W_{linear_a1_ht}| =", torch.norm(net_a1_hf.linear1.weight - net_a1_ht.linear1.weight), "\n",
      "\t|W_{linear_a1_hf} - W_{linear_a1_dt}| =", torch.norm(net_a1_hf.linear1.weight - net_a1_dt.linear1.weight), "\n")

print( "Net_A2: \n",
       "\t|W_{conv1_a2_hf} - W_{conv1_a2_ht}| =", torch.norm(net_a2_hf.conv1.weight - net_a2_ht.conv1.weight),"\n",
       "\t|W_{conv2_a2_hf} - W_{conv2_a2_ht}| =", torch.norm(net_a2_hf.conv2.weight - net_a2_ht.conv2.weight),"\n",
       "\t|W_{linear1_a2_hf} - W_{linear1_a2_ht}| =", torch.norm(net_a2_hf.linear1.weight - net_a2_ht.linear1.weight), "\n",
       "\t|W_{linear1_a2_hf} - W_{linear1_a2_dt}| =", torch.norm(net_a2_hf.linear1.weight - net_a2_dt.linear1.weight), "\n"""",
       "\t|W_{linear2_a2_hf} - W_{linear2_a2_ht}| =", torch.norm(net_a2_hf.linear2.weight - net_a2_ht.linear2.weight), "\n",
       "\t|W_{linear2_a2_hf} - W_{linear2_a2_dt}| =", torch.norm(net_a2_hf.linear2.weight - net_a2_dt.linear2.weight), "\n",
       "\t|W_{linear3_a2_hf} - W_{linear3_a2_ht}| =", torch.norm(net_a2_hf.linear3.weight - net_a2_ht.linear3.weight), "\n",
       "\t|W_{linear3_a2_hf} - W_{linear3_a2_dt}| =", torch.norm(net_a2_hf.linear3.weight - net_a2_dt.linear3.weight), "\n"
       """)

print( "Net_A1 Vs Net_A2: \n",
       "\t|W_{conv1_a1_hf} - W_{conv1_a2_hf}| =", torch.norm(net_a1_hf.conv1.weight - net_a2_hf.conv1.weight),"\n",
       "\t|W_{conv1_a1_ht} - W_{conv2_a2_ht}| =", torch.norm(net_a1_ht.conv1.weight - net_a2_ht.conv1.weight),"\n",
       "\t|W_{conv1_a1_dt} - W_{conv2_a2_dt}| =", torch.norm(net_a1_dt.conv1.weight - net_a2_dt.conv1.weight),"\n")

Net_A1: 
 	|W_{conv_a1_hf} - W_{conv_a1_ht}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
 	|W_{linear_a1_hf} - W_{linear_a1_ht}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
 	|W_{linear_a1_hf} - W_{linear_a1_dt}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 

Net_A2: 
 	|W_{conv1_a2_hf} - W_{conv1_a2_ht}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
 	|W_{conv2_a2_hf} - W_{conv2_a2_ht}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
 	|W_{linear1_a2_hf} - W_{linear1_a2_ht}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
 	|W_{linear1_a2_hf} - W_{linear1_a2_dt}| = tensor(0., grad_fn=<LinalgVectorNormBackward0>) 
,
       "	|W_{linear2_a2_hf} - W_{linear2_a2_ht}| =", torch.norm(net_a2_hf.linear2.weight - net_a2_ht.linear2.weight), "
",
       "	|W_{linear2_a2_hf} - W_{linear2_a2_dt}| =", torch.norm(net_a2_hf.linear2.weight - net_a2_dt.linear2.weight), "
",
       "	|W_{linear3_a2_hf} - W_{linear3_a2_ht}| =", torch.norm(net_a2_hf.linear3.weight - net_a2_ht

In [9]:
print("Net_A1HF:")
for param in net_a1_hf.conv1.parameters():
    print("\t",param.requires_grad)
print("Net_A2HF:")
for param in net_a2_hf.conv1.parameters():
    print("\t",param.requires_grad)
print("Net_A1HT:")
for param in net_a1_ht.conv1.parameters():
    print("\t",param.requires_grad)
print("Net_A2HT:")
for param in net_a2_ht.conv1.parameters():
    print("\t",param.requires_grad)
print("Net_A1DT:")
for param in net_a1_dt.conv1.parameters():
    print("\t",param.requires_grad)
print("Net_A2DT:")
for param in net_a2_dt.conv1.parameters():
    print("\t",param.requires_grad)

Net_A1HF:
	 False
	 False
Net_A2HF:
	 False
	 False
Net_A1HT:
	 True
	 True
Net_A2HT:
	 True
	 True
Net_A1DT:
	 True
	 True
Net_A2DT:
	 True
	 True


In [10]:
print(net_a1_ht.conv1.weight is net_a1_hf.conv1.weight)
print(net_a1_ht.linear1.weight is net_a1_hf.linear1.weight)
print(net_a1_hf.linear1.weight is net_a1_dt.linear1.weight)
print(net_a2_hf.conv1.weight is net_a1_hf.conv1.weight)
print(net_a2_ht.conv1.weight is net_a1_ht.conv1.weight)
print(net_a2_dt.conv1.weight is net_a1_dt.conv1.weight)
print(net_a2_hf.linear1.weight is net_a2_ht.linear1.weight)
print(net_a2_hf.linear1.weight is net_a2_dt.linear1.weight)

False
False
False
False
False
False
False
False


### Data Loading

In [11]:
train_data= datasets.FashionMNIST(root='data', train=True, download=True, transform=ToTensor(),)

test_data = datasets.FashionMNIST(root='data', train=False, download=True, transform=ToTensor(),)

In [12]:
labels_map={
    0: 'T-shirt',
    1: 'Trouser',
    2: 'Pullover',
    3: 'Dress',
    4: 'Coat',
    5: 'Sandal',
    6: 'Shirt',
    7: 'Sneaker',
    8: 'Bag',
    9: 'Ankle Boot',
}
sample_idx = torch.randint(len(train_data), size = (1,)).item()
image, label = train_data[sample_idx]
image.shape

torch.Size([1, 28, 28])

In [13]:
batch_size = 128

train_dataloader= DataLoader(train_data, batch_size = batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

### Training/Test Loop

In [14]:
def train_loop(device, dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1000 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

    return model.state_dict()


def test_loop(device, dataloader, model, loss_fn):
      size = len(dataloader.dataset)
      num_batches = len(dataloader)
      test_loss, correct = 0, 0

      with torch.no_grad():
        for X, y in dataloader:
          X, y = X.to(device), y.to(device)
          pred = model(X)
          test_loss += loss_fn(pred, y).item()
          correct += (pred.argmax(1) == y).type(torch.float).sum().item()

      test_loss /= num_batches
      correct /= size
      print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
      return 100*correct, test_loss

### Training

In [15]:
learning_rate = 5e-4#4.1
epochs = 60

In [16]:
def train_test(device, train_dataloader, test_dataloader, net, learning_rate, epochs):
    net.to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    
    accuracies = []
    losses = []
    times=[]
    
    time_s = time.time()
    for t in range(epochs):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loop(device, train_dataloader, net, loss_fn, optimizer)
        acc, loss = test_loop(device, test_dataloader, net, loss_fn)
        accuracies.append(acc)
        losses.append(loss)
        times.append(time.time() - time_s)
    print("Done!")
    return pd.DataFrame(
        {
            "epoch": [ i for i in range(epochs)],
            "times": times,
            "loss": losses,
            "accuracy": accuracies
        }
    )

NetA1 -> HF Train 

In [17]:
print(net_a1_hf.state_dict())
df_net_a1_hf = train_test(device, train_dataloader, test_dataloader, net_a1_hf, learning_rate, epochs)
df_net_a1_hf.to_csv('NetA1HF_results.csv', index=False)
torch.save({'initialization': net_a1_hf.state_dict()}, 'NetA1HF_trained.pt')
net_a1_hf.state_dict()

OrderedDict([('conv1.weight', tensor([[[[1., 0., 0., 0., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1.]]],


        [[[0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.]]],


        [[[0., 1., 1., 1., 0.],
          [1., 1., 0., 1., 1.],
          [1., 0., 0., 0., 1.],
          [1., 1., 0., 1., 1.],
          [0., 1., 1., 1., 0.]]],


        [[[1., 1., 0., 1., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [1., 1., 0., 1., 1.]]]])), ('conv1.bias', tensor([0., 0., 0., 0.])), ('linear1.weight', tensor([[-0.0109,  0.0062,  0.0268,  ..., -0.0219, -0.0045, -0.0344],
        [-0.0116,  0.0373,  0.0330,  ...,  0.0226,  0.0077,  0.0011],
        [-0.0132, -0.0024, -0.0067,  ...,  0.0403, -0.0240,  0.0265],
        ...,
        [-0.0260,  0.0160,  0

OrderedDict([('conv1.weight',
              tensor([[[[1., 0., 0., 0., 1.],
                        [0., 1., 0., 1., 0.],
                        [0., 0., 1., 0., 0.],
                        [0., 1., 0., 1., 0.],
                        [1., 0., 0., 0., 1.]]],
              
              
                      [[[0., 0., 1., 0., 0.],
                        [1., 1., 0., 1., 1.],
                        [0., 0., 1., 0., 0.],
                        [1., 1., 0., 1., 1.],
                        [0., 0., 1., 0., 0.]]],
              
              
                      [[[0., 1., 1., 1., 0.],
                        [1., 1., 0., 1., 1.],
                        [1., 0., 0., 0., 1.],
                        [1., 1., 0., 1., 1.],
                        [0., 1., 1., 1., 0.]]],
              
              
                      [[[1., 1., 0., 1., 1.],
                        [0., 1., 0., 1., 0.],
                        [0., 0., 1., 0., 0.],
                        [1., 1., 0., 1., 1.],


NetA1-> HT train

In [18]:
print(net_a1_ht.state_dict())
df_net_a1_ht = train_test(device, train_dataloader, test_dataloader, net_a1_ht, learning_rate, epochs)
df_net_a1_ht.to_csv('NetA1HT_results.csv', index=False)
torch.save({'initialization': net_a1_ht.state_dict()}, 'NetA1HT_trained.pt')
net_a1_ht.state_dict()

OrderedDict([('conv1.weight', tensor([[[[1., 0., 0., 0., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1.]]],


        [[[0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.]]],


        [[[0., 1., 1., 1., 0.],
          [1., 1., 0., 1., 1.],
          [1., 0., 0., 0., 1.],
          [1., 1., 0., 1., 1.],
          [0., 1., 1., 1., 0.]]],


        [[[1., 1., 0., 1., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [1., 1., 0., 1., 1.]]]])), ('conv1.bias', tensor([0., 0., 0., 0.])), ('linear1.weight', tensor([[-0.0109,  0.0062,  0.0268,  ..., -0.0219, -0.0045, -0.0344],
        [-0.0116,  0.0373,  0.0330,  ...,  0.0226,  0.0077,  0.0011],
        [-0.0132, -0.0024, -0.0067,  ...,  0.0403, -0.0240,  0.0265],
        ...,
        [-0.0260,  0.0160,  0

OrderedDict([('conv1.weight',
              tensor([[[[ 0.7176, -0.5781, -0.6060, -0.0475,  1.1054],
                        [-0.1733,  0.9834, -0.1711,  1.4371, -0.1028],
                        [-0.5196,  0.2502,  0.8477,  0.5163, -0.2029],
                        [-0.2545,  0.9754, -0.3562,  1.4367, -0.1005],
                        [ 1.2646,  0.1536, -0.0976,  0.3039,  1.5294]]],
              
              
                      [[[-0.4497, -0.4623,  1.6874, -0.0393, -0.2903],
                        [ 0.7817,  0.6986,  0.9122,  1.5241,  1.0825],
                        [-0.1048, -0.0579,  1.9054,  0.6129, -0.1130],
                        [ 0.8026,  0.7876,  0.7397,  1.5373,  1.0518],
                        [-0.2522, -0.1370,  1.6614,  0.3832,  0.0592]]],
              
              
                      [[[-0.2018,  1.3166,  1.0381,  1.3409, -0.1870],
                        [ 1.2004,  1.0302, -0.4551,  1.0690,  1.0940],
                        [ 1.1427, -0.0579, -0.6418, -0

NetA1-> DT train

In [19]:
print(net_a1_dt.state_dict())
df_net_a1_dt = train_test(device, train_dataloader, test_dataloader, net_a1_dt, learning_rate, epochs)
df_net_a1_dt.to_csv('NetA1DT_results.csv', index=False)
torch.save({'initialization': net_a1_dt.state_dict()}, 'NetA1DT_trained.pt')
net_a1_dt.state_dict()

OrderedDict([('conv1.weight', tensor([[[[-0.0061, -0.1554, -0.0142,  0.0578, -0.0099],
          [ 0.1869,  0.1770, -0.1435,  0.1801,  0.1628],
          [ 0.1702, -0.0947,  0.1717, -0.0842,  0.1247],
          [ 0.0238,  0.0544,  0.0687,  0.1856,  0.0196],
          [ 0.0523,  0.0625,  0.0194,  0.0104,  0.1586]]],


        [[[ 0.1630,  0.0413, -0.0511, -0.1957, -0.0891],
          [-0.1879,  0.0403, -0.1168, -0.0925,  0.1643],
          [ 0.0493,  0.1674,  0.1800,  0.1824,  0.0387],
          [-0.0117, -0.1641,  0.0288, -0.1827, -0.0479],
          [ 0.0218, -0.0633, -0.0475, -0.1878,  0.0313]]],


        [[[ 0.1177, -0.1837,  0.1865, -0.0738, -0.0184],
          [ 0.0237,  0.1294,  0.1114,  0.1400,  0.0315],
          [ 0.0940, -0.0617, -0.0338, -0.0947, -0.1626],
          [ 0.0766,  0.0023,  0.0311,  0.1917,  0.1184],
          [ 0.1952,  0.1259, -0.0296, -0.1338,  0.0884]]],


        [[[ 0.0724,  0.0605, -0.1065, -0.0611, -0.0419],
          [-0.1390, -0.0909,  0.0681, -0.1637,

OrderedDict([('conv1.weight',
              tensor([[[[-0.3240, -0.4770, -0.1767,  0.4256,  0.3182],
                        [ 0.7024,  0.3412, -0.0836,  0.6190,  0.6251],
                        [ 0.4918, -0.0852, -0.0355,  0.2151,  0.5686],
                        [ 0.1675, -0.0704, -0.1758,  0.3755,  0.2207],
                        [ 0.2447,  0.0588, -0.0694,  0.4799,  0.6172]]],
              
              
                      [[[ 0.7177,  0.9911,  0.0063, -0.7684,  0.1485],
                        [ 0.2821,  0.7993,  0.0987, -0.1572,  0.9992],
                        [ 0.2418,  0.9996,  0.5280,  0.3445,  0.9843],
                        [-0.0828,  0.1330, -0.0860, -0.4443,  0.3603],
                        [ 0.0342,  0.0854, -0.2356, -0.5914,  0.1753]]],
              
              
                      [[[ 0.2906, -0.1114,  0.5505,  0.2307, -0.0313],
                        [ 0.5189,  0.2588,  0.5592,  0.5711,  0.0024],
                        [ 0.3384, -0.0882, -0.2907, -0

NetA2-> HF Train

In [20]:
print(net_a2_hf.state_dict())
df_net_a2_hf = train_test(device, train_dataloader, test_dataloader, net_a2_hf, learning_rate, epochs)
df_net_a2_hf.to_csv('NetA2HF_results.csv', index=False)
torch.save({'initialization': net_a2_hf.state_dict()}, 'NetA2HF_trained.pt')
net_a2_hf.state_dict() 

OrderedDict([('conv1.weight', tensor([[[[1., 0., 0., 0., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [0., 1., 0., 1., 0.],
          [1., 0., 0., 0., 1.]]],


        [[[0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [0., 0., 1., 0., 0.]]],


        [[[0., 1., 1., 1., 0.],
          [1., 1., 0., 1., 1.],
          [1., 0., 0., 0., 1.],
          [1., 1., 0., 1., 1.],
          [0., 1., 1., 1., 0.]]],


        [[[1., 1., 0., 1., 1.],
          [0., 1., 0., 1., 0.],
          [0., 0., 1., 0., 0.],
          [1., 1., 0., 1., 1.],
          [1., 1., 0., 1., 1.]]]])), ('conv1.bias', tensor([0., 0., 0., 0.])), ('conv2.weight', tensor([[[[ 0.1518, -0.0377,  0.1249],
          [-0.0100, -0.1487,  0.0391],
          [-0.1228,  0.1132, -0.0737]],

         [[-0.0040, -0.0954, -0.1017],
          [ 0.1362,  0.0255, -0.1470],
          [-0.0381, -0.0117,  0.0466]],

         [[ 0.03

KeyboardInterrupt: 

NetA2-> HT Train

In [None]:
print(net_a2_ht.state_dict())
df_net_a2_ht = train_test(device, train_dataloader, test_dataloader, net_a2_ht, learning_rate, epochs)
df_net_a2_ht.to_csv('NetA2HT_results.csv', index=False)
torch.save({'initialization': net_a2_ht.state_dict()}, 'NetA2HT_trained.pt')
net_a2_ht.state_dict()

NetA2-> DT Train

In [None]:
print(net_a2_dt.state_dict())
df_net_a2_dt = train_test(device, train_dataloader, test_dataloader, net_a2_dt, learning_rate, epochs)
df_net_a2_dt.to_csv('NetA2DT_results.csv', index=False)
torch.save({'initialization': net_a2_dt.state_dict()}, 'NetA2HT_trained.pt')
net_a2_dt.state_dict()