In [1]:
!pip install torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim




[notice] A new release of pip is available: 24.1.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
number_of_epochs = 2
learning_rate = 0.06

In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]  # Normalize the images to [-1, 1]
)

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=2500,
                                          shuffle=True, num_workers=2) # 50000

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=2500,
                                         shuffle=False, num_workers=2) # 10000


def one_hot_encode(labels, num_classes=10):
    """ Converts a batch of labels to one-hot encoded format. """
    batch_size = labels.size(0)
    one_hot_labels = torch.zeros(batch_size, num_classes, device=labels.device)
    one_hot_labels.scatter_(1, labels.unsqueeze(1), 1)
    return one_hot_labels


def create_positive_data(data, labels):
    """ Return original data and one-hot encoded labels. """
    one_hot_labels = one_hot_encode(labels, num_classes=10)
    return data.cuda(), one_hot_labels.cuda()

def create_negative_data(data, labels):
    """ Create negative samples by randomly selecting different labels and return with one-hot encoded format. """
    batch_size = labels.size(0)
    num_classes = 10
    # Generate random labels different from the current labels
    incorrect_labels = (labels + torch.randint(1, num_classes, (batch_size,), device=labels.device)) % num_classes
    one_hot_labels = one_hot_encode(incorrect_labels, num_classes=10)
    return data.cuda(), one_hot_labels.cuda()

Files already downloaded and verified
Files already downloaded and verified


In [4]:
class CustomNetwork(nn.Module):
    def __init__(self):
        super(CustomNetwork, self).__init__()
        # First hidden layer: Receives input and applies an 11x11 convolution
        self.layer1 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=11, padding=5, stride=1)
        self.bn1 = nn.BatchNorm2d(num_features=3)  # Batch normalization for 3 output channels
        
        # Second hidden layer: Takes the output of the first layer and applies another 11x11 convolution
        self.layer2 = nn.Conv2d(in_channels=3, out_channels=3, kernel_size=11, padding=5, stride=1)
        self.bn2 = nn.BatchNorm2d(num_features=3)  # Batch normalization for 3 output channels
        self.optimizer = optim.Adam(self.parameters(), lr=learning_rate)
        self.hebbian_factors = torch.ones(2, 10, 3072, requires_grad=True, device='cuda')
        self.feedback_weights = nn.Parameter(torch.randn(3, 3, 11, 11))
        self.hebbian_optimizer = optim.Adam([self.hebbian_factors, self.feedback_weights], lr=learning_rate)
        self.threshold = 0
        self.feedback = None
        
    def soft_plus_loss(self, positive_goodness, negative_goodness, is_second_phase=False):
        if is_second_phase:
            threshold = self.threshold * 2
        else:
            threshold = self.threshold
        return torch.log(1 + torch.exp(torch.cat([
            -positive_goodness + threshold,
            negative_goodness - threshold]))).mean()

    def forward(self, x, layer_num, feedback=None):
        if layer_num == 0:
            x = self.layer1(x)
            x = self.bn1(x)
            x = F.relu(x)
            
            if feedback is None:
                # Generate feedback from the second layer if not provided
                x2 = F.relu(self.layer2(x))
                feedback = x2  # Optionally update self.feedback here if meant to persist
            
            # Apply feedback, which can be either the external or freshly computed
            feedback_effect = F.conv2d(feedback, self.feedback_weights, padding=5)
            x += feedback_effect
        
        elif layer_num == 1:
            x = self.layer2(x)
            x = self.bn2(x)
            x = F.relu(x)
            self.feedback = x.clone()  # Update feedback only after processing with layer2

        return x
    
    def train_network(self, training_data_loader):
        for _ in tqdm(range(number_of_epochs)):
            for images, labels in training_data_loader:
               positive_data, positive_labels = create_positive_data(images, labels)
               negative_data, negative_labels = create_negative_data(images, labels)
               
               for i in range(2):
                   hebbian_factors = self.hebbian_factors[i, :, :]
                   positive_data = self.forward(positive_data, i, self.feedback)
                   flattened_positive_data = positive_data.clone()
                   flattened_positive_data = flattened_positive_data.view(flattened_positive_data.size(0), -1).detach()
                   negative_data = self.forward(negative_data, i, self.feedback)
                   flattened_negative_data = negative_data.clone()
                   flattened_negative_data = flattened_negative_data.view(flattened_negative_data.size(0), -1).detach()
                   
                   
                   negative_goodness = (torch.mm(negative_labels, hebbian_factors) * flattened_positive_data).mean(1)
                   positive_goodness = (torch.mm(positive_labels, hebbian_factors) * flattened_negative_data).mean(1)
                   loss = self.soft_plus_loss(positive_goodness, negative_goodness)
                   self.optimizer.zero_grad()
                   self.hebbian_optimizer.zero_grad()
                   loss.backward()
                   self.hebbian_optimizer.step()
                   self.optimizer.step()
                   
    

    def predict(self, testing_data_loader):
        self.cuda()  # Ensure the model is on GPU
        correct = 0
        total = 0
        for images, actual_labels in testing_data_loader:
            
            images = images.cuda()  # Ensure images are on GPU
            actual_labels = actual_labels.cuda()  # Ensure labels are on GPU
            batch_size = images.size(0)
            goodness_per_label = []
    
            for label in range(10):  # Assuming 10 classes
                self.feedback = None
                labels = torch.full((batch_size,), label, dtype=torch.long, device=images.device)
                marked_data, one_hot_labels = create_positive_data(images, labels)
                goodness = []
                
                for iteration in range(6):
                    for layer_num in range(2):  # Assuming 2 layers
                        marked_data = self.forward(marked_data, layer_num, self.feedback)
                        if iteration >= 3:
                            flattened_data = marked_data.view(marked_data.size(0), -1).detach()
                            goodness_value = (torch.mm(one_hot_labels, self.hebbian_factors[layer_num, :, :]) * flattened_data).mean(1)
                            goodness.append(goodness_value)
    
                goodness_per_label.append(torch.sum(torch.stack(goodness), dim=0).unsqueeze(1))
    
            goodness_per_label = torch.cat(goodness_per_label, 1)
            predicted_labels = goodness_per_label.argmax(dim=1)
            correct += (predicted_labels == actual_labels).sum().item()
            total += batch_size
    
        accuracy = correct / total
        return accuracy


In [5]:
if __name__ == "__main__":
    torch.cuda.empty_cache()
    torch.manual_seed(1234)
    network = CustomNetwork().cuda()
    for i in range(20):
        network.train_network(trainloader)
        training_acc = network.predict(trainloader)
        testing_acc = network.predict(testloader)
        print(f"Training Acc: {training_acc} in {(i+1) * number_of_epochs} epochs")
        print(f"Testing Acc: {testing_acc} in {(i+1) * number_of_epochs} epochs")
    
    

  0%|          | 0/2 [00:00<?, ?it/s]

Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3,

 50%|█████     | 1/2 [00:06<00:06,  6.24s/it]

Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 3

100%|██████████| 2/2 [00:12<00:00,  6.24s/it]


Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback

  0%|          | 0/2 [00:00<?, ?it/s]

Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 3

 50%|█████     | 1/2 [00:10<00:10, 10.79s/it]

Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 3

100%|██████████| 2/2 [00:22<00:00, 11.06s/it]


Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of x2: torch.Size([2500, 3, 32, 32])
Shape of x: torch.Size([2500, 3, 32, 32])
Shape of feedback_effect: torch.Size([2500, 3, 32, 32])
Shape of feedback

OutOfMemoryError: CUDA out of memory. Tried to allocate 30.00 MiB. GPU 0 has a total capacity of 11.99 GiB of which 0 bytes is free. Of the allocated memory 42.21 GiB is allocated by PyTorch, and 5.33 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)