In [8]:
import time
!pip install tqdm
!pip install matplotlib

import random
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
from torchvision.transforms import Compose, ToTensor, Normalize, Lambda

import torch.nn as nn
import os
from torch import Tensor
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.optim import Adam
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
number_of_epochs = 20

hidden_layer_neurons = 1000

class Layer(nn.Linear):
    # hebbian_weights_layer_one_zeroth = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # hebbian_weights_layer_two_zeroth = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    hebbian_weights_layer_one = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # hebbian_weights_layer_one_quadra = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    hebbian_weights_layer_two = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # hebbian_weights_layer_two_quadra = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # hebbian_weights_layer_one_third = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # hebbian_weights_layer_two_third = nn.Parameter(torch.ones(10, hidden_layer_neurons).cuda())
    # 
    def __init__(self, in_features, out_features, bias=True, device=None, d_type=None, is_hinge_loss=False):
        super().__init__(in_features, out_features, bias, device, d_type)
        self.activation = torch.nn.ReLU()
        self.learning_rate = 0.02
        self.optimizer = Adam(self.parameters(), lr=self.learning_rate)
        self.threshold = 2.0
        self.num_of_epochs = number_of_epochs
        self.is_hinge_loss = is_hinge_loss
        self.hebbian_optimizer = Adam([Layer.hebbian_weights_layer_two, Layer.hebbian_weights_layer_one], lr=0.02)
        
        
    def compute_hebbian_activity(self, labels, values, layer_num):
        
        
        if layer_num == 0:
            hebbian_value = torch.mm(values.pow(2) * self.layer_weights[layer_num], self.hebbian_weights_layer_one.T) * labels
        else:
            hebbian_value = torch.mm(values, self.hebbian_weights_layer_two.T) * labels
        return hebbian_value.mean(1).cuda()
    
    
    def forward(self, input: Tensor) -> Tensor:
        normalized_input = input / (input.norm(2, 1, keepdim=True) + 1e-4)
        output = torch.mm(normalized_input, self.weight.T) + self.bias.unsqueeze(0)
        return self.activation(output)

    def balanced_loss(self, positive_goodness, negative_goodness, difference_sum, alpha=8.0):
        delta = positive_goodness.cuda() - negative_goodness.cuda() + difference_sum.cuda()
        per_instance_loss = torch.log(1 + torch.exp(-alpha * delta))
        return per_instance_loss.mean()  


    def exponential_hinge_loss(self, positive_goodness, negative_goodness, delta=1.0, is_second_phase=False):
        if is_second_phase:
            threshold = self.threshold * 2
        else:
            threshold = self.threshold
        positive_loss = torch.exp(torch.clamp(delta - (positive_goodness - threshold), min=0)) - 1
        negative_loss = torch.exp(torch.clamp(delta - (threshold - negative_goodness), min=0)) - 1
        return torch.cat([positive_loss, negative_loss]).mean()



    def plot_goodness(self, positive_goodness, negative_goodness, difference):
        plt.figure(figsize=(10, 5))
        plt.plot(positive_goodness, label='Positive Goodness', color='b')
        plt.plot(negative_goodness, label='Negative Goodness', color='r')
        plt.plot(difference, label='Difference Goodness', color='g')
        plt.xlabel('Epoch')
        plt.ylabel('Goodness Value')
        plt.title('Change in Goodness During Training')
        plt.legend()
        plt.grid(True)
        plt.show()
        
        
    def get_difference_from_other_layers(self, layer_num):
        difference_sum = torch.zeros(Network.layer_differences.shape[0])
        for i in range(Network.layer_differences.shape[1]):
            if i != layer_num:
                difference_sum = difference_sum.cuda().add_(Network.layer_differences[:, i].cuda())
        return difference_sum
        
    def train_layer(self, positive_input, negative_input, layer_num):
        positive_goodness_history = []
        negative_goodness_history = []
        difference_history = []
        difference_sum = self.get_difference_from_other_layers(layer_num).cuda()
        
        
        for _ in range(number_of_epochs):
            positive_output = self.forward(positive_input)  # Shape: [batch_size, 500]
            negative_output = self.forward(negative_input)
            
            # First Layer
            if layer_num == 0:
                # positive_goodness = (torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_one_zeroth) * positive_output + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_one) * positive_output.pow(2) + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_one_quadra) * positive_output.pow(4) + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_one_third) * positive_output.pow(3)).mean(1)
                positive_goodness = (torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_one) * positive_output.pow(2)).mean(1)
                # negative_goodness = (torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_one_zeroth) * negative_output + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_one) * negative_output.pow(2) + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_one_quadra) * negative_output.pow(4) + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_one_third) * negative_output.pow(3)).mean(1)
                negative_goodness = (torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_one) * negative_output.pow(2)).mean(1)
            else:
                # Second Layer
                # positive_goodness = (torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_two_zeroth) * positive_output + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_two) * positive_output.pow(2) + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_two_quadra) * positive_output.pow(4) + torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_two_third) * positive_output.pow(3)).mean(1)
                positive_goodness = (torch.mm(Network.positive_labels, Layer.hebbian_weights_layer_two) * positive_output.pow(2)).mean(1)
                # negative_goodness = (torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_two_zeroth) * positive_output + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_two) * negative_output.pow(2) + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_two_quadra) * negative_output.pow(4) + torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_two_third) * negative_output.pow(3)).mean(1)
                negative_goodness = (torch.mm(Network.negative_labels, Layer.hebbian_weights_layer_two) * negative_output.pow(2)).mean(1)
            
            positive_goodness_history.append(positive_goodness.mean().item())
            negative_goodness_history.append(negative_goodness.mean().item())
            difference_history.append(positive_goodness.mean().item() - negative_goodness.mean().item())

            latest_difference = positive_goodness - negative_goodness
            
            # Network.layer_differences = Network.layer_differences.clone()
            Network.layer_differences[:, layer_num] = latest_difference.detach().cuda()


            if self.is_hinge_loss:
                loss = self.exponential_hinge_loss(positive_goodness, negative_goodness)
            else:
                loss = self.balanced_loss(positive_goodness, negative_goodness, difference_sum)

            self.optimizer.zero_grad()
            self.hebbian_optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self.hebbian_optimizer.step()

        # self.plot_goodness(positive_goodness_history, negative_goodness_history, difference_history)
        return self.forward(positive_input).detach(), self.forward(negative_input).detach()


class Network(nn.Module):
    # hebbian_weights = nn.Parameter(torch.ones(10, 774).cuda())
    positive_labels = []
    negative_labels= []
    layer_differences = torch.zeros(60000, 2).cuda()
    def __init__(self, dimension_configs):
        super().__init__()
        self.layers = []
        for i in range(len(dimension_configs) - 1):
            self.layers += [Layer(dimension_configs[i], dimension_configs[i + 1]).cuda()]
            
            
    def balanced_loss(self, positive_goodness, negative_goodness, alpha=4.0):
        delta = positive_goodness - negative_goodness
        per_instance_loss = torch.log(1 + torch.exp(-alpha * delta))
        return per_instance_loss.mean()  


    def mark_data(self, data, label):
        marked_data = data.clone().cuda()
        return marked_data
    
    
    def predict(self, input_data):
        goodness_layer_one = []
        goodness_layer_two = []

        for layer_num, layer in enumerate(self.layers):
            input_data = layer(input_data)
            activity = input_data
            
            for label in range(10):
                hebbian_weight_one = Layer.hebbian_weights_layer_one[label, :]
                hebbian_weight_two = Layer.hebbian_weights_layer_two[label, :]
                if layer_num == 0:
                    goodness_value = (activity * hebbian_weight_one).mean(1)
                    goodness_layer_one.append(goodness_value.unsqueeze(0))
                else:
                    goodness_value = (activity * hebbian_weight_two).mean(1)
                    goodness_layer_two.append(goodness_value.unsqueeze(0))

        # Convert lists to tensors
        goodness_layer_one = torch.cat(goodness_layer_one, dim=0)
        goodness_layer_two = torch.cat(goodness_layer_two, dim=0)

        # Sum the goodness values across all labels
        goodness = goodness_layer_one.add_(goodness_layer_two)
        # Get the index of the maximum goodness value
        return goodness.argmax(dim=0)
        
    def slow_predict(self, input_data):
        goodness_per_label = []
        for label in range(10):
            marked_data = self.mark_data(input_data, label)
            # hebbian_weight_one_zeroth = Layer.hebbian_weights_layer_one_zeroth[label, :]
            # hebbian_weight_two_zeroth = Layer.hebbian_weights_layer_two_zeroth[label, :]
            hebbian_weight_one = Layer.hebbian_weights_layer_one[label, :]
            hebbian_weight_two = Layer.hebbian_weights_layer_two[label, :]
            # hebbian_weight_one_third = Layer.hebbian_weights_layer_one_third[label, :]
            # hebbian_weight_two_third = Layer.hebbian_weights_layer_two_third[label, :]
            # hebbian_weight_one_quadra = Layer.hebbian_weights_layer_one_quadra[label, :]
            # hebbian_weight_two_quadra = Layer.hebbian_weights_layer_two_quadra[label, :]
            goodness = []
            for layer_num, layer in enumerate(self.layers):
                marked_data = layer(marked_data)
                if layer_num == 0:
                    goodness_value = (marked_data.pow(2) * hebbian_weight_one).mean(1)
                else:
                    goodness_value = (marked_data.pow(2) * hebbian_weight_two).mean(1)
                goodness.append(goodness_value)

            goodness_per_label.append(torch.sum(torch.stack(goodness), dim=0).unsqueeze(1))
        goodness_per_label = torch.cat(goodness_per_label, 1)
        return goodness_per_label.argmax(dim=1)
    def compute_hebbian_activity(self, values):
        labels = values[:, :10].cuda()
        hebbian = values[:, 10:].cuda()
        hebbian_value = torch.mm(hebbian, self.hebbian_weights.T) * labels
        return hebbian_value.mean(1).cuda()
    
    
    def train_network(self, positive_goodness, negative_goodness, training_data, training_data_label, positive_one_hot_labels, negative_one_hot_labels):
        
        # positive_mean_values = []
        # negative_mean_values = []
        # 
        # for _ in tqdm(range(number_of_epochs)):
        #     positive_labels = positive_goodness[:, :10].cuda()
        #     positive_hebbian = positive_goodness[:, 10:].cuda()
        #     positive_hebbian_value = torch.mm(positive_hebbian, self.hebbian_weights.T.pow(2)) * positive_labels
        #     positive_mean_value = positive_hebbian_value.mean(1).cuda()
        #     positive_mean_values.append(positive_mean_value.mean().item())  # Store average for plotting
        # 
        #     negative_labels = negative_goodness[:, :10].cuda()
        #     negative_hebbian = negative_goodness[:, 10:].cuda()
        #     negative_hebbian_value = torch.mm(negative_hebbian, self.hebbian_weights.T.pow(2)) * negative_labels
        #     negative_mean_value = negative_hebbian_value.mean(1).cuda()
        #     negative_mean_values.append(negative_mean_value.mean().item())  # Store average for plotting
        # 
        #     loss = self.balanced_loss(positive_mean_value, negative_mean_value)
        #     self.hebbian_optimizer.zero_grad()
        #     loss.backward()
        #     self.hebbian_optimizer.step()
        # 
        # plt.figure(figsize=(10, 5))
        # plt.plot(positive_mean_values, label='Positive Mean Value', color='blue')
        # plt.plot(negative_mean_values, label='Negative Mean Value', color='red')
        # plt.title('Change in Mean Values Over Epochs')
        # plt.xlabel('Epochs')
        # plt.ylabel('Mean Value')
        # plt.legend()
        # plt.grid(True)
        # plt.show()
        # 
        for epoch in tqdm(range(1)):
            goodness_pos, goodness_neg = positive_goodness, negative_goodness
            negative_goodness, negative_one_hot_labels = create_negative_data(training_data, training_data_label)
            goodness_neg = negative_goodness
            positive_labels = goodness_pos[:, :10]
            negative_labels = negative_goodness[:, :10]

            Network.positive_labels = positive_one_hot_labels
            Network.negative_labels = negative_one_hot_labels
            for i, layer in enumerate(self.layers):
                print('Training Layer', i, '...')
                goodness_pos, goodness_neg = layer.train_layer(goodness_pos, goodness_neg, i)
            
        


def load_CIFAR10_data(train_batch_size=30000, test_batch_size=6000):
    data_transformation = Compose([
        ToTensor(),
        Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 
        Lambda(lambda x: torch.flatten(x))
    ])

    training_data_loader = DataLoader(
        CIFAR10('./data/', train=True, download=True, transform=data_transformation),
        batch_size=train_batch_size,
        shuffle=False
    )

    testing_data_loader = DataLoader(
        CIFAR10('./data/', train=False, download=True, transform=data_transformation),
        batch_size=test_batch_size,
        shuffle=False
    )

    return training_data_loader, testing_data_loader

def load_FashionMNIST_data(train_batch_size=60000, test_batch_size=10000):
    data_transformation = Compose([
        ToTensor(),
        Normalize((0.2860,), (0.3530,)),
        Lambda(lambda x: torch.flatten(x))
    ])

    training_data_loader = DataLoader(
        FashionMNIST('./data/', train=True, download=True, transform=data_transformation),
        batch_size=train_batch_size,
        shuffle=False
    )

    testing_data_loader = DataLoader(
        FashionMNIST('./data/', train=False, download=True, transform=data_transformation),
        batch_size=test_batch_size,
        shuffle=False
    )

    return training_data_loader, testing_data_loader

def load_MNIST_data(train_batch_size=50000, test_batch_size=10000):
    data_transformation = Compose([
        ToTensor(),
        Normalize((0.1307,), (0.3081,)),
        Lambda(lambda x: torch.flatten(x))
    ])

    training_data_loader = DataLoader(
        MNIST('./data/', train=True, download=True, transform=data_transformation),
        batch_size=train_batch_size,
        shuffle=False
    )

    testing_data_loader = DataLoader(
        MNIST('./data/', train=False, download=True, transform=data_transformation),
        batch_size=test_batch_size,
        shuffle=False
    )

    return training_data_loader, testing_data_loader


def create_positive_data(data, label):
    positive_data = data.clone()

    return positive_data


def create_negative_data(data, label, num_classes=10, seed=1234):
    """
    This function modifies the labels to create negative samples and returns both the data and one-hot encoded negative labels.
    
    Args:
    - data (torch.Tensor): Tensor of data samples.
    - label (torch.Tensor): Original labels of the data.
    - num_classes (int): Number of classes in the dataset.
    - seed (int, optional): Seed for random number generator.
    
    Returns:
    - torch.Tensor: Original data (unchanged here, but could be modified if necessary).
    - torch.Tensor: One-hot encoded negative labels.
    """
    if seed is not None:
        random.seed(seed)

    negative_data = data.clone()
    negative_labels = torch.zeros(data.size(0), num_classes, device=data.device)

    for i in range(negative_data.shape[0]):
        possible_answers = list(range(num_classes))
        possible_answers.remove(label[i].item())
        false_label = random.choice(possible_answers)
        negative_labels[i, false_label] = 1

    return negative_data, negative_labels

def create_one_hot_labels(labels, num_classes=10):
    """
    This function takes a batch of labels and the number of classes and
    returns a one-hot encoded tensor of the labels.

    Args:
    - labels (torch.Tensor): A tensor of labels of size (N, )
    - num_classes (int): The number of classes in the dataset

    Returns:
    - torch.Tensor: A one-hot encoded tensor of size (N, num_classes)
    """
    # Create a tensor of zeros with size [len(labels), num_classes]
    one_hot_labels = torch.zeros(len(labels), num_classes, device=labels.device)
    
    # Use scatter_ to assign 1s based on label indices
    one_hot_labels.scatter_(1, labels.unsqueeze(1), 1)
    
    return one_hot_labels

def prepare_data():
    torch.manual_seed(4321)
    training_data_loader, testing_data_loader = load_FashionMNIST_data()

    training_data, training_data_label = next(iter(training_data_loader))

    testing_data, testing_data_label = next(iter(testing_data_loader))
    testing_data, testing_data_label = testing_data.cuda(), testing_data_label.cuda()

    print(f"Training Data: ", training_data)
    print(f"Training Data Label: ", training_data_label)

    training_data, training_data_label = training_data.cuda(), training_data_label.cuda()
    
    positive_data = create_positive_data(training_data, training_data_label)
    print(f"Positive Data: ", positive_data)

    negative_data, negative_one_hot_labels = create_negative_data(training_data.cuda(), training_data_label.cuda())
    print(f"Negative Data: ", negative_data)

    return positive_data, negative_data, negative_one_hot_labels, training_data, training_data_label, testing_data, testing_data_label, training_data_loader

def measure_execution_time_with_uncertainty(model, input_data, repetitions=100):
    times = []  # List to store execution times for each repetition

    for _ in range(repetitions):
        start_time = time.perf_counter()  # Start timing before prediction
        _ = model.predict(input_data)  # Execute the predict function
        end_time = time.perf_counter()  # End timing after prediction
        times.append(end_time - start_time)  # Append the time difference to the list

    times = np.array(times)  # Convert list to NumPy array for easier calculations
    average_time = np.mean(times)  # Calculate the average of the execution times
    std_deviation = np.std(times)  # Calculate the standard deviation of the execution times

    return average_time, std_deviation


def measure_slow_execution_time_with_uncertainty(model, input_data, repetitions=100):
    times = []  # List to store execution times for each repetition

    for _ in range(repetitions):
        start_time = time.perf_counter()  # Start timing before prediction
        _ = model.slow_predict(input_data)  # Execute the predict function
        end_time = time.perf_counter()  # End timing after prediction
        times.append(end_time - start_time)  # Append the time difference to the list

    times = np.array(times)  # Convert list to NumPy array for easier calculations
    average_time = np.mean(times)  # Calculate the average of the execution times
    std_deviation = np.std(times)  # Calculate the standard deviation of the execution times

    return average_time, std_deviation

def measure_single_execution_time_with_uncertainty(model, input_data, repetitions=100):
    model.eval()  # Ensure the model is in evaluation mode
    times = []  # List to store execution times for each repetition

    for _ in range(repetitions):
        # Ensure a single instance is loaded to GPU
        single_data = input_data[0:1].cuda()  # Taking the first instance for measurement
        torch.cuda.synchronize()  # Wait for all prior operations to complete

        start_time = time.perf_counter()  # Start timing
        with torch.no_grad():  # Context manager to turn off gradient computation
            _ = model.predict(single_data)  # Execute the predict function
        torch.cuda.synchronize()  # Ensure completion of CUDA operations

        end_time = time.perf_counter()  # End timing
        times.append(end_time - start_time)  # Append the time difference to the list

    times = np.array(times)  # Convert list to NumPy array for easier calculations
    average_time = np.mean(times)  # Calculate the average of the execution times
    std_deviation = np.std(times)  # Calculate the standard deviation of the execution times

    return average_time, std_deviation


def measure_single_slow_execution_time_with_uncertainty(model, input_data, repetitions=100):
    model.eval()  # Ensure the model is in evaluation mode
    times = []  # List to store execution times for each repetition

    for _ in range(repetitions):
        # Ensure a single instance is loaded to GPU
        single_data = input_data[0:1].cuda()  # Taking the first instance for measurement
        torch.cuda.synchronize()  # Wait for all prior operations to complete

        start_time = time.perf_counter()  # Start timing
        with torch.no_grad():  # Context manager to turn off gradient computation
            _ = model.slow_predict(single_data)  # Execute the predict function
        torch.cuda.synchronize()  # Ensure completion of CUDA operations

        end_time = time.perf_counter()  # End timing
        times.append(end_time - start_time)  # Append the time difference to the list

    times = np.array(times)  # Convert list to NumPy array for easier calculations
    average_time = np.mean(times)  # Calculate the average of the execution times
    std_deviation = np.std(times)  # Calculate the standard deviation of the execution times

    return average_time, std_deviation


def measure_individual_throughput(model, data_loader, repetitions=3):
    model.eval()  # Ensure the model is in evaluation mode
    throughputs = []

    for _ in range(repetitions):
        total_images = 0
        start_time = time.time()

        for image, _ in data_loader:  # image is now a single instance, not a batch
            image = image.cuda(non_blocking=True)  # Transfer data to GPU if available
            with torch.no_grad():  # No need to compute gradients
                _ = model.predict(image)  # Perform predictions

            total_images += 1  # Increment image count by one

        elapsed_time = time.time() - start_time
        throughput = total_images / elapsed_time  # Calculate throughput as images per second
        throughputs.append(throughput)

    # Compute average and standard deviation of throughput
    average_throughput = np.mean(throughputs)
    std_dev_throughput = np.std(throughputs)
    
    return average_throughput, std_dev_throughput



def measure_slow_individual_throughput(model, data_loader, repetitions=3):
    model.eval()  # Ensure the model is in evaluation mode
    throughputs = []

    for _ in range(repetitions):
        total_images = 0
        start_time = time.time()

        for image, _ in data_loader:  # image is now a single instance, not a batch
            image = image.cuda(non_blocking=True)  # Transfer data to GPU if available
            with torch.no_grad():  # No need to compute gradients
                _ = model.slow_predict(image)  # Perform predictions

            total_images += 1  # Increment image count by one

        elapsed_time = time.time() - start_time
        throughput = total_images / elapsed_time  # Calculate throughput as images per second
        throughputs.append(throughput)

    # Compute average and standard deviation of throughput
    average_throughput = np.mean(throughputs)
    std_dev_throughput = np.std(throughputs)
    
    return average_throughput, std_dev_throughput

if __name__ == "__main__":
    torch.cuda.empty_cache()
    torch.manual_seed(1234)
    positive_data, negative_data, negative_one_hot_labels, training_data, training_data_label, testing_data, testing_data_label, training_data_loader = prepare_data()
    positive_one_hot_labels = create_one_hot_labels(training_data_label)
    network = Network([784, hidden_layer_neurons, hidden_layer_neurons]).cuda() #3072
    network.train_network(positive_data, negative_data, training_data, training_data_label, positive_one_hot_labels, negative_one_hot_labels)
    del Network.layer_differences
    del positive_data
    del negative_data
    del Network.positive_labels
    del Network.negative_labels
    torch.cuda.empty_cache()
    print("Training Accuracy: ", network.predict(training_data).eq(training_data_label).float().mean().item())
    print("Testing Accuracy: ", network.predict(testing_data).eq(testing_data_label).float().mean().item())
    average_time, uncertainty = measure_execution_time_with_uncertainty(network, testing_data)
    print(f"Average Execution Time: {average_time:.6f} seconds ± {uncertainty:.6f} seconds")
    average_time, uncertainty = measure_slow_execution_time_with_uncertainty(network, testing_data)
    print(f"SLOW_Average Execution Time: {average_time:.6f} seconds ± {uncertainty:.6f} seconds")
    single_average_time, single_uncertainty = measure_single_execution_time_with_uncertainty(network, testing_data)
    print(f"Single Data Execution Time: {single_average_time:.6f} seconds ± {single_uncertainty:.6f} seconds")
    single_average_time, single_uncertainty = measure_single_slow_execution_time_with_uncertainty(network, testing_data)
    print(f"SLOW_Single Data Execution Time: {single_average_time:.6f} seconds ± {single_uncertainty:.6f} seconds")
    




[notice] A new release of pip is available: 24.1.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.1.1 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Training Data:  tensor([[-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        ...,
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102]])
Training Data Label:  tensor([9, 0, 0,  ..., 3, 0, 5])
Positive Data:  tensor([[-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        ...,
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102],
        [-0.8102, -0.8102, -0.8102,  ..., -0.8102, -0.8102, -0.8102]],
       device='cuda:0')
Negative Data:  tenso

  0%|          | 0/1 [00:00<?, ?it/s]

Training Layer 0 ...


100%|██████████| 1/1 [00:02<00:00,  2.24s/it]

Training Layer 1 ...
SHAPE: torch.Size([10, 60000])
Training Accuracy:  0.6818666458129883
SHAPE: torch.Size([10, 10000])
Testing Accuracy:  0.675000011920929



