In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import numpy as np

# Load pretrained VGG16 model and set it for training
vgg16 = models.vgg16(pretrained=True)
vgg16.train()  # Set model to training mode

# Define a loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.01)

# Cache for WS-RPQ
class MCACHE:
    def __init__(self):
        self.cache = {}

    def lookup(self, signature):
        return self.cache.get(signature)

    def insert(self, signature, result):
        self.cache[signature] = result

# Function to generate a binary signature using RPQ
def generate_signature(input_tensor, random_matrix):
    input_vector = input_tensor.view(-1)  # Flatten input tensor to a vector
    projected = torch.matmul(input_vector, random_matrix)
    signature = tuple((projected > 0).int().tolist())
    return signature

# Initialize a sample input tensor to determine dimensions after initial layers
sample_input = torch.randn(1, 3, 224, 224)  # Single RGB image of size 224x224
initial_output = vgg16.features[:4](sample_input)  # Output from first few layers
flattened_dim = initial_output.view(-1).shape[0]  # Determine flattened size

# Initialize the random projection matrix for RPQ with compatible dimensions
projection_dim = 512  # Dimension for the RPQ signature
random_matrix = torch.randn(flattened_dim, projection_dim)  # Adjusted random matrix size

# Initialize cache
mcache = MCACHE()

# Tracking computation counts
baseline_computation_count = 0
ws_rpq_computation_count = 0
ws_rpq_skipped_computations = 0

# Function to simulate computation counts for VGG16 (approximation)
def compute_mac_count(model):
    # Calculate MACs per layer
    mac_count = 0
    for layer in model.features:  # Iterate only over convolutional layers
        if isinstance(layer, nn.Conv2d):
            output_size = 7 * 7  # Approximate VGG16 output size after last conv layer
            mac_count += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * output_size
    return mac_count

# Baseline computation count for full forward pass
baseline_mac_count = compute_mac_count(vgg16)

# Training iterations simulation
iterations = 10  # Reduced number of iterations for brevity
input_data = torch.randn(1, 3, 224, 224)  # Single RGB image of size 224x224
target = torch.tensor([1])  # Target label as a 1D tensor with a class index

for i in range(iterations):
    # Baseline System: Full computation in forward pass each time
    optimizer.zero_grad()
    output = vgg16(input_data)  # Use entire model, including fully connected layers
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()

    baseline_computation_count += baseline_mac_count  # Count computations for baseline

    # WS-RPQ System: Check cache before computing forward pass
    optimizer.zero_grad()
    
    # Generate a signature based on the output of initial layers in the feature extractor
    initial_output = vgg16.features[:4](input_data)  # Take first few layers for signature
    input_signature = generate_signature(initial_output, random_matrix)
    cached_output = mcache.lookup(input_signature)

    if cached_output is not None:
        # Cache hit, reuse result and skip forward computation
        output = cached_output
        ws_rpq_skipped_computations += baseline_mac_count  # Count skipped computations due to caching
    else:
        # Cache miss, full computation in forward pass
        output = vgg16(input_data)
        mcache.insert(input_signature, output)
        ws_rpq_computation_count += baseline_mac_count  # Count MACs only when not cached
    
    # Complete backpropagation and update weights for WS-RPQ system
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()

    # Print results after each iteration
    print(f"Iteration {i + 1}")
    print(f"Baseline System - Total Computations: {baseline_computation_count}")
    print(f"WS-RPQ System - Total Computations: {ws_rpq_computation_count}")
    print(f"WS-RPQ System - Total Computations Skipped due to Caching: {ws_rpq_skipped_computations}\n")

    # Simulate new input for the next iteration
    input_data = torch.randn(1, 3, 224, 224)  # Change input to simulate different inputs


Iteration 1
Baseline System - Total Computations: 720812736
WS-RPQ System - Total Computations: 720812736
WS-RPQ System - Total Computations Skipped due to Caching: 0

Iteration 2
Baseline System - Total Computations: 1441625472
WS-RPQ System - Total Computations: 1441625472
WS-RPQ System - Total Computations Skipped due to Caching: 0

Iteration 3
Baseline System - Total Computations: 2162438208
WS-RPQ System - Total Computations: 2162438208
WS-RPQ System - Total Computations Skipped due to Caching: 0

Iteration 4
Baseline System - Total Computations: 2883250944
WS-RPQ System - Total Computations: 2883250944
WS-RPQ System - Total Computations Skipped due to Caching: 0

Iteration 5
Baseline System - Total Computations: 3604063680
WS-RPQ System - Total Computations: 3604063680
WS-RPQ System - Total Computations Skipped due to Caching: 0

Iteration 6
Baseline System - Total Computations: 4324876416
WS-RPQ System - Total Computations: 4324876416
WS-RPQ System - Total Computations Skipped d

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import numpy as np

# Load pretrained VGG16 model and set it for training
vgg16 = models.vgg16(pretrained=True)
vgg16.train()  # Set model to training mode

# Define a loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(vgg16.parameters(), lr=0.01)

# Tracking computation counts (MACs)
baseline_computation_count = 0

# Function to approximate MAC count for VGG16's convolutional layers
def compute_mac_count(model):
    mac_count = 0
    for layer in model.features:
        if isinstance(layer, nn.Conv2d):
            output_size = 7 * 7  # Approximate VGG16 output size after last conv layer
            mac_count += layer.in_channels * layer.out_channels * layer.kernel_size[0] * layer.kernel_size[1] * output_size
    return mac_count

# Baseline computation count per full forward pass
baseline_mac_count = compute_mac_count(vgg16)

# Training iterations simulation
iterations = 10
input_data = torch.randn(1, 3, 224, 224)  # Single RGB image of size 224x224
target = torch.tensor([1])  # Target label as a 1D tensor with a class index

for i in range(iterations):
    # Baseline System: Full computation in forward pass each time
    optimizer.zero_grad()
    output = vgg16(input_data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()

    # Update computation count
    baseline_computation_count += baseline_mac_count

    # Print results after each iteration
    print(f"Iteration {i + 1}")
    print(f"Baseline System - Total Computations: {baseline_computation_count}\n")


Iteration 1
Baseline System - Total Computations: 720812736

Iteration 2
Baseline System - Total Computations: 1441625472

Iteration 3
Baseline System - Total Computations: 2162438208

Iteration 4
Baseline System - Total Computations: 2883250944

Iteration 5
Baseline System - Total Computations: 3604063680

Iteration 6
Baseline System - Total Computations: 4324876416

Iteration 7
Baseline System - Total Computations: 5045689152

Iteration 8
Baseline System - Total Computations: 5766501888

Iteration 9
Baseline System - Total Computations: 6487314624

Iteration 10
Baseline System - Total Computations: 7208127360



In [13]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import vgg16
from torch.utils.data import DataLoader, Dataset

# RPQ Implementation
class RPQ(nn.Module):
    def __init__(self, input_dim, proj_dim):
        super(RPQ, self).__init__()
        self.proj_matrix = nn.Parameter(torch.randn(input_dim, proj_dim))
        self.quant_levels = 2  # Binary quantization

    def forward(self, x):
        # Random Projection
        proj = torch.matmul(x, self.proj_matrix)
        # Quantization
        quantized = torch.sign(proj)
        return quantized

# Custom Dataset
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Baseline VGG16 Model
class BaselineVGG16(nn.Module):
    def __init__(self):
        super(BaselineVGG16, self).__init__()
        self.vgg = vgg16(pretrained=True)
        self.fc = nn.Linear(1000, 1)  # Assuming binary classification for simplicity

    def forward(self, x):
        x = self.vgg(x)
        return self.fc(x)

# Mercury VGG16 Model with RPQ and MCACHE
class MercuryVGG16(nn.Module):
    def __init__(self, proj_dim):
        super(MercuryVGG16, self).__init__()
        self.vgg = vgg16(pretrained=True)
        self.rpq = RPQ(1000, proj_dim)
        self.fc = nn.Linear(proj_dim, 1)
        self.mcache = {}

    def forward(self, x):
        x = self.vgg(x)
        signature = self.rpq(x)
        signature_tuple = tuple(signature.view(-1).tolist())  # Convert to hashable type
        if signature_tuple in self.mcache:
            return self.mcache[signature_tuple]
        else:
            result = self.fc(signature)
            self.mcache[signature_tuple] = result
            return result

# Example usage
input_dim = 1000
proj_dim = 100
data = [torch.randn(3, 224, 224) for _ in range(1000)]  # Example image data

# DataLoader setup
dataset = MyDataset(data)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=0)  # Set num_workers to 0

# Models
baseline_model = BaselineVGG16()
mercury_model = MercuryVGG16(proj_dim)

# Processing data
baseline_outputs = []
mercury_outputs = []

for batch in dataloader:
    # Ensure batch is a sequence of tensors
    batch = torch.stack([item for item in batch])
    baseline_outputs.append(baseline_model(batch))
    mercury_outputs.append(mercury_model(batch))

# Calculate computations skipped
total_computations = len(data) * input_dim
skipped_computations = len(mercury_model.mcache) * proj_dim
print(f"Total computations: {total_computations}")
print(f"Skipped computations: {skipped_computations}")


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 102760448 bytes.

In [1]:
import torch
import torch.nn as nn
import numpy as np

class RPQ:
    def __init__(self, input_dim, projection_dim, quant_bits):
        """
        Initialize the RPQ module.
        :param input_dim: Dimensionality of the input vectors.
        :param projection_dim: Dimensionality after random projection.
        :param quant_bits: Number of bits for quantization.
        """
        self.input_dim = input_dim
        self.projection_dim = projection_dim
        self.quant_bits = quant_bits
        
        # Random projection matrix (Gaussian random matrix)
        self.projection_matrix = torch.randn(projection_dim, input_dim)
        
    def random_projection(self, x):
        """
        Perform random projection on the input vector.
        :param x: Input tensor of shape (batch_size, input_dim).
        :return: Projected tensor of shape (batch_size, projection_dim).
        """
        return torch.matmul(x, self.projection_matrix.T)
    
    def quantize(self, x):
        """
        Quantize the projected vector into a bit sequence.
        :param x: Projected tensor of shape (batch_size, projection_dim).
        :return: Quantized bit sequence of shape (batch_size, projection_dim).
        """
        # Normalize the projected values to the range [0, 1]
        x_min = x.min(dim=1, keepdim=True)[0]
        x_max = x.max(dim=1, keepdim=True)[0]
        x_normalized = (x - x_min) / (x_max - x_min + 1e-8)
        
        # Quantize to a discrete set of values based on the number of bits
        quantized_x = torch.floor(x_normalized * (2 ** self.quant_bits)).int()
        
        return quantized_x
    
    def compute_signature(self, x):
        """
        Compute the RPQ signature for the input vector.
        :param x: Input tensor of shape (batch_size, input_dim).
        :return: Signature tensor of shape (batch_size, projection_dim).
        """
        projected_x = self.random_projection(x)
        signature = self.quantize(projected_x)
        
        return signature

# Example usage
if __name__ == "__main__":
    # Define dimensions
    input_dim = 100  # Input vector dimension
    projection_dim = 20  # Projected dimension
    quant_bits = 4  # Number of bits for quantization
    
    # Create an instance of RPQ
    rpq = RPQ(input_dim=input_dim, projection_dim=projection_dim, quant_bits=quant_bits)
    
    # Generate random input vectors
    batch_size = 10
    inputs = torch.randn(batch_size, input_dim)
    
    # Compute RPQ signatures
    signatures = rpq.compute_signature(inputs)
    
    print("Input Vectors:\n", inputs)
    print("\nRPQ Signatures:\n", signatures)

Input Vectors:
 tensor([[ 9.6785e-01,  5.5491e-01,  5.5497e-01,  1.6345e+00,  2.1755e-01,
          5.7401e-01, -5.9625e-01,  3.8363e-01, -3.5030e-01,  7.2375e-01,
         -1.5138e-01,  6.0988e-01,  1.7080e+00,  1.4348e-02, -2.8090e+00,
          8.8090e-01,  2.2408e-01, -3.2965e-01,  7.3395e-02, -2.4874e+00,
          1.8154e+00,  1.8836e+00,  1.0003e+00, -8.7520e-01,  3.6680e-01,
          5.3246e-01, -1.1220e+00, -9.2499e-01, -7.6712e-01, -5.5280e-01,
         -1.1257e+00, -7.3899e-02,  1.2441e+00,  1.1490e+00,  1.2418e+00,
          6.2535e-01, -4.0941e-01, -1.1545e+00,  1.3722e+00, -1.2433e+00,
          1.6865e-01, -1.4706e-01, -1.9251e-01,  1.2917e+00, -1.3703e-01,
         -1.0265e+00,  6.5060e-01, -1.8871e-01, -1.2930e+00,  9.6918e-01,
          1.2805e-01, -1.8162e+00,  8.4305e-01,  1.3501e+00,  2.9058e-01,
         -2.5975e-02,  7.7666e-01, -7.0208e-01, -6.1886e-01,  6.9238e-01,
         -1.5172e+00,  1.4811e+00, -9.9828e-01, -1.9762e+00, -2.7931e-01,
          7.9241e-01, 

In [2]:
import torch
import torch.nn as nn
import numpy as np

class RPQ:
    def __init__(self, input_dim, projection_dim, quant_bits):
        """
        Initialize the RPQ module.
        :param input_dim: Dimensionality of the input vectors.
        :param projection_dim: Dimensionality after random projection.
        :param quant_bits: Number of bits for quantization.
        """
        self.input_dim = input_dim
        self.projection_dim = projection_dim
        self.quant_bits = quant_bits
        
        # Random projection matrix (Gaussian random matrix)
        self.projection_matrix = torch.randn(projection_dim, input_dim)
        
    def random_projection(self, x):
        """
        Perform random projection on the input vector.
        :param x: Input tensor of shape (batch_size, input_dim).
        :return: Projected tensor of shape (batch_size, projection_dim).
        """
        return torch.matmul(x, self.projection_matrix.T)
    
    def quantize(self, x):
        """
        Quantize the projected vector into a bit sequence.
        :param x: Projected tensor of shape (batch_size, projection_dim).
        :return: Quantized bit sequence of shape (batch_size, projection_dim).
        """
        # Normalize the projected values to the range [0, 1]
        x_min = x.min(dim=1, keepdim=True)[0]
        x_max = x.max(dim=1, keepdim=True)[0]
        x_normalized = (x - x_min) / (x_max - x_min + 1e-8)
        
        # Quantize to a discrete set of values based on the number of bits
        quantized_x = torch.floor(x_normalized * (2 ** self.quant_bits)).int()
        
        return quantized_x
    
    def compute_signature(self, x):
        """
        Compute the RPQ signature for the input vector.
        :param x: Input tensor of shape (batch_size, input_dim).
        :return: Signature tensor of shape (batch_size, projection_dim).
        """
        projected_x = self.random_projection(x)
        signature = self.quantize(projected_x)
        
        return signature

# Cache mechanism to store and compare signatures
class InputSimilarityCache:
    def __init__(self):
        # Dictionary to store signatures and their corresponding results
        self.cache = {}
    
    def check_similarity(self, signature):
        """
        Check if a given signature is already in the cache.
        :param signature: The signature to check.
        :return: Boolean indicating whether a similar signature exists in cache.
                 If found, return True and the cached result; otherwise return False.
        """
        sig_tuple = tuple(signature.view(-1).tolist())  # Convert tensor to tuple for hashing
        
        if sig_tuple in self.cache:
            return True, self.cache[sig_tuple]
        
        return False, None
    
    def store_signature(self, signature, result):
        """
        Store a new signature and its corresponding result in cache.
        :param signature: The signature to store.
        :param result: The result associated with this signature.
        """
        sig_tuple = tuple(signature.view(-1).tolist())  # Convert tensor to tuple for hashing
        self.cache[sig_tuple] = result

# Example usage
if __name__ == "__main__":
    # Define dimensions
    input_dim = 100  # Input vector dimension
    projection_dim = 20  # Projected dimension
    quant_bits = 4  # Number of bits for quantization
    
    # Create an instance of RPQ and cache
    rpq = RPQ(input_dim=input_dim, projection_dim=projection_dim, quant_bits=quant_bits)
    cache = InputSimilarityCache()
    
    # Generate random input vectors
    batch_size = 10
    inputs = torch.randn(batch_size, input_dim)
    
    for i in range(batch_size):
        
        # Compute RPQ signature for each input vector
        signature = rpq.compute_signature(inputs[i].unsqueeze(0))
        
        # Check if a similar input has been processed before
        is_similar, cached_result = cache.check_similarity(signature)
        
        if is_similar:
            print(f"Input {i}: Similarity found! Reusing cached result.")
            print(f"Cached Result: {cached_result}")
            
            # Here you would reuse the cached result instead of recomputing it
            
            continue
        
        else:
            print(f"Input {i}: No similarity found. Performing computation.")
            
            # Perform some computation (e.g., forward pass through a neural network)
            result = inputs[i].sum()  # Example computation
            
            # Store the new result in cache along with its signature
            cache.store_signature(signature, result)

            print(f"Computed Result: {result}")

Input 0: No similarity found. Performing computation.
Computed Result: -10.730279922485352
Input 1: No similarity found. Performing computation.
Computed Result: -0.7254507541656494
Input 2: No similarity found. Performing computation.
Computed Result: -0.9253287315368652
Input 3: No similarity found. Performing computation.
Computed Result: -8.969884872436523
Input 4: No similarity found. Performing computation.
Computed Result: 2.577206611633301
Input 5: No similarity found. Performing computation.
Computed Result: 4.021418571472168
Input 6: No similarity found. Performing computation.
Computed Result: -5.820130348205566
Input 7: No similarity found. Performing computation.
Computed Result: -2.6905899047851562
Input 8: No similarity found. Performing computation.
Computed Result: -3.367391347885132
Input 9: No similarity found. Performing computation.
Computed Result: 9.375436782836914


In [3]:
import torch

# Define dimensions
input_dim = 100  # Input vector dimension
projection_dim = 20  # Projected dimension
quant_bits = 4  # Number of bits for quantization

# Create an instance of RPQ and cache
rpq = RPQ(input_dim=input_dim, projection_dim=projection_dim, quant_bits=quant_bits)
cache = InputSimilarityCache()

# Generate a base input vector
base_input = torch.randn(1, input_dim)

# Generate slightly perturbed versions of the base input
batch_size = 10
inputs = base_input + 0.01 * torch.randn(batch_size, input_dim)

for i in range(batch_size):
    # Compute RPQ signature for each input vector
    signature = rpq.compute_signature(inputs[i].unsqueeze(0))
    
    # Check if a similar input has been processed before
    is_similar, cached_result = cache.check_similarity(signature)
    
    if is_similar:
        print(f"Input {i}: Similarity found! Reusing cached result.")
        print(f"Cached Result: {cached_result}")
        continue
    else:
        print(f"Input {i}: No similarity found. Performing computation.")
        
        # Perform some computation (e.g., forward pass through a neural network)
        result = inputs[i].sum()  # Example computation
        
        # Store the new result in cache along with its signature
        cache.store_signature(signature, result)

        print(f"Computed Result: {result}")

Input 0: No similarity found. Performing computation.
Computed Result: -19.56744956970215
Input 1: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 2: No similarity found. Performing computation.
Computed Result: -19.66850471496582
Input 3: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 4: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 5: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 6: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 7: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 8: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
Input 9: Similarity found! Reusing cached result.
Cached Result: -19.56744956970215
