In [69]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from time import perf_counter

# GPU clock in MHz, used to convert wall-clock seconds into a cycle estimate
# (average speed of the author's RTX 4060).
CLOCK_SPEED = 450

# --- Run configuration ---
num_epochs = 10
batch_size = 1
learning_rate = 0.001

# --- Device selection: CUDA when it is available, CPU otherwise ---
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# --- Preprocessing: images keep their original 32x32 size ---
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),   # collapse to one channel
        transforms.ToTensor(),                         # image -> tensor
        transforms.Normalize(mean=[0.5], std=[0.5]),   # rescale to [-1, 1]
    ]
)

# --- CIFAR-10 training split, stored under ./data ---
full_train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)

# --- Loader over the training split, shuffled ---
train_loader = torch.utils.data.DataLoader(
    dataset=full_train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

##########################################################################################################################

# RPQ Function
def rpq(input_vector, rows, columns, projection_matrix=None):
    """Compute a binary random-projection signature for every sample in a batch.

    Each sample is flattened into one feature vector, multiplied by a
    projection matrix, and quantized by sign: strictly negative projections
    become 1, everything else (including exact zeros) becomes 0.

    Args:
        input_vector: Tensor whose first dimension is the batch; all remaining
            dimensions are flattened into a single feature axis.
        rows: Not used by the computation; kept so existing calls keep working.
        columns: Not used by the computation; kept so existing calls keep working.
        projection_matrix: Optional matrix to project with. When omitted, the
            module-level ``random_rpq_matrix`` is used, exactly as before.

    Returns:
        Tensor with one row per sample and one column per projection-matrix
        column, holding 0/1 values in the dtype of the projection result.
    """
    if projection_matrix is None:
        # Backward-compatible default: fall back to the global matrix that the
        # surrounding script (re)creates.
        projection_matrix = random_rpq_matrix

    # reshape (rather than view) also accepts non-contiguous inputs.
    flattened_vector = input_vector.reshape(input_vector.size(0), -1)

    # Dot product of every flattened sample with the projection matrix.
    signature = torch.matmul(flattened_vector, projection_matrix)

    # Sign-based quantization: negative -> 1, non-negative -> 0.
    return (signature < 0).to(signature.dtype)

# 7. Define a Simple CNN
class SimpleCNN(nn.Module):
    """Small two-convolution CNN for single-channel 32x32 images.

    Layout: conv(1->16) -> ReLU -> pool -> conv(16->32) -> ReLU -> pool ->
    flatten -> fc(2048->128) -> ReLU -> fc(128->2).

    ``fc1`` expects 32 * 8 * 8 features per sample, so the 32x32 input has to
    be halved twice; pooling is therefore applied after each convolution.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # 2 classes: automobile and dog

    def forward(self, x):
        """Return class logits of shape (batch, 2) for input (batch, 1, 32, 32)."""
        x = self.pool(torch.relu(self.conv1(x)))  # 32x32 -> 16x16
        x = self.pool(torch.relu(self.conv2(x)))  # 16x16 -> 8x8
        # Flatten per sample; a feature-size mismatch now raises in fc1
        # instead of being silently folded into the batch dimension.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)
        
model = SimpleCNN().to(device)  # Move model to the selected device

# 8. Timed pass over the dataset with an RPQ-signature cache.
# For every image an RPQ signature is computed; the model's forward pass only
# runs when that signature has not been seen earlier in the same epoch.
# No loss or optimizer step happens in this loop.
sync = 0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    cache_hits = 0
    cache_misses = 0
    # Signatures seen so far this epoch; a set keeps membership tests O(1).
    mcache = set()
    total_rpq = 0
    # Fresh projection matrix each epoch, entries drawn uniformly from [-1, 1).
    random_rpq_matrix = torch.empty(1024, 20, device=device).uniform_(-1, 1)
    start = perf_counter()
    for input_image, _ in train_loader:
        input_image = input_image.to(device)
        # NOTE: there is no synchronization around rpq(), so on CUDA this
        # interval may reflect kernel launch time rather than execution time.
        start_rpq = perf_counter()
        # 1024 = 32 * 32 flattened pixels, 20 = signature length
        rpq_signature_output = rpq(input_image, 1024, 20)
        end_rpq = perf_counter()
        # Tensor -> nested list -> string, used as the cache key.
        binary_key = ''.join(map(str, rpq_signature_output.int().tolist()))
        total_rpq += end_rpq - start_rpq
        # Cache mechanism: skip the forward pass for signatures already seen.
        if binary_key in mcache:
            cache_hits += 1
        else:
            cache_misses += 1
            model(input_image)
            mcache.add(binary_key)
        # Wait for queued GPU work so the epoch timing covers it; skipped on
        # CPU, where there is nothing to synchronize.
        if device.type == 'cuda':
            torch.cuda.synchronize()
    end = perf_counter()
    sync += (end - start)

    # Per-epoch report
    print(f"Cache_hits:{cache_hits}")
    print(f"Cache_misses:{cache_misses}")
    print(f"TOTAL TIME TAKEN in EACH EPOCH: {end - start} s")
    print(f"TOTAL RPQ TIME TAKEN in EACH EPOCH: {total_rpq} s")

print(f"Total time:{sync}s")
# CLOCK_SPEED is in MHz, so seconds * CLOCK_SPEED is in units of 1e6 cycles.
print(f"CYCLES: {sync * CLOCK_SPEED}*1e6")
print("Training complete!")


Using device: cuda
Files already downloaded and verified
Epoch 1/10
Cache_hits:17579
Cache_misses:32421
TOTAL TIME TAKEN in EACH EPOCH: 49.189934499998344 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 6.332384100787749 s
Epoch 2/10
Cache_hits:17041
Cache_misses:32959
TOTAL TIME TAKEN in EACH EPOCH: 47.2378624999983 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 5.957383401386323 s
Epoch 3/10
Cache_hits:19905
Cache_misses:30095
TOTAL TIME TAKEN in EACH EPOCH: 45.77651920000062 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 5.9716581004540785 s
Epoch 4/10
Cache_hits:15483
Cache_misses:34517
TOTAL TIME TAKEN in EACH EPOCH: 49.064020199999504 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 5.996649000553589 s
Epoch 5/10
Cache_hits:16712
Cache_misses:33288
TOTAL TIME TAKEN in EACH EPOCH: 47.9579137000037 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 6.114930899864703 s
Epoch 6/10
Cache_hits:15909
Cache_misses:34091
TOTAL TIME TAKEN in EACH EPOCH: 50.57327919999807 s
TOTAL RPQ TIME TAKEN in EACH EPOCH: 6.318023198597075 s
Epoch 7/10
C

In [68]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from time import perf_counter

# GPU clock in MHz, used to convert wall-clock seconds into a cycle estimate
# (average speed of the author's RTX 4060).
CLOCK_SPEED = 450

# --- Run configuration ---
num_epochs = 10
batch_size = 1
learning_rate = 0.001

# --- Device selection: CUDA when it is available, CPU otherwise ---
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# --- Preprocessing: grayscale + random augmentation, output stays 32x32 ---
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),               # collapse to one channel
        transforms.RandomHorizontalFlip(p=0.5),                    # mirror half of the images
        transforms.RandomRotation(degrees=15),                     # rotate within +/-15 degrees
        transforms.RandomResizedCrop(size=32, scale=(0.8, 1.0)),   # random crop, resized back to 32x32
        transforms.ColorJitter(brightness=0.2, contrast=0.2),      # jitter brightness and contrast
        transforms.ToTensor(),                                     # image -> tensor
    ]
)

# --- CIFAR-10 training split, stored under ./data ---
full_train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)

# --- Loader over the training split, shuffled ---
train_loader = torch.utils.data.DataLoader(
    dataset=full_train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
############################################################################################################

# 5. Define a Simple CNN
class SimpleCNN(nn.Module):
    """Small two-convolution CNN for single-channel 32x32 images, 10 classes.

    Layout: conv(1->16) -> ReLU -> pool -> conv(16->32) -> ReLU -> pool ->
    flatten -> fc(2048->128) -> ReLU -> fc(128->10).

    ``fc1`` expects 32 * 8 * 8 features per sample, so the 32x32 input has to
    be halved twice; pooling is therefore applied after each convolution.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes

    def forward(self, x):
        """Return class logits of shape (batch, 10) for input (batch, 1, 32, 32)."""
        x = self.pool(torch.relu(self.conv1(x)))  # 32x32 -> 16x16
        x = self.pool(torch.relu(self.conv2(x)))  # 16x16 -> 8x8
        # Flatten per sample; a feature-size mismatch now raises in fc1
        # instead of being silently folded into the batch dimension.
        x = x.flatten(1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = SimpleCNN().to(device)  # Move model to the selected device

# 6. Loss Function and Optimizer
# (created here; the timing loop below runs forward passes only and does not
# call either of them)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# 7. Baseline timing loop: one forward pass per image, no caching
sync = 0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    start = perf_counter()
    # Forward pass
    for input_image, input_label in train_loader:
        input_image, input_label = input_image.to(device), input_label.to(device)
        outputs = model(input_image)
        # CUDA work is queued asynchronously; wait for it so the wall-clock
        # measurement covers finished work. Skipped on CPU.
        if device.type == 'cuda':
            torch.cuda.synchronize()

    end = perf_counter()
    sync += (end - start)

    # Per-epoch report
    print(f"TOTAL TIME TAKEN in EACH EPOCH: {end - start} s")

print(f"Total time:{sync}s")
# CLOCK_SPEED is in MHz, so seconds * CLOCK_SPEED is in units of 1e6 cycles.
print(f"CYCLES: {sync * CLOCK_SPEED}*1e6")
print("Training complete!")

Using device: cuda
Files already downloaded and verified
Epoch 1/10
TOTAL TIME TAKEN in EACH EPOCH: 51.14707940000517 s
Epoch 2/10
TOTAL TIME TAKEN in EACH EPOCH: 46.89836210000067 s
Epoch 3/10
TOTAL TIME TAKEN in EACH EPOCH: 47.446700899999996 s
Epoch 4/10
TOTAL TIME TAKEN in EACH EPOCH: 46.65250780000497 s
Epoch 5/10
TOTAL TIME TAKEN in EACH EPOCH: 52.09534079999867 s
Epoch 6/10
TOTAL TIME TAKEN in EACH EPOCH: 47.12849020000431 s
Epoch 7/10
TOTAL TIME TAKEN in EACH EPOCH: 47.315138099998876 s
Epoch 8/10
TOTAL TIME TAKEN in EACH EPOCH: 47.809754499998235 s
Epoch 9/10
TOTAL TIME TAKEN in EACH EPOCH: 47.55387899999914 s
Epoch 10/10
TOTAL TIME TAKEN in EACH EPOCH: 47.25544610000361 s
Total time:481.30269890001364s
CYCLES: 216586.21450500615*10e6
Training complete!
