In [138]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

CLOCK_SPEED = 450 #MHZ

# 1. Hyperparameters
batch_size = 1
learning_rate = 0.001
num_epochs = 1

# Check if CUDA is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# 2. Data Preprocessing: No resizing, keep original 32x32 size
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),  # Convert to grayscale
    transforms.RandomHorizontalFlip(p=0.5),       # Randomly flip the image horizontally with a probability of 50%
    transforms.RandomRotation(degrees=15),        # Randomly rotate the image within ±15 degrees
    transforms.RandomResizedCrop(size=32, scale=(0.8, 1.0)),  # Random crop and resize back to 32x32
    transforms.ColorJitter(brightness=0.2, contrast=0.2),      # Randomly adjust brightness and contrast
    transforms.ToTensor(),                        # Convert to tensor
    transforms.Normalize(mean=[0.5], std=[0.5])   # Normalize to [-1, 1]
])


# 3. Load CIFAR-10 Dataset
full_train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
#full_test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# 4. Filter Dataset for Autombile and Dogs
def filter_Automobile_dogs(dataset):
    targets = torch.tensor(dataset.targets)  # Convert labels to a tensor
    mask = (targets == 1) | (targets == 5)  # Keep only labels 1 (automobile) and 5 (dog)
    dataset.targets = targets[mask].tolist()  # Update targets
    dataset.data = dataset.data[mask.numpy()]  # Update data
    return dataset

train_dataset = filter_Automobile_dogs(full_train_dataset)
#test_dataset = filter_Automobile_dogs(full_test_dataset)

# 5. Update Labels: Map [1, 5] -> [0, 1]
def remap_labels(dataset):
    dataset.targets = [0 if label == 1 else 1 for label in dataset.targets]  # Automobile = 0, Dog = 1
    return dataset

train_dataset = remap_labels(train_dataset)
#test_dataset = remap_labels(test_dataset)

# 6. Data Loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
#test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

####################################################################################################################

#outputs_final = {}
total_cache_hits = 0
total_cache_misses = 0
total_rpq = 0


random_rpq_matrix = torch.randn(1024, 20, device=device).uniform_(-1,1) # Move RPQ matrix to GPU #mean = 0 and var = 1

# RPQ Function
def rpq(input_vector, rows, columns):
    flattened_vector = input_vector.view(input-1)
    # Dot product of input vector and R
    signature = torch.matmul(flattened_vector, random_rpq_matrix)

    # Quantization -> sign-based
    signature_quantized = torch.where(signature < 0, torch.tensor(1.0, device=signature.device), torch.tensor(0.0, device=signature.device))
    return signature_quantized


# 7. Define a Simple CNN
class SimpleCNN_with_RPQ_Layer(nn.Module):
    def __init__(self):
        super(SimpleCNN_with_RPQ_Layer, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 2)  # 2 classes: automobile and dog
        
        self.mcache = {}
        self.cache_hits = 0
        self.cache_misses = 0 
        self.binary = ''
    
    def reset(self):
        self.mcache.clear()
        self.cache_hits = 0
        self.cache_misses = 0 
        print("Mcache Reset")
        
    def forward(self):
        global total_cache_hits, total_cache_misses, total_rpq
        #reset mcache, cache_hits and cache_misses
        self.reset()
        
        # first conv layer
        weights_conv1 = {}
        #weights_conv2 = {}
        
        j = 0
        for input_image, input_label in train_loader:
            input_image = input_image.to(device)
            # Generate RPQ signature and binary key
            start_rpq = perf_counter()
            rpq_signature_output = rpq(input_image, 1024, 20)# 1024 coz 32 * 32 (right now) , 30 columns coz signature length 
            end_rpq = perf_counter()
            total_rpq += (end_rpq - start_rpq)
            #Convert the binary tensor to a string representation of binary
            self.binary_key = ''.join(map(str, rpq_signature_output.int().tolist()))  # tensor to list and then to string
    
            #cache mechanism
            if self.binary_key in self.mcache:
                self.cache_hits += 1
                weights_conv1[j] = self.mcache[self.binary_key]
            else :
                self.cache_misses += 1
                weights_conv1[j] = torch.relu(self.conv1(input_image))
                self.mcache[self.binary_key] = weights_conv1[j]        
            j += 1
        
        print("First Layer Training Complete")
        total_cache_hits += self.cache_hits
        total_cache_misses += self.cache_misses
        print("First layer->Cache_hits", self.cache_hits)
        print("First Layer->Cache_misses", self.cache_misses)
        
#         # Second layer is continued using the output of the First conv layer (weights_conv1[]) 
#         j = 0
#         print("WEIGHTS:",len(weights_conv1))
#         for i in range(0,len(weights_conv1[0])): # will iterate through 16 feature maps
#             for j in range(0,len(weights_conv1[0][i])): # each input image 32*32
#                 print("WEIGHTS_SHAPE:",weights_conv1[0][i][j].shape)
#                 # Generate RPQ signature and binary key
#                 rpq_signature_output = rpq(weights_conv1[0][i][j], 1024, 20) # 1024 coz 32 * 32 (right now) , 20 columns coz signature length 

#                 # Convert the binary tensor to a string representation of binary
#                 self.binary_key = ''.join(map(str, rpq_signature_output.tolist()))  # tensor to list and then to string
                
#                 if self.binary in self.mcache :
#                     self.cache_hits += 1
#                     weights_conv2[j] = self.mcache[i][self.binary_key]
#                 else :
#                     self.cache_misses += 1
#                     weights_conv2[j] = self.pool(torch.relu(self.conv2(weights_conv1[i]))) 
#                     self.mcache[i][self.binary_key] = weights_conv2[i]
#                 j += 1
            
#         print("Second layer Training Complete")            
#         total_cache_hits += self.cache_hits
#         total_cache_misses += self.cache_misses
#         print("Second layer->Cache_hits", self.cache_hits)
#         print("Second Layer->Cache_misses", self.cache_misses)
        
        #The other layers(Pool,FC1,FC2) is continued using the outputs of the Second conv layer (weights_conv2[])
        for i in range(0,len(weights_conv1)):
            x = torch.relu(self.conv2(weights_conv1[i]))
            x = self.pool(x)
            x = x.view(-1, 32 * 8 * 8)
            x = torch.relu(self.fc1(x))
            x = self.fc2(x)

model = SimpleCNN_with_RPQ_Layer().to(device)  # Move model to GPU


######################################################################################################################
# 8. Training Loop 
# Forward Propogation
sync = 0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    start = perf_counter()
    model()
    end = perf_counter()
    sync += (end - start)
    print(f"TOTAL TIME TAKEN in EPOCH: {end - start} seconds")

print(f"TOTAL RPQ TIME: {total_rpq} seconds")
print(f"CYCLES: {sync * CLOCK_SPEED}*10e6")
print("Training complete!")
print("Average_Cache_hits :", total_cache_hits/num_epochs)
print("Average_Cache_misses:", total_cache_misses/num_epochs)

Using device: cuda
Files already downloaded and verified
Epoch 1/1
Mcache Reset
First Layer Training Complete
First layer->Cache_hits 1805
First Layer->Cache_misses 8195
TOTAL TIME TAKEN in EPOCH: 13.135185200007982 seconds
TOTAL RPQ TIME: 1.7139770996873267 seconds
CYCLES: 5910.833340003592*10e6
Training complete!
Average_Cache_hits : 1805.0
Average_Cache_misses: 8195.0


In [None]:

# 8. Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# loss calculation and Bqckward Propogation  
    ##########################################################    
        loss = criterion(outputs, input_label)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()

        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

# 10. Test the Model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")