In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Binarized Conv2D Layer
class BinarizedConv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(BinarizedConv2D, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
 
        
    def forward(self, x):
        # Binarize the weights
        binarized_weights = torch.sign(self.conv.weight)
        return F.conv2d(x, binarized_weights, self.conv.bias, self.conv.stride, self.conv.padding)

# Simple Network
class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.binarized_conv = BinarizedConv2D(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.fc = nn.Linear(64 * 3 * 3, 1)  # Flattened 3x3 output to a single scalar
    
    def forward(self, x):
        x = F.relu(self.binarized_conv(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x

# Create the network
model = SimpleNet()
for param in model.parameters():
    if param.requires_grad:
        print(param.data)
# Define the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Prepare the data
input_tensor = torch.ones(1, 1, 3, 3)  # Batch size 1, 1 channel, 3x3 image
target = torch.tensor([9.0])  # Correct answer is 9

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    
    # Zero gradients
    optimizer.zero_grad()
    
    # Forward pass
    output = model(input_tensor)
    
    # Compute the loss
    loss = criterion(output, target)
    
    # Backward pass and optimize
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {loss.item()}')

# Test the network
model.eval()
with torch.no_grad():
    output = model(input_tensor)
    print(f'Predicted output: {output.item()}')

tensor([[[[-0.3235, -0.1142, -0.2569],
          [ 0.3151,  0.1815, -0.0572],
          [ 0.1813,  0.2240, -0.0657]]],


        [[[-0.0246, -0.1054,  0.0211],
          [-0.3312,  0.2878,  0.0015],
          [ 0.2827,  0.1188, -0.0476]]],


        [[[-0.0374, -0.1837,  0.1866],
          [-0.1761, -0.3094, -0.2416],
          [-0.1913, -0.2855,  0.1398]]],


        [[[ 0.2311,  0.1743,  0.0419],
          [ 0.3215, -0.0928, -0.0852],
          [-0.0517, -0.1484, -0.1222]]],


        [[[ 0.2716,  0.0569, -0.0018],
          [-0.0613,  0.1806, -0.1027],
          [-0.0942, -0.0720,  0.2632]]],


        [[[ 0.1584,  0.2487,  0.3259],
          [ 0.3252,  0.1288, -0.1190],
          [ 0.2233, -0.0473,  0.1669]]],


        [[[ 0.1816,  0.0632,  0.0724],
          [-0.2809,  0.1243,  0.0291],
          [-0.1784, -0.2751,  0.0555]]],


        [[[-0.0788,  0.1377, -0.0208],
          [-0.2186,  0.2356, -0.0669],
          [ 0.0473,  0.1626, -0.1805]]],


        [[[-0.3285, -0.0145,  0.

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [300/1000], Loss: 0.0013570773880928755
Epoch [400/1000], Loss: 2.386980486335233e-05
Epoch [500/1000], Loss: 4.193143468000926e-07
Epoch [600/1000], Loss: 7.531525625381619e-09
Epoch [700/1000], Loss: 5.238689482212067e-10
Epoch [800/1000], Loss: 5.238689482212067e-10
Epoch [900/1000], Loss: 5.238689482212067e-10
Predicted output: 8.999977111816406


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from custom_bnn import BNNConv2d
class SimpleNet2(nn.Module): 
    def __init__(self):
        super(SimpleNet2, self).__init__()
        self.binarized_conv = BNNConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.fc = nn.Linear(64 * 3 * 3, 1)  # Flattened 3x3 output to a single scalar
    
    def forward(self, x):
        x = F.relu(self.binarized_conv(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x

# Create the network
model2 = SimpleNet2()

# Define the loss function and optimizer
criterion = nn.MSELoss()  # Mean Squared Error loss
optimizer2 = optim.SGD(model2.parameters(), lr=0.0001)

# Prepare the data
input_tensor = torch.ones(1, 1, 3, 3)  # Batch size 1, 1 channel, 3x3 image
target = torch.tensor([-18.0])  # Correct answer is 9

# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    model2.train()
    
    # Zero gradients
    optimizer2.zero_grad()
    
    # Forward pass
    output = model2(input_tensor)
    
    # Compute the loss
    loss2 = criterion(output, target)
    
    # Backward pass and optimize
    loss2.backward()
    optimizer2.step()
    
    if epoch % 100 == 0:
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {loss2.item()}')

# Test the network
model2.eval()
with torch.no_grad():
    output = model2(input_tensor)
    print(f'Predicted output: {output.item()}')

ImportError: cannot import name 'BNNConv2d' from 'custom_bnn' (d:\study\S3\Project thesis\Work\custom_bnn.py)

In [2]:
for param in model2.parameters():
    if torch.isnan(param).any():
        print("NaN detected in parameters")

In [3]:
for param in model2.parameters():

        print(param.data)

tensor([[[[-3.0531e-04,  5.9963e-04, -3.1483e-04],
          [-4.0678e-04, -4.7712e-04, -2.7545e-04],
          [-1.8555e-04,  3.1659e-04, -4.6537e-04]]],


        [[[ 7.3230e-04, -2.4850e-05, -5.2456e-04],
          [-4.2127e-04, -9.8822e-05, -1.3536e-04],
          [-9.4681e-04,  7.1551e-04,  3.4888e-04]]],


        [[[-3.4143e-05,  6.8131e-04,  9.3344e-04],
          [ 1.3991e-03,  1.6811e-03,  1.1179e-03],
          [-7.9395e-05,  5.1198e-04,  7.9454e-04]]],


        [[[ 3.0490e-04, -8.2551e-04,  4.1526e-04],
          [-6.1821e-04,  1.2758e-03,  5.8175e-04],
          [ 1.1742e-04, -4.0906e-04,  9.7777e-04]]],


        [[[ 3.0363e-04, -1.3289e-04, -5.9140e-04],
          [ 1.0974e-03,  1.2387e-03,  1.7357e-04],
          [ 9.8778e-04,  9.4199e-04,  1.3228e-03]]],


        [[[ 1.2046e-03,  9.8128e-04, -3.6269e-04],
          [ 1.0454e-03,  9.7377e-04, -5.0830e-04],
          [ 1.0988e-03,  4.7435e-04, -2.8166e-04]]],


        [[[-9.7225e-04, -3.8115e-05, -3.1524e-05],
       

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from custom_bnn import CBNNConv2d
class SimpleNet2(nn.Module): 
    def __init__(self):
        super(SimpleNet2, self).__init__()
        self.binarized_conv1 = CBNNConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.binarized_conv2 = CBNNConv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1)
        self.fc = nn.Linear(25088, 10)  # Flattened 3x3 output to a single scalar
    
    def forward(self, x):
        x = F.relu(self.binarized_conv1(x))
        x = F.relu(self.binarized_conv2(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x
model =SimpleNet2()

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from custom_bnn import BNNLinear
class BinaryLinear(nn.Module):
    def __init__(self, in_features, out_features, bias=True):
        super(BinaryLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = nn.Parameter(torch.randn(out_features, in_features))
        if bias:
            self.bias = nn.Parameter(torch.zeros(out_features))
        else:
            self.register_parameter('bias', None)

    def forward(self, x):
        # Binarize the weights
        binary_weight = torch.sign(self.weight)
        # Apply the linear transformation
        x = F.linear(x, binary_weight, self.bias)
        return x



class BNNNet(nn.Module):
    def __init__(self):
        super(BNNNet, self).__init__()
        self.fc1 = BNNLinear(28 * 28, 512)
        self.fc2 = BNNLinear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(-1, 28 * 28)  # Flatten the MNIST images
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # No activation at the output layer
        return x 
model =BNNNet()
 

In [15]:
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
# Training settings
batch_size = 256
learning_rate = .0001
epochs = 10

# Prepare MNIST dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the full dataset
full_dataset = datasets.MNIST('../data', train=True, download=True, transform=transform)

# Define dataset split ratios
train_size = int(0.8 * len(full_dataset))  # 80% for training
val_size = len(full_dataset) - train_size  # The rest for validation

# Split dataset
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


In [16]:
def train(model, device, train_loader, optimizer, criterion):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)  
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')

def evaluate(model, device, val_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            output = model(data) 
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    accuracy = 100. * correct / total
    return accuracy


In [18]:
# Instantiate the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Define optimizer and loss function
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)
lr=.00001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

# Training loop
for epoch in range(1, epochs + 1):
    print(f"\nTraining Model for Epoch {epoch}")
    train(model, device, train_loader, optimizer, criterion)
    
    print(f"\nEvaluating Model on Validation Set for Epoch {epoch}")
    accuracy = evaluate(model, device, val_loader)
    print(f'Validation Accuracy: {accuracy:.2f}%')
    accuracy = evaluate(model, device, train_loader)
    print(f'Validation Accuracy: {accuracy:.2f}%')



Training Model for Epoch 1

Evaluating Model on Validation Set for Epoch 1
Validation Accuracy: 86.12%
Validation Accuracy: 87.16%

Training Model for Epoch 2

Evaluating Model on Validation Set for Epoch 2
Validation Accuracy: 65.40%
Validation Accuracy: 66.63%

Training Model for Epoch 3

Evaluating Model on Validation Set for Epoch 3
Validation Accuracy: 87.87%
Validation Accuracy: 89.17%

Training Model for Epoch 4

Evaluating Model on Validation Set for Epoch 4
Validation Accuracy: 88.34%
Validation Accuracy: 89.40%

Training Model for Epoch 5

Evaluating Model on Validation Set for Epoch 5
Validation Accuracy: 87.65%
Validation Accuracy: 88.83%

Training Model for Epoch 6

Evaluating Model on Validation Set for Epoch 6
Validation Accuracy: 88.92%
Validation Accuracy: 90.05%

Training Model for Epoch 7

Evaluating Model on Validation Set for Epoch 7
Validation Accuracy: 88.61%
Validation Accuracy: 89.62%

Training Model for Epoch 8

Evaluating Model on Validation Set for Epoch 8


In [13]:
print(device)

cuda


In [20]:
torch.save(model.state_dict(), 'model.pth')

In [23]:
model.load_state_dict(torch.load('model.pth'))

RuntimeError: Error(s) in loading state_dict for ResNet34:
	Missing key(s) in state_dict: "conv1.weight", "layer1.0.conv1.weight", "layer1.0.conv2.weight", "layer1.1.conv1.weight", "layer1.1.conv2.weight", "layer1.2.conv1.weight", "layer1.2.conv2.weight", "layer2.0.conv1.weight", "layer2.0.conv2.weight", "layer2.0.downsample.0.weight", "layer2.1.conv1.weight", "layer2.1.conv2.weight", "layer2.2.conv1.weight", "layer2.2.conv2.weight", "layer2.3.conv1.weight", "layer2.3.conv2.weight", "layer3.0.conv1.weight", "layer3.0.conv2.weight", "layer3.0.downsample.0.weight", "layer3.1.conv1.weight", "layer3.1.conv2.weight", "layer3.2.conv1.weight", "layer3.2.conv2.weight", "layer3.3.conv1.weight", "layer3.3.conv2.weight", "layer3.4.conv1.weight", "layer3.4.conv2.weight", "layer3.5.conv1.weight", "layer3.5.conv2.weight", "layer4.0.conv1.weight", "layer4.0.conv2.weight", "layer4.0.downsample.0.weight", "layer4.1.conv1.weight", "layer4.1.conv2.weight", "layer4.2.conv1.weight", "layer4.2.conv2.weight". 
	Unexpected key(s) in state_dict: "conv1.conv.weight", "conv1.conv.bias", "layer1.0.conv1.conv.weight", "layer1.0.conv1.conv.bias", "layer1.0.conv2.conv.weight", "layer1.0.conv2.conv.bias", "layer1.1.conv1.conv.weight", "layer1.1.conv1.conv.bias", "layer1.1.conv2.conv.weight", "layer1.1.conv2.conv.bias", "layer1.2.conv1.conv.weight", "layer1.2.conv1.conv.bias", "layer1.2.conv2.conv.weight", "layer1.2.conv2.conv.bias", "layer2.0.conv1.conv.weight", "layer2.0.conv1.conv.bias", "layer2.0.conv2.conv.weight", "layer2.0.conv2.conv.bias", "layer2.0.downsample.0.conv.weight", "layer2.0.downsample.0.conv.bias", "layer2.1.conv1.conv.weight", "layer2.1.conv1.conv.bias", "layer2.1.conv2.conv.weight", "layer2.1.conv2.conv.bias", "layer2.2.conv1.conv.weight", "layer2.2.conv1.conv.bias", "layer2.2.conv2.conv.weight", "layer2.2.conv2.conv.bias", "layer2.3.conv1.conv.weight", "layer2.3.conv1.conv.bias", "layer2.3.conv2.conv.weight", "layer2.3.conv2.conv.bias", "layer3.0.conv1.conv.weight", "layer3.0.conv1.conv.bias", "layer3.0.conv2.conv.weight", "layer3.0.conv2.conv.bias", "layer3.0.downsample.0.conv.weight", "layer3.0.downsample.0.conv.bias", "layer3.1.conv1.conv.weight", "layer3.1.conv1.conv.bias", "layer3.1.conv2.conv.weight", "layer3.1.conv2.conv.bias", "layer3.2.conv1.conv.weight", "layer3.2.conv1.conv.bias", "layer3.2.conv2.conv.weight", "layer3.2.conv2.conv.bias", "layer3.3.conv1.conv.weight", "layer3.3.conv1.conv.bias", "layer3.3.conv2.conv.weight", "layer3.3.conv2.conv.bias", "layer3.4.conv1.conv.weight", "layer3.4.conv1.conv.bias", "layer3.4.conv2.conv.weight", "layer3.4.conv2.conv.bias", "layer3.5.conv1.conv.weight", "layer3.5.conv1.conv.bias", "layer3.5.conv2.conv.weight", "layer3.5.conv2.conv.bias", "layer4.0.conv1.conv.weight", "layer4.0.conv1.conv.bias", "layer4.0.conv2.conv.weight", "layer4.0.conv2.conv.bias", "layer4.0.downsample.0.conv.weight", "layer4.0.downsample.0.conv.bias", "layer4.1.conv1.conv.weight", "layer4.1.conv1.conv.bias", "layer4.1.conv2.conv.weight", "layer4.1.conv2.conv.bias", "layer4.2.conv1.conv.weight", "layer4.2.conv1.conv.bias", "layer4.2.conv2.conv.weight", "layer4.2.conv2.conv.bias". 

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from mmcv.runner import EpochBasedRunner, CheckpointHook
from bnn import BNNConv2d
import logging 

class SimpleNet2(nn.Module): 
    def __init__(self):
        super(SimpleNet2, self).__init__()
        self.binarized_conv1 = BNNConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.binarized_conv2 = BNNConv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1)
        self.fc = nn.Linear(25088, 10)  # Adjust input size to match your actual input
    
    def forward(self, x):
        x = F.relu(self.binarized_conv1(x))
        x = F.relu(self.binarized_conv2(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x
    def train_step(self, data, optimizer, **kwargs):
        """
        The `train_step` method that MMCV's runner will call during training.
        """
        self.train()  # Set the model to training mode
        inputs, labels = data
        outputs = self(inputs)
        
        # Assume CrossEntropyLoss is used
        loss = F.cross_entropy(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # MMCV expects a dict with 'loss' key for logging
        return {'loss': loss.item()}

# Instantiate the model, optimizer, and criterion
model = SimpleNet2()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
logging.basicConfig(level=logging.INFO)  # You can change the level to DEBUG if you want more details
logger = logging.getLogger()

# Dummy data loader for illustration
# Replace with your actual data loader
train_loader = [(torch.randn(32, 1, 28, 28), torch.randint(0, 10, (32,))) for _ in range(100)]

# Runner configuration
runner = EpochBasedRunner(
    model=model,
    optimizer=optimizer,
    work_dir='./checkpoints',  # Directory to save checkpoints
    logger=logger
)

# Register a checkpoint hook
checkpoint_hook = CheckpointHook(interval=1, max_keep_ckpts=5)
runner.register_hook(checkpoint_hook)

# Training loop configuration
runner.run(
    data_loaders=[train_loader],  # Data loaders for training
    workflow=[('train', 1)],  # Workflow (train for 1 epoch at a time)
    max_epochs=10  # Total number of epochs
)

# To manually save a checkpoint at any time, you can call:
# runner.save_checkpoint('./checkpoints', filename_tmpl='epoch_{}.pth')


INFO:root:Start running, host: Desktop@HPDESKTOP, work_dir: d:\study\S3\Project thesis\Work\checkpoints
INFO:root:Hooks will be executed in the following order:
before_run:
(NORMAL      ) CheckpointHook                     
 -------------------- 
after_train_iter:
(NORMAL      ) CheckpointHook                     
 -------------------- 
after_train_epoch:
(NORMAL      ) CheckpointHook                     
 -------------------- 
INFO:root:workflow: [('train', 1)], max: 10 epochs
INFO:root:Checkpoints will be saved to d:\study\S3\Project thesis\Work\checkpoints by HardDiskBackend.
INFO:root:Saving checkpoint at 1 epochs


KeyboardInterrupt: 

In [None]:
from mmcv.runner import load_checkpoint
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from custom_bnn import CBNNConv2d
class SimpleNet2(nn.Module): 
    def __init__(self):
        super(SimpleNet2, self).__init__()
        self.binarized_conv1 = CBNNConv2d(in_channels=1, out_channels=64, kernel_size=3, padding=1)
        self.binarized_conv2 = CBNNConv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1)
        self.fc = nn.Linear(25088, 10)  # Flattened 3x3 output to a single scalar
    
    def forward(self, x):
        x = F.relu(self.binarized_conv1(x))
        x = F.relu(self.binarized_conv2(x))
        x = x.view(x.size(0), -1)  # Flatten
        x = self.fc(x)
        return x
model=SimpleNet2()
# # Load the checkpoint
# checkpoint_path = './checkpoints/latest.pth'
# load_checkpoint(model, checkpoint_path)

In [None]:
torch.save(model.state_dict(), "model.pth")

In [None]:
import torch
import torch.profiler as profiler

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleNet2().to(device)
data = torch.randn(256,1,28,28)  # Batch size of 32, 10 features
data=data.to(device)
# Define a function to perform a forward pass
def forward_pass():
    model(data)

# Set up the profiler
with profiler.profile(
    activities=[profiler.ProfilerActivity.CPU, profiler.ProfilerActivity.CUDA],  # Include CUDA if using GPU
    record_shapes=True,  # Record tensor shapes
    with_stack=True  # Record stack traces
) as prof:
    forward_pass()  # Run the forward pass inside the profiler

# Print the profiling results
print(prof.key_averages().table(sort_by="self_cpu_time_total"))

NameError: name 'SimpleNet2' is not defined

In [None]:
print(torch.cuda.memory_summary())

|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    1904 KB |  400731 KB |  747534 KB |  745629 KB |
|       from large pool |       0 KB |  397824 KB |  741378 KB |  741378 KB |
|       from small pool |    1904 KB |    4978 KB |    6155 KB |    4251 KB |
|---------------------------------------------------------------------------|
| Active memory         |    1904 KB |  400731 KB |  747534 KB |  745629 KB |
|       from large pool |       0 KB |  397824 KB |  741378 KB |  741378 KB |
|       from small pool |    1904 KB |    4978 KB |    6155 KB |    4251 KB |
|---------------------------------------------------------------

In [None]:
import torch.profiler as profiler
def train_one_step(model, optimizer, criterion, inputs, targets):
    model.train()
    optimizer.zero_grad()

    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward pass
    loss.backward()
    optimizer.step()

    return loss.item()

def profile_training_step(model, optimizer, criterion, inputs, targets):
    with profiler.profile(
        activities=[profiler.ProfilerActivity.CPU, profiler.ProfilerActivity.CUDA],
        record_shapes=True,
        with_stack=True
    ) as prof:
        for _ in range(6):  # Warmup and then profiling
            train_one_step(model, optimizer, criterion, inputs, targets)

    print(prof.key_averages().table(sort_by="self_cuda_time_total"))

In [None]:
device="cuda"
model = model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# Dummy data
inputs = torch.randn(256, 1, 28, 28).to(device)  # Batch size 64, MNIST-like input
targets = torch.randint(0, 10, (256,)).to(device)  # Random target classes
profile_training_step(model, optimizer, criterion, inputs, targets)

-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                aten::cudnn_convolution        29.21%     824.485ms        29.21%     824.485ms       3.817ms     766.922ms        25.14%     766.922ms       3.551ms           216  
                             aten::convolution_backward         6.09%     171.925ms         6.16%     173.835ms     804.792us     369.656ms        12.12%     373.035ms       1.727ms           216  
         

In [21]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from custom_bnn import CBNNConv2d
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 =CBNNConv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = CBNNConv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ResNet34(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet34, self).__init__()
        self.in_channels = 64

        # Initial convolutional layer
        self.conv1 = CBNNConv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)  # Assuming grayscale input
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual layers
        self.layer1 = self._make_layer(BasicBlock, 64, 3)
        self.layer2 = self._make_layer(BasicBlock, 128, 4, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 6, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 3, stride=2)

        # Fully connected layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                CBNNConv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [29]:
model=ResNet34()

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from bnn import BNNConv2d


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 =BNNConv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = BNNConv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

class ResNet34(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet34, self).__init__()
        self.in_channels = 64

        # Initial convolutional layer
        self.conv1 = BNNConv2d(1, 64, kernel_size=7, stride=2, padding=3)  # Assuming grayscale input
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual layers
        self.layer1 = self._make_layer(BasicBlock, 64, 3)
        self.layer2 = self._make_layer(BasicBlock, 128, 4, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 6, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 3, stride=2)

        # Fully connected layer
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * BasicBlock.expansion, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if stride != 1 or self.in_channels != out_channels * block.expansion:
            downsample = nn.Sequential(
                BNNConv2d(self.in_channels, out_channels * block.expansion, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x