In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import custom_cnn

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
batch_size = 64
num_epochs = 100
learning_rate = 0.0001

In [4]:
train_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ]
)
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
    ]
)

In [5]:
train_dataset = datasets.CIFAR10(
    root="../data/dataset", train=True, transform=train_transform, download=True
)
test_dataset = datasets.CIFAR10(
    root="../data/dataset", train=False, transform=test_transform, download=True
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)

In [7]:
model = custom_cnn.CNNModel()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward propagation
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Print loss per epoch
    print(
        f"Epoch: [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}"
    )

Epoch: [1/100], Loss: 1.5281
Epoch: [2/100], Loss: 1.1570
Epoch: [3/100], Loss: 1.0103
Epoch: [4/100], Loss: 0.9113
Epoch: [5/100], Loss: 0.8424
Epoch: [6/100], Loss: 0.7853
Epoch: [7/100], Loss: 0.7403
Epoch: [8/100], Loss: 0.6979
Epoch: [9/100], Loss: 0.6611
Epoch: [10/100], Loss: 0.6289
Epoch: [11/100], Loss: 0.5959
Epoch: [12/100], Loss: 0.5646
Epoch: [13/100], Loss: 0.5356
Epoch: [14/100], Loss: 0.5095
Epoch: [15/100], Loss: 0.4813
Epoch: [16/100], Loss: 0.4565
Epoch: [17/100], Loss: 0.4338
Epoch: [18/100], Loss: 0.4105
Epoch: [19/100], Loss: 0.3898
Epoch: [20/100], Loss: 0.3711
Epoch: [21/100], Loss: 0.3499
Epoch: [22/100], Loss: 0.3305
Epoch: [23/100], Loss: 0.3160
Epoch: [24/100], Loss: 0.2949
Epoch: [25/100], Loss: 0.2814
Epoch: [26/100], Loss: 0.2633
Epoch: [27/100], Loss: 0.2495
Epoch: [28/100], Loss: 0.2389
Epoch: [29/100], Loss: 0.2282
Epoch: [30/100], Loss: 0.2202
Epoch: [31/100], Loss: 0.2052
Epoch: [32/100], Loss: 0.1921
Epoch: [33/100], Loss: 0.1850
Epoch: [34/100], Lo

In [8]:
torch.save(model.state_dict(), "trained_model.pth")

In [9]:
def test_model(test_loader, model, device):
    model.to("cpu")
    model.eval()
    correct = 0
    total = 0
    predicted_labels = []
    true_labels = []
    
    total_latency = 0
    
    with torch.no_grad():
        for images, labels in test_loader:
            
            # Measure latency and CPU usage
            
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            predicted_labels.extend(predicted.to('cpu').tolist())
            true_labels.extend(labels.to('cpu').tolist())
    
    # Calculate averages
    avg_latency_per_batch = total_latency / len(test_loader)
    accuracy = 100 * correct / total
    
    print(f"Test Accuracy: {accuracy:.2f}%")
    
    return accuracy,predicted_labels, true_labels

In [10]:
test_model(test_loader,model,device)

Test Accuracy: 23.62%


(23.62,
 [3,
  8,
  8,
  3,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  3,
  0,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  3,
  4,
  3,
  3,
  0,
  6,
  3,
  6,
  8,
  3,
  3,
  3,
  3,
  3,
  8,
  8,
  3,
  3,
  8,
  8,
  3,
  5,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  3,
  2,
  3,
  3,
  3,
  3,
  8,
  8,
  8,
  3,
  0,
  3,
  3,
  8,
  8,
  8,
  3,
  3,
  3,
  8,
  3,
  3,
  8,
  3,
  8,
  3,
  8,
  3,
  3,
  3,
  4,
  8,
  0,
  3,
  3,
  3,
  3,
  3,
  8,
  8,
  3,
  3,
  8,
  3,
  4,
  2,
  2,
  3,
  8,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  3,
  3,
  8,
  3,
  8,
  3,
  3,
  8,
  3,
  3,
  8,
  3,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  5,
  3,
  5,
  8,
  3,
  3,
  5,
  5,
  3,
  8,
  3,
  3,
  3,
  3,
  5,
  3,
  8,
  3,
  3,
  3,
  8,
  3,
  3,
  8,
  3,
  8,
  3,
  5,
  8,
  3,
  3,
  3,
  8,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  3,
  2,
  3,
  3,
  8,
  3,
  3,
  3,
  8,
  5,
  3,
  3,
  3,
  3,
  3,
  8,
  3,
  

In [11]:
import torch
import numpy as np

# Load your model

# Define the mapping from parameter names to filenames
param_to_filename = {
    'conv_layers.0.weight': 'conv2d_kernel_0.bin',
    'conv_layers.0.bias': 'conv2d_bias_0.bin',
    'conv_layers.1.weight': 'batch_normalization_gamma_0.bin',
    'conv_layers.1.bias': 'batch_normalization_beta_0.bin',
    'conv_layers.1.running_mean': 'batch_normalization_moving_mean_0.bin',
    'conv_layers.1.running_var': 'batch_normalization_moving_variance_0.bin',

    'conv_layers.4.weight': 'conv2d_1_kernel_0.bin',
    'conv_layers.4.bias': 'conv2d_1_bias_0.bin',
    'conv_layers.5.weight': 'batch_normalization_1_gamma_0.bin',
    'conv_layers.5.bias': 'batch_normalization_1_beta_0.bin',
    'conv_layers.5.running_mean': 'batch_normalization_1_moving_mean_0.bin',
    'conv_layers.5.running_var': 'batch_normalization_1_moving_variance_0.bin',

    'conv_layers.8.weight': 'conv2d_2_kernel_0.bin',
    'conv_layers.8.bias': 'conv2d_2_bias_0.bin',
    'conv_layers.9.weight': 'batch_normalization_2_gamma_0.bin',
    'conv_layers.9.bias': 'batch_normalization_2_beta_0.bin',
    'conv_layers.9.running_mean': 'batch_normalization_2_moving_mean_0.bin',
    'conv_layers.9.running_var': 'batch_normalization_2_moving_variance_0.bin',

    'fc_layers.2.weight': 'dense_kernel_0.bin',
    'fc_layers.2.bias': 'dense_bias_0.bin',
    'fc_layers.3.weight': 'batch_normalization_3_gamma_0.bin',
    'fc_layers.3.bias': 'batch_normalization_3_beta_0.bin',
    'fc_layers.3.running_mean': 'batch_normalization_3_moving_mean_0.bin',
    'fc_layers.3.running_var': 'batch_normalization_3_moving_variance_0.bin',

    'fc_layers.6.weight': 'dense_1_kernel_0.bin',
    'fc_layers.6.bias': 'dense_1_bias_0.bin',
    'fc_layers.7.weight': 'batch_normalization_4_gamma_0.bin',
    'fc_layers.7.bias': 'batch_normalization_4_beta_0.bin',
    'fc_layers.7.running_mean': 'batch_normalization_4_moving_mean_0.bin',
    'fc_layers.7.running_var': 'batch_normalization_4_moving_variance_0.bin',
}

# Iterate over the model's state_dict and save each parameter
for param_name, param_value in model.state_dict().items():
    if param_name in param_to_filename:
        # Convert the tensor to a NumPy array
        param_numpy = param_value.cpu().numpy()
        # Save to binary file
        param_numpy.tofile(f'../data/weights/{param_to_filename[param_name]}')
        print(f"Saved {param_name} to {param_to_filename[param_name]}")
    else:
        print(f"Parameter {param_name} not mapped to a filename.")

Saved conv_layers.0.weight to conv2d_kernel_0.bin
Saved conv_layers.0.bias to conv2d_bias_0.bin
Saved conv_layers.1.weight to batch_normalization_gamma_0.bin
Saved conv_layers.1.bias to batch_normalization_beta_0.bin
Saved conv_layers.1.running_mean to batch_normalization_moving_mean_0.bin
Saved conv_layers.1.running_var to batch_normalization_moving_variance_0.bin
Parameter conv_layers.1.num_batches_tracked not mapped to a filename.
Saved conv_layers.4.weight to conv2d_1_kernel_0.bin
Saved conv_layers.4.bias to conv2d_1_bias_0.bin
Saved conv_layers.5.weight to batch_normalization_1_gamma_0.bin
Saved conv_layers.5.bias to batch_normalization_1_beta_0.bin
Saved conv_layers.5.running_mean to batch_normalization_1_moving_mean_0.bin
Saved conv_layers.5.running_var to batch_normalization_1_moving_variance_0.bin
Parameter conv_layers.5.num_batches_tracked not mapped to a filename.
Saved conv_layers.8.weight to conv2d_2_kernel_0.bin
Saved conv_layers.8.bias to conv2d_2_bias_0.bin
Saved conv_l

In [12]:
import torch
import numpy as np
model.eval()  # Set to evaluation mode

# Dump weights for convolutional layers
conv_counter = 1
for layer in model.conv_layers:
    if isinstance(layer, nn.Conv2d):
        # Save weights
        weights = layer.weight.data.cpu().numpy()
        weights.tofile(f'conv{conv_counter}_weights.bin')
        
        # Save biases
        biases = layer.bias.data.cpu().numpy()
        biases.tofile(f'conv{conv_counter}_biases.bin')
        
        conv_counter += 1

# Dump weights for fully connected layers
fc_counter = 1
for layer in model.fc_layers:
    if isinstance(layer, nn.Linear):
        # Save weights
        weights = layer.weight.data.cpu().numpy()
        weights.tofile(f'fc{fc_counter}_weights.bin')
        
        # Save biases (if they exist)
        if layer.bias is not None:
            biases = layer.bias.data.cpu().numpy()
            biases.tofile(f'fc{fc_counter}_biases.bin')
        
        fc_counter += 1

print("All weights and biases dumped successfully!")

All weights and biases dumped successfully!


In [None]:
import tensorflow as tf
import numpy as np

# Data parameters
BATCH_SIZE = 64
NUM_EPOCHS = 100
LEARNING_RATE = 0.0001

# Ensure eager execution
tf.config.run_functions_eagerly(True)

# Data preprocessing functions
def preprocess_train(image, label):
    # Scale to [0,1] and normalize
    image = tf.cast(image, tf.float32) / 255.0
    mean = tf.constant([0.4914, 0.4822, 0.4465], shape=(1, 1, 3))
    std = tf.constant([0.247, 0.243, 0.261], shape=(1, 1, 3))
    image = (image - mean) / std
    return image, tf.cast(label, tf.int64)

def preprocess_test(image, label):
    # Only scale to [0,1] without normalization
    image = tf.cast(image, tf.float32) / 255.0
    return image, tf.cast(label, tf.int64)

# Load and prepare CIFAR-10 dataset
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.cifar10.load_data()

# Convert labels to int64
train_labels = train_labels.astype(np.int64)
test_labels = test_labels.astype(np.int64)

# Create TensorFlow datasets with explicit dtype
train_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(train_images, tf.float32), tf.cast(train_labels, tf.int64))
)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (tf.cast(test_images, tf.float32), tf.cast(test_labels, tf.int64))
)

# Apply preprocessing
train_dataset = train_dataset.map(preprocess_train, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.map(preprocess_test, num_parallel_calls=tf.data.AUTOTUNE)

# Batch and shuffle datasets
train_dataset = train_dataset.shuffle(1024).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Model definition
def create_model():
    return tf.keras.Sequential([
        # Conv Block 1
        tf.keras.layers.Conv2D(32, 3, padding='same', input_shape=(32, 32, 3)),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPooling2D(2),
        
        # Conv Block 2
        tf.keras.layers.Conv2D(64, 3, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPooling2D(2),
        
        # Conv Block 3
        tf.keras.layers.Conv2D(128, 3, padding='same'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.ReLU(),
        tf.keras.layers.MaxPooling2D(2),
        
        # Classifier
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(10, activation='softmax')  # Added softmax activation
    ])

# Create and compile model
model = create_model()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),  # Removed from_logits=True
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=test_dataset
)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_dataset)
print(f"\nTest accuracy: {test_acc*100:.2f}%")

# Save the entire model
model.save('trained_model.h5')

# Save weights in PyTorch-style binary format (optional)
def save_weights_torch_style(model):
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Conv2D):
            np.array(layer.kernel.numpy(), dtype='float32').tofile(f'{layer.name}_kernel.bin')
            if layer.bias is not None:
                np.array(layer.bias.numpy(), dtype='float32').tofile(f'{layer.name}_bias.bin')
        elif isinstance(layer, tf.keras.layers.BatchNormalization):
            np.array(layer.gamma.numpy(), dtype='float32').tofile(f'{layer.name}_gamma.bin')
            np.array(layer.beta.numpy(), dtype='float32').tofile(f'{layer.name}_beta.bin')
            np.array(layer.moving_mean.numpy(), dtype='float32').tofile(f'{layer.name}_moving_mean.bin')
            np.array(layer.moving_variance.numpy(), dtype='float32').tofile(f'{layer.name}_moving_variance.bin')
        elif isinstance(layer, tf.keras.layers.Dense):
            np.array(layer.kernel.numpy(), dtype='float32').tofile(f'{layer.name}_kernel.bin')
            np.array(layer.bias.numpy(), dtype='float32').tofile(f'{layer.name}_bias.bin')

save_weights_torch_style(model)


2025-01-30 11:42:38.856812: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 153600000 exceeds 10% of free system memory.
2025-01-30 11:42:42.575621: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 614400000 exceeds 10% of free system memory.
2025-01-30 11:42:48.008098: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 122880000 exceeds 10% of free system memory.


KeyboardInterrupt: 