In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
import os
import zipfile
import urllib.request
import time

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

# Step 1: Download TinyImageNet dataset (subset of ImageNet)
url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
data_dir = './data/tiny-imagenet-200'

if not os.path.exists(data_dir):
    print("Downloading TinyImageNet dataset...")
    urllib.request.urlretrieve(url, './tiny-imagenet-200.zip')

    # Extracting the dataset
    with zipfile.ZipFile('./tiny-imagenet-200.zip', 'r') as zip_ref:
        zip_ref.extractall('./data')
    print("Dataset downloaded and extracted.")

# Step 2: Define data transformations
input_size = 224
batch_size = 32

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Step 3: Load datasets
train_dir = os.path.join(data_dir, 'train')

# TinyImageNet has no explicit train/val split; let's create one
full_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])

# Use only the first 80 classes for your use case
class_subset = 80
targets = torch.tensor([sample[1] for sample in full_dataset.samples])
indices = [i for i in range(len(targets)) if targets[i] < class_subset]

# Create subset dataset
subset_dataset = torch.utils.data.Subset(full_dataset, indices)

# Split the subset into training and validation (80% train, 20% val)
train_size = int(0.8 * len(subset_dataset))
val_size = len(subset_dataset) - train_size
train_dataset, val_dataset = random_split(subset_dataset, [train_size, val_size])

# Create DataLoaders for training and validation datasets
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Step 4: Define ResNet-50 model
model = models.resnet50(pretrained=False)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, class_subset)  # Modify for 80 classes
model = model.to(device)

# Step 5: Define loss function, optimizer, and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Step 6: Training with computation counter
def train_model_with_counter(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10):
    total_start_time = time.time()
    total_computations = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 20)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        epoch_start_time = time.time()

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Compute FLOPs and update counter
            total_computations += inputs.size(0) * inputs.size(2) * inputs.size(3) * num_features * class_subset

        epoch_duration = time.time() - epoch_start_time
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct / total
        print(f'Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_acc:.4f}')
        print(f'Epoch {epoch + 1} duration: {epoch_duration:.4f} seconds')

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_epoch_loss = val_loss / len(val_loader.dataset)
        val_epoch_acc = val_correct / val_total
        print(f'Val Loss: {val_epoch_loss:.4f}, Val Accuracy: {val_epoch_acc:.4f}')

        scheduler.step()

    total_duration = time.time() - total_start_time
    print(f'Total training time: {total_duration:.4f} seconds')
    print(f'Total Computations (FLOPs approximation): {total_computations:.2e}')
    return total_duration, total_computations

# Start training and measure time
total_training_time, total_computations = train_model_with_counter(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10)

# Assuming the CPU clock speed of Google Colab is ~2.3 GHz (check your instance using `!lscpu`)
cpu_clock_speed_ghz = 2.3
total_clock_cycles = total_training_time * cpu_clock_speed_ghz * 1e9
print(f"Estimated Total Clock Cycles: {total_clock_cycles:.2e} cycles")


Using device: cuda


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


Epoch 1/10
--------------------
Train Loss: 4.1929, Train Accuracy: 0.0493
Epoch 1 duration: 213.2735 seconds
Val Loss: 4.0508, Val Accuracy: 0.0664
Epoch 2/10
--------------------
Train Loss: 3.9087, Train Accuracy: 0.0874
Epoch 2 duration: 211.8815 seconds
Val Loss: 3.8032, Val Accuracy: 0.1174
Epoch 3/10
--------------------
Train Loss: 3.7208, Train Accuracy: 0.1207
Epoch 3 duration: 212.3744 seconds
Val Loss: 3.7822, Val Accuracy: 0.1154
Epoch 4/10
--------------------
Train Loss: 3.5542, Train Accuracy: 0.1506
Epoch 4 duration: 204.1191 seconds
Val Loss: 3.6045, Val Accuracy: 0.1529
Epoch 5/10
--------------------
Train Loss: 3.3952, Train Accuracy: 0.1816
Epoch 5 duration: 204.3645 seconds
Val Loss: 3.3791, Val Accuracy: 0.2009
Epoch 6/10
--------------------
Train Loss: 3.2550, Train Accuracy: 0.2087
Epoch 6 duration: 202.4921 seconds
Val Loss: 3.2903, Val Accuracy: 0.2195
Epoch 7/10
--------------------
Train Loss: 3.1196, Train Accuracy: 0.2344
Epoch 7 duration: 201.2007 seco

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, random_split
import os

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Using device: {device}')

# Data transformations
input_size = 224
batch_size = 32
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Load datasets
data_dir = './data/tiny-imagenet-200/train'
full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms['train'])
class_subset = 80

targets = torch.tensor([sample[1] for sample in full_dataset.samples])
indices = [i for i in range(len(targets)) if targets[i] < class_subset]
subset_dataset = torch.utils.data.Subset(full_dataset, indices)

train_size = int(0.8 * len(subset_dataset))
val_size = len(subset_dataset) - train_size
train_dataset, val_dataset = random_split(subset_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Define ResNet-50 model
model = models.resnet50(pretrained=False)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, class_subset)  # Modify for 80 classes
model = model.to(device)

# Loss, optimizer, and scheduler
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Training function with computation counter
def train_model_with_computations(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=10):
    total_computations = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 20)

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        epoch_computations = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # Calculate FLOPs approximation
            epoch_computations += inputs.size(0) * inputs.size(2) * inputs.size(3) * num_features * class_subset

        train_loss = running_loss / len(train_loader.dataset)
        train_acc = correct / total
        total_computations += epoch_computations

        print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.4f}')
        print(f'Computations for Epoch {epoch + 1}: {epoch_computations:.2e}')

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_epoch_loss = val_loss / len(val_loader.dataset)
        val_epoch_acc = val_correct / val_total
        print(f'Val Loss: {val_epoch_loss:.4f}, Val Accuracy: {val_epoch_acc:.4f}')

        scheduler.step()

    print(f'Total Computations (FLOPs approximation): {total_computations:.2e}')
    return total_computations

# Train and compute
total_computations = train_model_with_computations(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs=3)


Using device: cuda
Epoch 1/3
--------------------
Train Loss: 4.1769, Train Accuracy: 0.0515
Computations for Epoch 1: 2.63e+14
Val Loss: 4.0526, Val Accuracy: 0.0691
Epoch 2/3
--------------------
Train Loss: 3.9412, Train Accuracy: 0.0832
Computations for Epoch 2: 2.63e+14
Val Loss: 4.3925, Val Accuracy: 0.0747
Epoch 3/3
--------------------
Train Loss: 3.7488, Train Accuracy: 0.1174
Computations for Epoch 3: 2.63e+14
Val Loss: 3.8175, Val Accuracy: 0.1333
Total Computations (FLOPs approximation): 7.89e+14


In [3]:
import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Function to generate RPQ matrix
def random_projection_matrix(input_dim, output_dim):
    """Generates a random projection matrix with mean=0 and variance=1."""
    return torch.randn(input_dim, output_dim).to(device)

# Dimensions for RPQ
input_dim = 2048  # Example input feature size
output_dim = 2048  # Example output feature size

# Generate the RPQ matrix
rpq_matrix = random_projection_matrix(input_dim, output_dim)

# Compute statistics
mean = rpq_matrix.mean().item()
variance = rpq_matrix.var().item()

# Print results
print(f"RPQ Matrix Mean: {mean:.4f}")
print(f"RPQ Matrix Variance: {variance:.4f}")

# Verify application with dummy input
dummy_input = torch.randn(32, input_dim).to(device)  # Batch size of 32, input_dim features
projected_output = torch.matmul(dummy_input, rpq_matrix)

# Check shape and statistics of projected output
print(f"Projected Output Shape: {projected_output.shape}")
projected_mean = projected_output.mean().item()
projected_variance = projected_output.var().item()
print(f"Projected Output Mean: {projected_mean:.4f}")
print(f"Projected Output Variance: {projected_variance:.4f}")


RPQ Matrix Mean: -0.0005
RPQ Matrix Variance: 1.0000
Projected Output Shape: torch.Size([32, 2048])
Projected Output Mean: -0.1055
Projected Output Variance: 2027.4707
