In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a simple neural network
class SimpleNet(nn.Module):
    """Single fully connected layer mapping 784 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        # The attribute name `fc` determines the state_dict keys.
        self.fc = nn.Linear(in_features=784, out_features=10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and apply the linear layer."""
        flat = x.view(-1, 784)
        return self.fc(flat)

def preprocess_data(data_loader, device):
    """Gather every batch of ``data_loader`` into two NumPy arrays.

    Each batch is moved to ``device``, its inputs are flattened to one row per
    sample, and inputs and labels are then copied back to the CPU as NumPy
    arrays. The flattening is the only transformation applied (placeholder
    for real preprocessing).

    Returns:
        Tuple ``(data, labels)`` holding the concatenated per-batch arrays.
    """
    flat_batches = []
    label_batches = []
    for batch_inputs, batch_labels in data_loader:
        batch_inputs = batch_inputs.to(device)
        batch_labels = batch_labels.to(device)
        rows = batch_inputs.view(batch_inputs.size(0), -1)
        # NumPy conversion needs host memory, hence the .cpu() hop.
        flat_batches.append(rows.cpu().numpy())
        label_batches.append(batch_labels.cpu().numpy())
    data = np.concatenate(flat_batches)
    labels = np.concatenate(label_batches)
    return data, labels

def train(model, device, train_loader, criterion, optimizer, epochs=1):
    """Run ``epochs`` passes of gradient-descent training over ``train_loader``.

    Every batch is moved to ``device`` before the forward pass; the optimizer
    takes one step per batch. Nothing is returned; ``model`` is updated in place.
    """
    model.train()
    for _ in range(epochs):
        for batch, labels in train_loader:
            batch = batch.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(batch), labels)
            batch_loss.backward()
            optimizer.step()

def calculate_metrics(start_time, end_time, num_operations):
    """Derive duration, throughput and per-operation latency from two timestamps.

    Args:
        start_time: Timestamp taken before the measured work, in seconds.
        end_time: Timestamp taken after the measured work, in seconds.
        num_operations: Number of operations performed in the interval.

    Returns:
        Tuple ``(duration, throughput, latency)``: elapsed seconds, operations
        per second, and seconds per operation. Throughput is 0.0 when the
        duration is zero and latency is 0.0 when ``num_operations`` is zero,
        so an empty interval does not raise ``ZeroDivisionError``.
    """
    duration = end_time - start_time
    # A zero-length interval (e.g. a benchmark that never ran) has no throughput.
    throughput = num_operations / duration if duration else 0.0
    latency = duration / num_operations if num_operations else 0.0
    return duration, throughput, latency

def main():
    """Benchmark SimpleNet training on the CPU and, when available, on the GPU.

    Loads the Fashion MNIST training split, times CPU preprocessing plus
    training, then times the GPU training loop only, and prints duration,
    throughput and latency for each. GPU metrics are printed only when the
    GPU run completed; a failed or unavailable GPU leaves the CPU report intact.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the neural network, loss function, and optimizer
    model = SimpleNet()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # CPU Preprocessing and Training
    start_time_cpu = time.time()

    cpu_data, cpu_labels = preprocess_data(trainloader, 'cpu')
    cpu_data = torch.from_numpy(cpu_data).float()
    cpu_labels = torch.from_numpy(cpu_labels).long()
    cpu_dataset = torch.utils.data.TensorDataset(cpu_data, cpu_labels)
    cpu_train_loader = torch.utils.data.DataLoader(cpu_dataset, batch_size=64, shuffle=True)
    train(model, 'cpu', cpu_train_loader, criterion, optimizer)

    end_time_cpu = time.time()

    # GPU Preprocessing and Training
    # Stays None unless the GPU run finishes, so the reporting code never
    # touches a timestamp that was not recorded.
    gpu_times = None
    try:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")
        device = torch.device('cuda')

        # Reload data for GPU preprocessing and training
        trainloader_gpu = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

        # Data preprocessing on GPU
        gpu_data, gpu_labels = preprocess_data(trainloader_gpu, device)
        gpu_data = torch.from_numpy(gpu_data).float().to(device)
        gpu_labels = torch.from_numpy(gpu_labels).long().to(device)
        gpu_dataset = torch.utils.data.TensorDataset(gpu_data, gpu_labels)
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)

        # Train on GPU (the same model instance that was just trained on the CPU)
        model.to(device)
        optimizer = optim.SGD(model.parameters(), lr=0.01)

        # CUDA work is queued asynchronously: drain it before and after the
        # timed region so the wall-clock interval covers exactly the training.
        torch.cuda.synchronize()
        start_time_gpu = time.time()
        train(model, device, gpu_train_loader, criterion, optimizer)
        torch.cuda.synchronize()
        end_time_gpu = time.time()
        gpu_times = (start_time_gpu, end_time_gpu)

        print("GPU Benchmark successful.")
    except Exception as e:
        # Best-effort: report the failure and continue with CPU-only metrics.
        print(f"GPU Benchmark failed: {e}")

    # Calculate and print metrics
    num_operations = len(trainloader.dataset)
    cpu_duration, cpu_throughput, cpu_latency = calculate_metrics(start_time_cpu, end_time_cpu, num_operations)

    print("\nPerformance Metrics:")
    print(f"CPU Duration: {cpu_duration:.6f} seconds")
    print(f"CPU Throughput: {cpu_throughput:.6f} ops/second")
    print(f"CPU Latency: {cpu_latency:.12f} seconds/operation\n")

    if gpu_times is not None:
        gpu_duration, gpu_throughput, gpu_latency = calculate_metrics(gpu_times[0], gpu_times[1], num_operations)
        print(f"GPU Duration: {gpu_duration:.6f} seconds")
        print(f"GPU Throughput: {gpu_throughput:.6f} ops/second")
        print(f"GPU Latency: {gpu_latency:.12f} seconds/operation")

if __name__ == "__main__":
    main()


GPU Benchmark successful.

Performance Metrics:
CPU Duration: 14.178895 seconds
CPU Throughput: 4231.641608 ops/second
CPU Latency: 0.000236314909 seconds/operation

GPU Duration: 4.307649 seconds
GPU Throughput: 13928.711794 ops/second
GPU Latency: 0.000071794148 seconds/operation


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a simple neural network
class SimpleNet(nn.Module):
    """One fully connected 784 -> 10 layer with no activation."""

    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(784, 10)

    def forward(self, x):
        """Return the layer output for ``x`` reshaped to rows of 784 values."""
        return self.fc(x.view(-1, 784))

def preprocess_data(data_loader):
    """Flatten every batch of ``data_loader`` and stack the results as NumPy arrays.

    Inputs are reshaped to one row per sample (placeholder for real
    preprocessing); labels are converted unchanged.

    Returns:
        Tuple ``(data, labels)`` of the concatenated per-batch arrays.
    """
    converted = [
        (inputs.view(inputs.size(0), -1).numpy(), target.numpy())
        for inputs, target in data_loader
    ]
    flat_inputs = [pair[0] for pair in converted]
    all_targets = [pair[1] for pair in converted]
    return np.concatenate(flat_inputs), np.concatenate(all_targets)

def train(model, device, train_loader, criterion, optimizer, epochs=1):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Each batch is transferred to ``device``; gradients are reset, the loss is
    back-propagated and the optimizer steps once per batch.
    """
    def run_batch(inputs, labels):
        # One optimisation step on a single batch.
        optimizer.zero_grad()
        prediction = model(inputs)
        criterion(prediction, labels).backward()
        optimizer.step()

    model.train()
    for _ in range(epochs):
        for inputs, labels in train_loader:
            run_batch(inputs.to(device), labels.to(device))

def calculate_metrics(start_time, end_time, num_operations):
    """Compute elapsed time, throughput and latency for a timed interval.

    Args:
        start_time: Timestamp (seconds) recorded before the work.
        end_time: Timestamp (seconds) recorded after the work.
        num_operations: How many operations the interval covered.

    Returns:
        ``(duration, throughput, latency)``. A zero duration yields a
        throughput of 0.0 and a zero operation count yields a latency of 0.0,
        rather than raising ``ZeroDivisionError``.
    """
    duration = end_time - start_time
    if duration:
        throughput = num_operations / duration
    else:
        # Nothing was measured (start == end); report no throughput.
        throughput = 0.0
    latency = duration / num_operations if num_operations else 0.0
    return duration, throughput, latency

def main():
    """Time CPU preprocessing and GPU training of SimpleNet on Fashion MNIST.

    The CPU interval covers the preprocessing pass; the GPU interval covers
    the training loop. Combined and GPU metrics are printed only when the GPU
    run completed; otherwise only the CPU metrics are reported.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the neural network and loss function
    model = SimpleNet()
    criterion = nn.CrossEntropyLoss()

    # CPU Preprocessing
    start_time_cpu = time.time()

    cpu_data, cpu_labels = preprocess_data(trainloader)
    cpu_data = torch.from_numpy(cpu_data).float()
    cpu_labels = torch.from_numpy(cpu_labels).long()

    end_time_cpu = time.time()

    # GPU Training
    # Remains None unless training finishes, so no unrecorded timestamp is read.
    gpu_times = None
    try:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")
        device = torch.device('cuda')
        model.to(device)
        # Built after the move so it references the on-device parameters.
        optimizer = optim.SGD(model.parameters(), lr=0.01)

        gpu_data = cpu_data.to(device)
        gpu_labels = cpu_labels.to(device)
        gpu_dataset = torch.utils.data.TensorDataset(gpu_data, gpu_labels)
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)

        # Train on GPU. CUDA work is asynchronous, so synchronize around the
        # timed region to measure the kernels rather than their enqueueing.
        torch.cuda.synchronize()
        start_time_gpu = time.time()
        train(model, device, gpu_train_loader, criterion, optimizer)
        torch.cuda.synchronize()
        end_time_gpu = time.time()
        gpu_times = (start_time_gpu, end_time_gpu)

        print("GPU Benchmark successful.")
    except Exception as e:
        # Best-effort: report and fall back to CPU-only metrics.
        print(f"GPU Benchmark failed: {e}")

    # Calculate and print metrics
    num_operations = len(trainloader.dataset)
    cpu_duration, cpu_throughput, cpu_latency = calculate_metrics(start_time_cpu, end_time_cpu, num_operations)

    if gpu_times is not None:
        start_time_gpu, end_time_gpu = gpu_times
        gpu_duration, gpu_throughput, gpu_latency = calculate_metrics(start_time_gpu, end_time_gpu, num_operations)

        # Combine metrics: wall-clock span from CPU start to GPU end
        total_duration = end_time_gpu - start_time_cpu
        total_throughput = num_operations / total_duration
        avg_latency = total_duration / num_operations

        print("\nCombined Performance Metrics:")
        print(f"Total Duration: {total_duration:.6f} seconds")
        print(f"Total Throughput: {total_throughput:.6f} ops/second")
        print(f"Average Latency: {avg_latency:.12f} seconds/operation\n")

    print("Individual Metrics:")
    print(f"CPU Duration: {cpu_duration:.6f} seconds")
    print(f"CPU Throughput: {cpu_throughput:.6f} ops/second")
    print(f"CPU Latency: {cpu_latency:.12f} seconds/operation")
    if gpu_times is not None:
        print(f"GPU Duration: {gpu_duration:.6f} seconds")
        print(f"GPU Throughput: {gpu_throughput:.6f} ops/second")
        print(f"GPU Latency: {gpu_latency:.12f} seconds/operation")

if __name__ == "__main__":
    main()


GPU Benchmark successful.

Combined Performance Metrics:
Total Duration: 14.786731 seconds
Total Throughput: 4057.691862 ops/second
Average Latency: 0.000246445525 seconds/operation

Individual Metrics:
CPU Duration: 13.566403 seconds
CPU Throughput: 4422.690328 ops/second
CPU Latency: 0.000226106719 seconds/operation
GPU Duration: 1.179706 seconds
GPU Throughput: 50860.115112 ops/second
GPU Latency: 0.000019661772 seconds/operation


# Here we adjust the architecture to suit our specific requirements

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network for CPU
class DeepNetCPU(nn.Module):
    """Five-layer fully connected network: 784 -> 512 -> 256 -> 128 -> 64 -> 10.

    ReLU follows each of the first four layers; the last layer's output is
    returned without an activation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and run it through the stack."""
        hidden = x.view(-1, 784)
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = torch.relu(layer(hidden))
        return self.fc5(hidden)

def preprocess_data_cpu(data_loader):
    """Collect all batches of ``data_loader`` into flattened NumPy arrays.

    Returns:
        Tuple ``(data, labels)``: inputs reshaped to one row per sample and the
        matching labels, each concatenated over all batches.
    """
    sample_rows, sample_labels = [], []
    for batch_inputs, batch_targets in data_loader:
        # Placeholder preprocessing: flatten everything after the batch axis.
        batch_size = batch_inputs.size(0)
        sample_rows.append(batch_inputs.view(batch_size, -1).numpy())
        sample_labels.append(batch_targets.numpy())
    stacked_rows = np.concatenate(sample_rows)
    stacked_labels = np.concatenate(sample_labels)
    return stacked_rows, stacked_labels

def train_cpu(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Batches are used as yielded by the loader (no device transfer); the
    optimizer steps once per batch.
    """
    model.train()
    for _ in range(epochs):
        for features, labels in train_loader:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), labels)
            batch_loss.backward()
            optimizer.step()

def calculate_metrics_cpu(start_time, end_time, num_operations):
    """Return ``(duration, throughput, latency)`` for a timed interval.

    Duration is ``end_time - start_time``; throughput is operations per unit
    of duration and latency is duration per operation.
    """
    elapsed = end_time - start_time
    return elapsed, num_operations / elapsed, elapsed / num_operations

def main_cpu():
    """Benchmark CPU preprocessing plus training of DeepNetCPU on Fashion MNIST.

    The timed interval spans both the preprocessing pass and the training
    run, so the printed metrics reflect all the CPU work performed here.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the deeper neural network for CPU, loss function, and optimizer
    model_cpu = DeepNetCPU()
    criterion_cpu = nn.CrossEntropyLoss()
    optimizer_cpu = optim.SGD(model_cpu.parameters(), lr=0.01)

    # CPU Preprocessing
    start_time_cpu = time.time()

    cpu_data, cpu_labels = preprocess_data_cpu(trainloader)
    cpu_data = torch.from_numpy(cpu_data).float()
    cpu_labels = torch.from_numpy(cpu_labels).long()

    # Train on CPU. Training iterates the original loader; the preprocessed
    # tensors above are produced only as part of the timed workload.
    train_cpu(model_cpu, trainloader, criterion_cpu, optimizer_cpu)

    # Stop the clock only after training so the reported duration is not
    # limited to the preprocessing pass.
    end_time_cpu = time.time()

    # Calculate and print metrics for CPU
    num_operations_cpu = len(trainloader.dataset)
    cpu_duration, cpu_throughput, cpu_latency = calculate_metrics_cpu(start_time_cpu, end_time_cpu, num_operations_cpu)

    print("\nCPU Performance Metrics:")
    print(f"CPU Duration: {cpu_duration:.6f} seconds")
    print(f"CPU Throughput: {cpu_throughput:.6f} ops/second")
    print(f"CPU Latency: {cpu_latency:.12f} seconds/operation\n")

if __name__ == "__main__":
    main_cpu()



CPU Performance Metrics:
CPU Duration: 12.614876 seconds
CPU Throughput: 4756.289219 ops/second
CPU Latency: 0.000210247938 seconds/operation



In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network for GPU
class DeepNetGPU(nn.Module):
    """Fully connected stack 784 -> 512 -> 256 -> 128 -> 64 -> 10 with ReLU between layers."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the final layer's output for ``x`` reshaped to rows of 784 values."""
        flat = x.view(-1, 784)
        h1 = torch.relu(self.fc1(flat))
        h2 = torch.relu(self.fc2(h1))
        h3 = torch.relu(self.fc3(h2))
        h4 = torch.relu(self.fc4(h3))
        return self.fc5(h4)

def preprocess_data_gpu(data_loader, device):
    """Move each batch to ``device``, flatten it, and return everything as NumPy arrays.

    The flattening stands in for real preprocessing. Results are copied back
    to the CPU because NumPy arrays live in host memory.

    Returns:
        Tuple ``(data, labels)`` of the concatenated per-batch arrays.
    """
    converted = []
    for inputs, target in data_loader:
        dev_inputs = inputs.to(device)
        dev_target = target.to(device)
        flat = dev_inputs.view(dev_inputs.size(0), -1).cpu().numpy()
        converted.append((flat, dev_target.cpu().numpy()))
    all_inputs = np.concatenate([item[0] for item in converted])
    all_labels = np.concatenate([item[1] for item in converted])
    return all_inputs, all_labels

def train_gpu(model, device, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place for ``epochs`` passes, moving each batch to ``device``."""
    def run_batch(inputs, labels):
        # Single optimisation step: reset grads, back-propagate, update.
        optimizer.zero_grad()
        criterion(model(inputs), labels).backward()
        optimizer.step()

    model.train()
    for _ in range(epochs):
        for inputs, labels in train_loader:
            run_batch(inputs.to(device), labels.to(device))

def calculate_metrics_gpu(start_time, end_time, num_operations):
    """Compute duration, throughput and latency for a timed GPU interval.

    Args:
        start_time: Timestamp (seconds) recorded before the work.
        end_time: Timestamp (seconds) recorded after the work.
        num_operations: Number of operations the interval covered.

    Returns:
        ``(duration, throughput, latency)``. When the duration is zero the
        throughput is 0.0, and when ``num_operations`` is zero the latency is
        0.0, instead of raising ``ZeroDivisionError``.
    """
    duration = end_time - start_time
    # start == end signals that no work was measured.
    throughput = num_operations / duration if duration else 0.0
    latency = duration / num_operations if num_operations else 0.0
    return duration, throughput, latency

def main_gpu():
    """Benchmark GPU training of DeepNetGPU on Fashion MNIST.

    Only the training loop is timed; the preprocessing pass and the transfer
    of the dataset to the GPU happen before the clock starts. Metrics are
    printed only when the GPU run completed.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the deeper neural network for GPU and the loss function
    model_gpu = DeepNetGPU()
    criterion_gpu = nn.CrossEntropyLoss()

    # GPU Training
    # Remains None unless training finishes, so no unrecorded timestamp is read.
    gpu_times = None
    try:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")
        device = torch.device('cuda')
        model_gpu.to(device)
        # Built after the move so it references the on-device parameters.
        optimizer_gpu = optim.SGD(model_gpu.parameters(), lr=0.01)

        gpu_data, gpu_labels = preprocess_data_gpu(trainloader, device)
        gpu_data = torch.from_numpy(gpu_data).float().to(device)
        gpu_labels = torch.from_numpy(gpu_labels).long().to(device)
        gpu_dataset = torch.utils.data.TensorDataset(gpu_data, gpu_labels)
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)

        # Train on GPU. CUDA work is asynchronous, so synchronize around the
        # timed region to measure the kernels rather than their enqueueing.
        torch.cuda.synchronize()
        start_time_gpu = time.time()
        train_gpu(model_gpu, device, gpu_train_loader, criterion_gpu, optimizer_gpu)
        torch.cuda.synchronize()
        end_time_gpu = time.time()
        gpu_times = (start_time_gpu, end_time_gpu)

        print("GPU Benchmark successful.")
    except Exception as e:
        # Best-effort: report the failure; there is nothing to measure.
        print(f"GPU Benchmark failed: {e}")

    if gpu_times is None:
        return

    # Calculate and print metrics for GPU
    num_operations_gpu = len(trainloader.dataset)
    gpu_duration, gpu_throughput, gpu_latency = calculate_metrics_gpu(gpu_times[0], gpu_times[1], num_operations_gpu)

    print("\nGPU Performance Metrics:")
    print(f"GPU Duration: {gpu_duration:.6f} seconds")
    print(f"GPU Throughput: {gpu_throughput:.6f} ops/second")
    print(f"GPU Latency: {gpu_latency:.12f} seconds/operation\n")

if __name__ == "__main__":
    main_gpu()


GPU Benchmark successful.

GPU Performance Metrics:
GPU Duration: 19.909619 seconds
GPU Throughput: 3013.618716 ops/second
GPU Latency: 0.000331826981 seconds/operation



In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network
class DeepNet(nn.Module):
    """Four-layer fully connected network: 784 -> 512 -> 256 -> 128 -> 10.

    ReLU follows each of the first three layers; the last layer's output is
    returned without an activation.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and run it through the stack."""
        hidden = x.view(-1, 784)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = torch.relu(layer(hidden))
        return self.fc4(hidden)

def preprocess_data(data_loader):
    """Flatten each batch of ``data_loader`` and return the stacked NumPy arrays.

    Returns:
        Tuple ``(data, labels)``: inputs with one row per sample, and the
        corresponding labels, concatenated across batches.
    """
    row_chunks = []
    label_chunks = []
    for inputs, target in data_loader:
        # Placeholder preprocessing: keep the batch axis, collapse the rest.
        n_samples = inputs.size(0)
        flattened = inputs.view(n_samples, -1)
        row_chunks.append(flattened.numpy())
        label_chunks.append(target.numpy())
    return np.concatenate(row_chunks), np.concatenate(label_chunks)

def train(model, device, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Every batch is moved to ``device`` first; the optimizer steps once per batch.
    """
    model.train()
    for _ in range(epochs):
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            prediction = model(inputs)
            criterion(prediction, labels).backward()
            optimizer.step()

def calculate_metrics(start_time, end_time, num_operations):
    """Compute duration, throughput and per-operation latency for an interval.

    Args:
        start_time: Timestamp (seconds) recorded before the work.
        end_time: Timestamp (seconds) recorded after the work.
        num_operations: Number of operations the interval covered.

    Returns:
        ``(duration, throughput, latency)``. Throughput is 0.0 for a
        zero-length interval and latency is 0.0 for zero operations, so
        neither case raises ``ZeroDivisionError``.
    """
    duration = end_time - start_time
    # Guard both divisions: an unmeasured run has start == end.
    throughput = num_operations / duration if duration else 0.0
    latency = duration / num_operations if num_operations else 0.0
    return duration, throughput, latency

def main():
    """Time CPU preprocessing and GPU training of DeepNet on Fashion MNIST.

    The CPU interval covers the preprocessing pass; the GPU interval covers
    the training loop. Combined and GPU metrics are printed only when the GPU
    run completed; otherwise only the CPU metrics are reported.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the deeper neural network and loss function
    model = DeepNet()
    criterion = nn.CrossEntropyLoss()

    # CPU Preprocessing
    start_time_cpu = time.time()

    cpu_data, cpu_labels = preprocess_data(trainloader)
    cpu_data = torch.from_numpy(cpu_data).float()
    cpu_labels = torch.from_numpy(cpu_labels).long()

    end_time_cpu = time.time()

    # GPU Training
    # Remains None unless training finishes, so no unrecorded timestamp is read.
    gpu_times = None
    try:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")
        device = torch.device('cuda')
        model.to(device)
        # Built after the move so it references the on-device parameters.
        optimizer = optim.SGD(model.parameters(), lr=0.01)

        gpu_data = cpu_data.to(device)
        gpu_labels = cpu_labels.to(device)
        gpu_dataset = torch.utils.data.TensorDataset(gpu_data, gpu_labels)
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)

        # Train on GPU. CUDA work is asynchronous, so synchronize around the
        # timed region to measure the kernels rather than their enqueueing.
        torch.cuda.synchronize()
        start_time_gpu = time.time()
        train(model, device, gpu_train_loader, criterion, optimizer)
        torch.cuda.synchronize()
        end_time_gpu = time.time()
        gpu_times = (start_time_gpu, end_time_gpu)

        print("GPU Benchmark successful.")
    except Exception as e:
        # Best-effort: report and fall back to CPU-only metrics.
        print(f"GPU Benchmark failed: {e}")

    # Calculate and print metrics
    num_operations = len(trainloader.dataset)
    cpu_duration, cpu_throughput, cpu_latency = calculate_metrics(start_time_cpu, end_time_cpu, num_operations)

    if gpu_times is not None:
        start_time_gpu, end_time_gpu = gpu_times
        gpu_duration, gpu_throughput, gpu_latency = calculate_metrics(start_time_gpu, end_time_gpu, num_operations)

        # Combine metrics: wall-clock span from CPU start to GPU end
        total_duration = end_time_gpu - start_time_cpu
        total_throughput = num_operations / total_duration
        avg_latency = total_duration / num_operations

        print("\nCombined Performance Metrics:")
        print(f"Total Duration: {total_duration:.6f} seconds")
        print(f"Total Throughput: {total_throughput:.6f} ops/second")
        print(f"Average Latency: {avg_latency:.12f} seconds/operation\n")

    print("Individual Metrics:")
    print(f"CPU Duration: {cpu_duration:.6f} seconds")
    print(f"CPU Throughput: {cpu_throughput:.6f} ops/second")
    print(f"CPU Latency: {cpu_latency:.12f} seconds/operation")
    if gpu_times is not None:
        print(f"GPU Duration: {gpu_duration:.6f} seconds")
        print(f"GPU Throughput: {gpu_throughput:.6f} ops/second")
        print(f"GPU Latency: {gpu_latency:.12f} seconds/operation")

if __name__ == "__main__":
    main()


GPU Benchmark successful.

Combined Performance Metrics:
Total Duration: 14.911492 seconds
Total Throughput: 4023.742196 ops/second
Average Latency: 0.000248524868 seconds/operation

Individual Metrics:
CPU Duration: 13.083973 seconds
CPU Throughput: 4585.762912 ops/second
CPU Latency: 0.000218066223 seconds/operation
GPU Duration: 1.785562 seconds
GPU Throughput: 33602.864929 ops/second
GPU Latency: 0.000029759367 seconds/operation


# Let's run 10 epochs on both the CPU and the GPU

# Modify the preprocessing on the CPU and the training on the GPU

# The real difference shows up in the code below

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network for GPU
class DeepNetGPU(nn.Module):
    """Fully connected network with layer widths 784, 512, 256, 128, 64 and 10.

    Layers are registered as ``fc1`` .. ``fc5``; ReLU is applied after every
    layer except the last.
    """

    def __init__(self):
        super().__init__()
        widths = (784, 512, 256, 128, 64, 10)
        # Create fc1..fc5 in order so parameter registration and
        # initialisation order match the layer numbering.
        for index, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:]), start=1):
            setattr(self, f"fc{index}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and return the last layer's output."""
        out = x.view(-1, 784)
        out = torch.relu(self.fc1(out))
        out = torch.relu(self.fc2(out))
        out = torch.relu(self.fc3(out))
        out = torch.relu(self.fc4(out))
        return self.fc5(out)

def preprocess_data_gpu(data_loader, device):
    """Send each batch to ``device``, flatten it, and collect NumPy copies.

    The flattening is a placeholder for real preprocessing. Arrays are copied
    back to the CPU before the NumPy conversion.

    Returns:
        Tuple ``(data, labels)`` of the concatenated per-batch arrays.
    """
    flat_batches, label_batches = [], []
    for inputs, target in data_loader:
        inputs = inputs.to(device)
        target = target.to(device)
        per_sample = inputs.view(inputs.size(0), -1)
        flat_batches.append(per_sample.cpu().numpy())
        label_batches.append(target.cpu().numpy())
    data = np.concatenate(flat_batches)
    labels = np.concatenate(label_batches)
    return data, labels

def train_gpu(model, device, train_loader, criterion, optimizer, epochs=10):
    """Run ``epochs`` training passes over ``train_loader`` with batches on ``device``.

    The model is updated in place; nothing is returned.
    """
    model.train()
    for _ in range(epochs):
        for batch, labels in train_loader:
            batch, labels = batch.to(device), labels.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(batch), labels)
            batch_loss.backward()
            optimizer.step()

def calculate_metrics_gpu(start_preprocessing, end_preprocessing, start_training, end_training, num_operations):
    """Compute combined metrics for a preprocessing phase and a training phase.

    Args:
        start_preprocessing: Timestamp (seconds) before preprocessing.
        end_preprocessing: Timestamp (seconds) after preprocessing.
        start_training: Timestamp (seconds) before training.
        end_training: Timestamp (seconds) after training.
        num_operations: Number of operations the run covered.

    Returns:
        ``(total_duration, throughput, preprocessing_latency,
        training_latency, total_latency)``. The total duration is the sum of
        the two phase durations. Throughput is 0.0 when the total duration is
        zero and every latency is 0.0 when ``num_operations`` is zero, rather
        than raising ``ZeroDivisionError``.
    """
    preprocessing_duration = end_preprocessing - start_preprocessing
    training_duration = end_training - start_training
    total_duration = preprocessing_duration + training_duration

    # Both phases empty (start == end) means nothing was measured.
    throughput = num_operations / total_duration if total_duration else 0.0

    if num_operations:
        preprocessing_latency = preprocessing_duration / num_operations
        training_latency = training_duration / num_operations
        total_latency = total_duration / num_operations
    else:
        preprocessing_latency = training_latency = total_latency = 0.0

    return total_duration, throughput, preprocessing_latency, training_latency, total_latency

def main_gpu():
    """Benchmark GPU preprocessing and GPU training of DeepNetGPU on Fashion MNIST.

    Preprocessing (including the transfer of the dataset to the GPU) and the
    10-epoch training loop are timed as separate phases. Metrics are printed
    only when both phases completed.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the deeper neural network for GPU and the loss function
    model_gpu = DeepNetGPU()
    criterion_gpu = nn.CrossEntropyLoss()

    # GPU Training
    # Remains None unless both phases finish, so no unrecorded timestamp is read.
    phase_times = None
    try:
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")
        device = torch.device('cuda')
        model_gpu.to(device)
        # Built after the move so it references the on-device parameters.
        optimizer_gpu = optim.SGD(model_gpu.parameters(), lr=0.01)

        # GPU Preprocessing
        torch.cuda.synchronize()
        start_time_preprocessing_gpu = time.time()
        gpu_data, gpu_labels = preprocess_data_gpu(trainloader, device)
        gpu_data = torch.from_numpy(gpu_data).float().to(device)
        gpu_labels = torch.from_numpy(gpu_labels).long().to(device)
        gpu_dataset = torch.utils.data.TensorDataset(gpu_data, gpu_labels)
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)
        # CUDA work is asynchronous: wait for it so each phase's wall-clock
        # interval covers only that phase's device work.
        torch.cuda.synchronize()
        end_time_preprocessing_gpu = time.time()

        # Train on GPU
        start_time_training_gpu = time.time()
        train_gpu(model_gpu, device, gpu_train_loader, criterion_gpu, optimizer_gpu, epochs=10)
        torch.cuda.synchronize()
        end_time_training_gpu = time.time()

        phase_times = (
            start_time_preprocessing_gpu, end_time_preprocessing_gpu,
            start_time_training_gpu, end_time_training_gpu,
        )
        print("GPU Benchmark successful.")
    except Exception as e:
        # Best-effort: report the failure; there is nothing to measure.
        print(f"GPU Benchmark failed: {e}")

    if phase_times is None:
        return

    # Calculate and print metrics for GPU
    num_operations_gpu = len(trainloader.dataset)
    gpu_duration, gpu_throughput, gpu_preprocessing_latency, gpu_training_latency, gpu_total_latency = calculate_metrics_gpu(
        phase_times[0], phase_times[1], phase_times[2], phase_times[3],
        num_operations_gpu
    )

    print("\nGPU Performance Metrics:")
    print(f"GPU Total Duration: {gpu_duration:.6f} seconds")
    print(f"GPU Throughput: {gpu_throughput:.6f} ops/second")
    print(f"GPU Preprocessing Latency: {gpu_preprocessing_latency:.12f} seconds/operation")
    print(f"GPU Training Latency: {gpu_training_latency:.12f} seconds/operation")
    print(f"GPU Total Latency: {gpu_total_latency:.12f} seconds/operation\n")

if __name__ == "__main__":
    main_gpu()


GPU Benchmark successful.

GPU Performance Metrics:
GPU Total Duration: 34.816493 seconds
GPU Throughput: 1723.321208 ops/second
GPU Preprocessing Latency: 0.000228734322 seconds/operation
GPU Training Latency: 0.000351540554 seconds/operation
GPU Total Latency: 0.000580274876 seconds/operation



In [None]:
# Use this version
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network for CPU
class DeepNetCPU(nn.Module):
    """Dense network 784 -> 512 -> 256 -> 128 -> 64 -> 10 with ReLU hidden activations."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=784, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=128)
        self.fc4 = nn.Linear(in_features=128, out_features=64)
        self.fc5 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and return the last layer's output."""
        activations = x.view(-1, 784)
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4)
        for hidden_layer in hidden_layers:
            activations = torch.relu(hidden_layer(activations))
        # No activation on the output layer.
        return self.fc5(activations)

def preprocess_data_cpu(data_loader):
    """Convert all batches of ``data_loader`` to flattened NumPy arrays.

    Returns:
        Tuple ``(data, labels)``: inputs reshaped to one row per sample and
        their labels, concatenated over every batch.
    """
    converted = [
        (inputs.view(inputs.size(0), -1).numpy(), target.numpy())
        for inputs, target in data_loader
    ]
    data = np.concatenate([flat for flat, _ in converted])
    labels = np.concatenate([batch_labels for _, batch_labels in converted])
    return data, labels

def train_cpu(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place for ``epochs`` passes over ``train_loader``.

    Batches are consumed exactly as the loader yields them; the optimizer
    steps once per batch.
    """
    def run_batch(inputs, labels):
        # Reset gradients, back-propagate this batch's loss, apply the update.
        optimizer.zero_grad()
        criterion(model(inputs), labels).backward()
        optimizer.step()

    model.train()
    for _ in range(epochs):
        for inputs, labels in train_loader:
            run_batch(inputs, labels)

def calculate_metrics_cpu(start_preprocessing, end_preprocessing, start_training, end_training, num_operations):
    """Return combined metrics for a preprocessing phase and a training phase.

    Returns:
        ``(total_duration, throughput, preprocessing_latency,
        training_latency)`` where the total duration is the sum of both phase
        durations, throughput is operations per unit of total duration, and
        each latency is that phase's duration per operation.
    """
    prep_elapsed = end_preprocessing - start_preprocessing
    fit_elapsed = end_training - start_training
    combined = prep_elapsed + fit_elapsed
    return (
        combined,
        num_operations / combined,
        prep_elapsed / num_operations,
        fit_elapsed / num_operations,
    )

def main_cpu():
    """Benchmark CPU preprocessing and CPU training of DeepNetCPU on Fashion MNIST.

    The preprocessing pass and the training run are timed as separate phases,
    and total duration, throughput and per-phase latencies are printed.
    Training iterates the original data loader.
    """
    # Dataset and loader
    normalize = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    dataset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=normalize)
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=True)

    # Model, loss and optimizer
    net = DeepNetCPU()
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(net.parameters(), lr=0.01)

    # Phase 1: preprocessing
    prep_started = time.time()
    features, targets = preprocess_data_cpu(loader)
    features = torch.from_numpy(features).float()
    targets = torch.from_numpy(targets).long()
    prep_finished = time.time()

    # Phase 2: training
    fit_started = time.time()
    train_cpu(net, loader, loss_fn, sgd)
    fit_finished = time.time()

    # Metrics
    total_seconds, ops_per_second, prep_latency, fit_latency = calculate_metrics_cpu(
        prep_started, prep_finished, fit_started, fit_finished, len(loader.dataset)
    )

    report_lines = (
        "\nCPU Performance Metrics:",
        f"CPU Total Duration: {total_seconds:.6f} seconds",
        f"CPU Throughput: {ops_per_second:.6f} ops/second",
        f"CPU Preprocessing Latency: {prep_latency:.12f} seconds/operation",
        f"CPU Training Latency: {fit_latency:.12f} seconds/operation\n",
    )
    for report_line in report_lines:
        print(report_line)

if __name__ == "__main__":
    main_cpu()



CPU Performance Metrics:
CPU Total Duration: 181.355536 seconds
CPU Throughput: 330.841845 ops/second
CPU Preprocessing Latency: 0.000209672403 seconds/operation
CPU Training Latency: 0.002812919859 seconds/operation



In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import time

# Define a deeper neural network for GPU
class DeepNetGPU(nn.Module):
    """Five dense layers (784 -> 512 -> 256 -> 128 -> 64 -> 10), ReLU after the first four."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 10)

    def forward(self, x):
        """Reshape ``x`` to rows of 784 values and return the last layer's output."""
        flat = x.view(-1, 784)
        hidden = torch.relu(self.fc2(torch.relu(self.fc1(flat))))
        hidden = torch.relu(self.fc4(torch.relu(self.fc3(hidden))))
        return self.fc5(hidden)

def preprocess_data_cpu(data_loader):
    """Flatten every batch from ``data_loader`` and gather it into NumPy arrays.

    Each ``inputs`` tensor is reshaped to ``(batch_size, -1)`` and converted
    with ``.numpy()``; the targets are converted the same way.  The loader is
    iterated exactly once.

    Returns:
        A ``(data, labels)`` pair of arrays, each the concatenation of the
        per-batch arrays in iteration order.
    """
    flat_batches = []
    label_batches = []
    for batch, batch_targets in data_loader:
        # Perform data preprocessing on CPU (placeholder)
        rows = batch.size(0)
        flat_batches.append(batch.view(rows, -1).numpy())
        label_batches.append(batch_targets.numpy())

    all_data = np.concatenate(flat_batches)
    all_labels = np.concatenate(label_batches)
    return all_data, all_labels

def train_gpu(model, device, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` for ``epochs`` full passes over ``train_loader``.

    The model is put into training mode first.  For every batch, the inputs
    and targets are moved to ``device``, gradients are reset, the loss from
    ``criterion`` is back-propagated and ``optimizer`` takes one step.
    Returns nothing.
    """
    model.train()
    for _ in range(epochs):
        for batch, labels in train_loader:
            batch = batch.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch), labels)
            batch_loss.backward()
            optimizer.step()

def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def calculate_metrics(start_cpu, end_cpu, start_gpu, end_gpu, num_operations):
    """Derive duration, throughput and latency for the CPU, GPU and overall phases.

    Args:
        start_cpu: Timestamp (seconds) taken before the CPU phase.
        end_cpu: Timestamp (seconds) taken after the CPU phase.
        start_gpu: Timestamp (seconds) taken before the GPU phase.
        end_gpu: Timestamp (seconds) taken after the GPU phase.
        num_operations: Number of operations processed in each phase.

    Returns:
        A 9-tuple ``(cpu_duration, cpu_throughput, cpu_latency,
        gpu_duration, gpu_throughput, gpu_latency,
        overall_duration, overall_throughput, overall_latency)``.
        Throughput is operations per second and latency is seconds per
        operation.  The overall phase spans ``start_cpu`` to ``end_gpu``, so
        it includes any time spent between the two measured phases.

        A throughput whose duration is zero, or a latency when
        ``num_operations`` is zero, is undefined and is reported as NaN
        instead of raising ``ZeroDivisionError``.
    """
    cpu_duration = end_cpu - start_cpu
    gpu_duration = end_gpu - start_gpu
    overall_duration = end_gpu - start_cpu

    cpu_throughput = _safe_ratio(num_operations, cpu_duration)
    gpu_throughput = _safe_ratio(num_operations, gpu_duration)
    overall_throughput = _safe_ratio(num_operations, overall_duration)

    cpu_latency = _safe_ratio(cpu_duration, num_operations)
    gpu_latency = _safe_ratio(gpu_duration, num_operations)
    overall_latency = _safe_ratio(overall_duration, num_operations)

    return (
        cpu_duration, cpu_throughput, cpu_latency,
        gpu_duration, gpu_throughput, gpu_latency,
        overall_duration, overall_throughput, overall_latency,
    )

def _print_metrics(label, duration, throughput, latency):
    """Print one duration/throughput/latency report section titled ``label``."""
    print(f"\n{label} Performance Metrics:")
    print(f"{label} Duration: {duration:.6f} seconds")
    print(f"{label} Throughput: {throughput:.6f} ops/second")
    print(f"{label} Latency: {latency:.12f} seconds/operation\n")


def main_cpu_gpu():
    """Benchmark CPU preprocessing followed by GPU training on Fashion-MNIST.

    The training set is flattened once on the CPU (timed), the resulting
    tensors are copied to the GPU, and ``DeepNetGPU`` is trained on them
    (timed).  CPU, GPU and overall metrics are printed on success.

    If CUDA is unavailable or the GPU phase raises, the failure is printed
    and only the CPU metrics are reported, because no GPU interval was
    measured.
    """
    # Load Fashion MNIST dataset
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    trainset = torchvision.datasets.FashionMNIST(root='./data', train=True, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Define the deeper neural network for GPU and the loss function.  The
    # optimizer is created later, once the model has been moved to the device.
    model_gpu = DeepNetGPU()
    criterion_gpu = nn.CrossEntropyLoss()

    # CPU Preprocessing.  perf_counter() is a monotonic, high-resolution clock,
    # so the interval cannot be skewed by wall-clock adjustments.
    start_time_cpu = time.perf_counter()

    cpu_data, cpu_labels = preprocess_data_cpu(trainloader)
    cpu_data = torch.from_numpy(cpu_data).float()
    cpu_labels = torch.from_numpy(cpu_labels).long()

    end_time_cpu = time.perf_counter()

    num_operations = len(trainset)

    # GPU Training.  gpu_times stays None unless the whole phase completes, so
    # the reporting code never touches timestamps that were not recorded.
    gpu_times = None
    try:
        # Ensure GPU is available
        if not torch.cuda.is_available():
            raise RuntimeError("CUDA not available.")

        device = torch.device('cuda')
        model_gpu.to(device)
        optimizer_gpu = optim.SGD(model_gpu.parameters(), lr=0.01)

        # Reuse the tensors preprocessed above instead of running a second
        # full preprocessing pass over the dataset.
        gpu_dataset = torch.utils.data.TensorDataset(cpu_data.to(device), cpu_labels.to(device))
        gpu_train_loader = torch.utils.data.DataLoader(gpu_dataset, batch_size=64, shuffle=True)

        # CUDA work is queued asynchronously: synchronize so the timed
        # interval covers exactly the training kernels (pending transfers
        # finish before the start, training finishes before the end).
        torch.cuda.synchronize()
        start_time_gpu = time.perf_counter()
        train_gpu(model_gpu, device, gpu_train_loader, criterion_gpu, optimizer_gpu)
        torch.cuda.synchronize()
        end_time_gpu = time.perf_counter()

        gpu_times = (start_time_gpu, end_time_gpu)
        print("GPU Benchmark successful.")
    except Exception as e:
        # Top-level, best-effort boundary: report the failure and still show
        # the CPU results below.
        print(f"GPU Benchmark failed: {e}")

    if gpu_times is None:
        # No GPU interval exists; report the CPU phase on its own.
        cpu_duration = end_time_cpu - start_time_cpu
        _print_metrics("CPU", cpu_duration, num_operations / cpu_duration, cpu_duration / num_operations)
        return

    # Calculate and print metrics for CPU and GPU
    (
        cpu_duration, cpu_throughput, cpu_latency,
        gpu_duration, gpu_throughput, gpu_latency,
        overall_duration, overall_throughput, overall_latency,
    ) = calculate_metrics(start_time_cpu, end_time_cpu, gpu_times[0], gpu_times[1], num_operations)

    _print_metrics("CPU", cpu_duration, cpu_throughput, cpu_latency)
    _print_metrics("GPU", gpu_duration, gpu_throughput, gpu_latency)
    _print_metrics("Overall", overall_duration, overall_throughput, overall_latency)

# Entry point: run the combined CPU-preprocessing / GPU-training benchmark
# only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main_cpu_gpu()


GPU Benchmark successful.

CPU Performance Metrics:
CPU Duration: 14.222668 seconds
CPU Throughput: 4218.617651 ops/second
CPU Latency: 0.000237044473 seconds/operation


GPU Performance Metrics:
GPU Duration: 22.914371 seconds
GPU Throughput: 2618.444064 ops/second
GPU Latency: 0.000381906192 seconds/operation


Overall Performance Metrics:
Overall Duration: 50.143075 seconds
Overall Throughput: 1196.575998 ops/second
Overall Latency: 0.000835717916 seconds/operation

